1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195 #[allow(clippy::field_reassign_with_default)]
196 {
197 let mut s = DataQualityStats::default();
198 s.total_records = n_entries;
199 s.missing_values.total_records = n_entries;
200 s.format_variations.total_processed = n_entries;
201 s.duplicates.total_processed = n_entries;
202 s
203 }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207 let payment_behavior = &schema_config.payment_behavior;
208 let late_dist = &payment_behavior.late_payment_days_distribution;
209
210 P2PGeneratorConfig {
211 three_way_match_rate: schema_config.three_way_match_rate,
212 partial_delivery_rate: schema_config.partial_delivery_rate,
213 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214 price_variance_rate: schema_config.price_variance_rate,
215 max_price_variance_percent: schema_config.max_price_variance_percent,
216 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219 payment_method_distribution: vec![
220 (PaymentMethod::BankTransfer, 0.60),
221 (PaymentMethod::Check, 0.25),
222 (PaymentMethod::Wire, 0.10),
223 (PaymentMethod::CreditCard, 0.05),
224 ],
225 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226 payment_behavior: P2PPaymentBehavior {
227 late_payment_rate: payment_behavior.late_payment_rate,
228 late_payment_distribution: LatePaymentDistribution {
229 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230 late_8_to_14: late_dist.late_8_to_14,
231 very_late_15_to_30: late_dist.very_late_15_to_30,
232 severely_late_31_to_60: late_dist.severely_late_31_to_60,
233 extremely_late_over_60: late_dist.extremely_late_over_60,
234 },
235 partial_payment_rate: payment_behavior.partial_payment_rate,
236 payment_correction_rate: payment_behavior.payment_correction_rate,
237 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238 },
239 }
240}
241
242fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244 let payment_behavior = &schema_config.payment_behavior;
245
246 O2CGeneratorConfig {
247 credit_check_failure_rate: schema_config.credit_check_failure_rate,
248 partial_shipment_rate: schema_config.partial_shipment_rate,
249 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253 bad_debt_rate: schema_config.bad_debt_rate,
254 returns_rate: schema_config.return_rate,
255 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256 payment_method_distribution: vec![
257 (PaymentMethod::BankTransfer, 0.50),
258 (PaymentMethod::Check, 0.30),
259 (PaymentMethod::Wire, 0.15),
260 (PaymentMethod::CreditCard, 0.05),
261 ],
262 payment_behavior: O2CPaymentBehavior {
263 partial_payment_rate: payment_behavior.partial_payments.rate,
264 short_payment_rate: payment_behavior.short_payments.rate,
265 max_short_percent: payment_behavior.short_payments.max_short_percent,
266 on_account_rate: payment_behavior.on_account_payments.rate,
267 payment_correction_rate: payment_behavior.payment_corrections.rate,
268 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269 },
270 }
271}
272
/// Switches and volume settings that select which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields give the
/// requested volume for the corresponding phase.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data.
    pub generate_master_data: bool,
    /// Generate document flows.
    pub generate_document_flows: bool,
    /// Generate OCPM events.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies into the generated data.
    pub inject_anomalies: bool,
    /// Inject data-quality variations into the generated data.
    pub inject_data_quality: bool,
    /// Validate balances after generation.
    pub validate_balances: bool,
    /// Use the strict variant of chart-of-accounts coverage validation.
    pub validate_coa_coverage_strict: bool,
    /// Show progress output while generating.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Generate audit data.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking data.
    pub generate_banking: bool,
    /// Generate graph exports.
    pub generate_graph_export: bool,
    /// Generate sourcing data.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements.
    pub generate_financial_statements: bool,
    /// Generate accounting-standards data.
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data.
    pub generate_manufacturing: bool,
    /// Generate sales quotes, KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax data.
    pub generate_tax: bool,
    /// Generate ESG data.
    pub generate_esg: bool,
    /// Generate intercompany data.
    pub generate_intercompany: bool,
    /// Generate evolution events.
    pub generate_evolution_events: bool,
    /// Generate counterfactuals.
    pub generate_counterfactuals: bool,
    /// Generate compliance-regulation data.
    pub generate_compliance_regulations: bool,
    /// Generate period-close data.
    pub generate_period_close: bool,
    /// Generate HR data.
    pub generate_hr: bool,
    /// Generate treasury data.
    pub generate_treasury: bool,
    /// Generate project-accounting data.
    pub generate_project_accounting: bool,
    /// Generate legal documents.
    pub generate_legal_documents: bool,
    /// Generate IT-controls data.
    pub generate_it_controls: bool,
    /// Generate analytics metadata.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376 fn default() -> Self {
377 Self {
378 generate_master_data: true,
379 generate_document_flows: true,
380 generate_ocpm_events: false, generate_journal_entries: true,
382 inject_anomalies: false,
383 inject_data_quality: false, validate_balances: true,
385 validate_coa_coverage_strict: false,
386 show_progress: true,
387 vendors_per_company: 50,
388 customers_per_company: 100,
389 materials_per_company: 200,
390 assets_per_company: 50,
391 employees_per_company: 100,
392 p2p_chains: 100,
393 o2c_chains: 100,
394 generate_audit: false, audit_engagements: 5,
396 workpapers_per_engagement: 20,
397 evidence_per_workpaper: 5,
398 risks_per_engagement: 15,
399 findings_per_engagement: 8,
400 judgments_per_engagement: 10,
401 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
423 }
424}
425
426impl PhaseConfig {
427 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432 Self {
433 generate_master_data: true,
435 generate_document_flows: true,
436 generate_journal_entries: true,
437 validate_balances: true,
438 validate_coa_coverage_strict: false,
439 generate_period_close: true,
440 generate_evolution_events: true,
441 show_progress: true,
442
443 generate_audit: cfg.audit.enabled,
445 generate_banking: cfg.banking.enabled,
446 generate_graph_export: cfg.graph_export.enabled,
447 generate_sourcing: cfg.source_to_pay.enabled,
448 generate_intercompany: cfg.intercompany.enabled,
449 generate_financial_statements: cfg.financial_reporting.enabled,
450 generate_bank_reconciliation: cfg.financial_reporting.enabled,
451 generate_accounting_standards: cfg.accounting_standards.enabled,
452 generate_manufacturing: cfg.manufacturing.enabled,
453 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454 generate_tax: cfg.tax.enabled,
455 generate_esg: cfg.esg.enabled,
456 generate_ocpm_events: cfg.ocpm.enabled,
457 generate_compliance_regulations: cfg.compliance_regulations.enabled,
458 generate_hr: cfg.hr.enabled,
459 generate_treasury: cfg.treasury.enabled,
460 generate_project_accounting: cfg.project_accounting.enabled,
461
462 generate_legal_documents: cfg.compliance_regulations.enabled
466 && cfg.compliance_regulations.legal_documents.enabled,
467 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470 generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478 inject_data_quality: cfg.data_quality.enabled,
479
480 vendors_per_company: 50,
482 customers_per_company: 100,
483 materials_per_company: 200,
484 assets_per_company: 50,
485 employees_per_company: 100,
486 p2p_chains: 100,
487 o2c_chains: 100,
488 audit_engagements: 5,
489 workpapers_per_engagement: 20,
490 evidence_per_workpaper: 5,
491 risks_per_engagement: 15,
492 findings_per_engagement: 8,
493 judgments_per_engagement: 10,
494 }
495 }
496}
497
498#[derive(Debug, Clone, Default)]
500pub struct MasterDataSnapshot {
501 pub vendors: Vec<Vendor>,
503 pub customers: Vec<Customer>,
505 pub materials: Vec<Material>,
507 pub assets: Vec<FixedAsset>,
509 pub employees: Vec<Employee>,
511 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
513 pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
517 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
519 pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
523}
524
/// Summary of a hypergraph export: element counts and where it was written.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes in the export.
    pub node_count: usize,
    /// Number of edges in the export.
    pub edge_count: usize,
    /// Number of hyperedges in the export.
    pub hyperedge_count: usize,
    /// Path of the export output.
    pub output_path: PathBuf,
}
537
538#[derive(Debug, Clone, Default)]
540pub struct DocumentFlowSnapshot {
541 pub p2p_chains: Vec<P2PDocumentChain>,
543 pub o2c_chains: Vec<O2CDocumentChain>,
545 pub purchase_orders: Vec<documents::PurchaseOrder>,
547 pub goods_receipts: Vec<documents::GoodsReceipt>,
549 pub vendor_invoices: Vec<documents::VendorInvoice>,
551 pub sales_orders: Vec<documents::SalesOrder>,
553 pub deliveries: Vec<documents::Delivery>,
555 pub customer_invoices: Vec<documents::CustomerInvoice>,
557 pub payments: Vec<documents::Payment>,
559 pub document_references: Vec<documents::DocumentReference>,
562}
563
564#[derive(Debug, Clone, Default)]
566pub struct SubledgerSnapshot {
567 pub ap_invoices: Vec<APInvoice>,
569 pub ar_invoices: Vec<ARInvoice>,
571 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
573 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
575 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
577 pub ar_aging_reports: Vec<ARAgingReport>,
579 pub ap_aging_reports: Vec<APAgingReport>,
581 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
583 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
585 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
587 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
589}
590
591#[derive(Debug, Clone, Default)]
593pub struct OcpmSnapshot {
594 pub event_log: Option<OcpmEventLog>,
596 pub event_count: usize,
598 pub object_count: usize,
600 pub case_count: usize,
602}
603
604#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607 pub engagements: Vec<AuditEngagement>,
609 pub workpapers: Vec<Workpaper>,
611 pub evidence: Vec<AuditEvidence>,
613 pub risk_assessments: Vec<RiskAssessment>,
615 pub findings: Vec<AuditFinding>,
617 pub judgments: Vec<ProfessionalJudgment>,
619 pub confirmations: Vec<ExternalConfirmation>,
621 pub confirmation_responses: Vec<ConfirmationResponse>,
623 pub procedure_steps: Vec<AuditProcedureStep>,
625 pub samples: Vec<AuditSample>,
627 pub analytical_results: Vec<AnalyticalProcedureResult>,
629 pub ia_functions: Vec<InternalAuditFunction>,
631 pub ia_reports: Vec<InternalAuditReport>,
633 pub related_parties: Vec<RelatedParty>,
635 pub related_party_transactions: Vec<RelatedPartyTransaction>,
637 pub component_auditors: Vec<ComponentAuditor>,
640 pub group_audit_plan: Option<GroupAuditPlan>,
642 pub component_instructions: Vec<ComponentInstruction>,
644 pub component_reports: Vec<ComponentAuditorReport>,
646 pub engagement_letters: Vec<EngagementLetter>,
649 pub subsequent_events: Vec<SubsequentEvent>,
652 pub service_organizations: Vec<ServiceOrganization>,
655 pub soc_reports: Vec<SocReport>,
657 pub user_entity_controls: Vec<UserEntityControl>,
659 pub going_concern_assessments:
662 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663 pub accounting_estimates:
666 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677 pub materiality_calculations:
680 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681 pub combined_risk_assessments:
684 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690 pub significant_transaction_classes:
693 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697 pub analytical_relationships:
700 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715 pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721 pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725 pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733 pub customers: Vec<BankingCustomer>,
735 pub accounts: Vec<BankAccount>,
737 pub transactions: Vec<BankTransaction>,
739 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749 pub suspicious_count: usize,
751 pub scenario_count: usize,
753}
754
755#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758 pub exported: bool,
760 pub graph_count: usize,
762 pub exports: HashMap<String, GraphExportInfo>,
764}
765
766#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769 pub name: String,
771 pub format: String,
773 pub output_path: PathBuf,
775 pub node_count: usize,
777 pub edge_count: usize,
779}
780
781#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784 pub spend_analyses: Vec<SpendAnalysis>,
786 pub sourcing_projects: Vec<SourcingProject>,
788 pub qualifications: Vec<SupplierQualification>,
790 pub rfx_events: Vec<RfxEvent>,
792 pub bids: Vec<SupplierBid>,
794 pub bid_evaluations: Vec<BidEvaluation>,
796 pub contracts: Vec<ProcurementContract>,
798 pub catalog_items: Vec<CatalogItem>,
800 pub scorecards: Vec<SupplierScorecard>,
802}
803
804#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816 pub fiscal_year: u16,
818 pub fiscal_period: u8,
820 pub period_start: NaiveDate,
822 pub period_end: NaiveDate,
824 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826}
827
828impl PeriodTrialBalance {
829 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838 let mut total_debits = Decimal::ZERO;
839 let mut total_credits = Decimal::ZERO;
840 let lines: Vec<TrialBalanceLine> = self
841 .entries
842 .into_iter()
843 .map(|e| {
844 total_debits += e.debit_balance;
845 total_credits += e.credit_balance;
846 let category = AccountCategory::from_account_code(&e.account_code);
847 TrialBalanceLine {
848 account_code: e.account_code,
849 account_description: e.account_name,
850 category,
851 account_type: AccountType::Asset,
852 opening_balance: Decimal::ZERO,
853 period_debits: e.debit_balance,
854 period_credits: e.credit_balance,
855 closing_balance: e.debit_balance - e.credit_balance,
856 debit_balance: e.debit_balance,
857 credit_balance: e.credit_balance,
858 cost_center: None,
859 profit_center: None,
860 }
861 })
862 .collect();
863 let imbalance = total_debits - total_credits;
864 let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865 TrialBalance {
866 trial_balance_id: format!(
867 "{company_code}-{:04}{:02}",
868 self.fiscal_year, self.fiscal_period
869 ),
870 company_code: company_code.to_string(),
871 company_name: None,
872 as_of_date: self.period_end,
873 fiscal_year: self.fiscal_year as i32,
874 fiscal_period: self.fiscal_period as u32,
875 currency: currency.to_string(),
876 balance_type: TrialBalanceType::Adjusted,
877 lines,
878 total_debits,
879 total_credits,
880 is_balanced,
881 out_of_balance: imbalance,
882 is_equation_valid: is_balanced,
883 equation_difference: imbalance,
884 category_summary: std::collections::HashMap::new(),
885 created_at: self
886 .period_start
887 .and_hms_opt(0, 0, 0)
888 .expect("midnight is a valid time"),
889 created_by: "ORCHESTRATOR".to_string(),
890 approved_by: None,
891 approved_at: None,
892 status: TrialBalanceStatus::Final,
893 }
894 }
895}
896
897#[derive(Debug, Clone, Default)]
899pub struct FinancialReportingSnapshot {
900 pub financial_statements: Vec<FinancialStatement>,
903 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
906 pub consolidated_statements: Vec<FinancialStatement>,
908 pub consolidation_schedules: Vec<ConsolidationSchedule>,
910 pub bank_reconciliations: Vec<BankReconciliation>,
912 pub trial_balances: Vec<PeriodTrialBalance>,
914 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
916 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
918 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
920}
921
922#[derive(Debug, Clone, Default)]
924pub struct HrSnapshot {
925 pub payroll_runs: Vec<PayrollRun>,
927 pub payroll_line_items: Vec<PayrollLineItem>,
929 pub time_entries: Vec<TimeEntry>,
931 pub expense_reports: Vec<ExpenseReport>,
933 pub benefit_enrollments: Vec<BenefitEnrollment>,
935 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
937 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
939 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
941 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
943 pub pension_journal_entries: Vec<JournalEntry>,
945 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
947 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
949 pub stock_comp_journal_entries: Vec<JournalEntry>,
951 pub payroll_run_count: usize,
953 pub payroll_line_item_count: usize,
955 pub time_entry_count: usize,
957 pub expense_report_count: usize,
959 pub benefit_enrollment_count: usize,
961 pub pension_plan_count: usize,
963 pub stock_grant_count: usize,
965}
966
967#[derive(Debug, Clone, Default)]
969pub struct AccountingStandardsSnapshot {
970 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
972 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
974 pub business_combinations:
976 Vec<datasynth_core::models::business_combination::BusinessCombination>,
977 pub business_combination_journal_entries: Vec<JournalEntry>,
979 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
981 pub ecl_provision_movements:
983 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
984 pub ecl_journal_entries: Vec<JournalEntry>,
986 pub provisions: Vec<datasynth_core::models::provision::Provision>,
988 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
990 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
992 pub provision_journal_entries: Vec<JournalEntry>,
994 pub currency_translation_results:
996 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
997 pub revenue_contract_count: usize,
999 pub impairment_test_count: usize,
1001 pub business_combination_count: usize,
1003 pub ecl_model_count: usize,
1005 pub provision_count: usize,
1007 pub currency_translation_count: usize,
1009 pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1013 pub fair_value_measurements:
1015 Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1016 pub framework_differences:
1018 Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1019 pub framework_reconciliations:
1021 Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1022 pub lease_count: usize,
1024 pub fair_value_measurement_count: usize,
1025 pub framework_difference_count: usize,
1026}
1027
1028#[derive(Debug, Clone, Default)]
1030pub struct ComplianceRegulationsSnapshot {
1031 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1033 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1035 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1037 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1039 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1041 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1043 pub compliance_graph: Option<datasynth_graph::Graph>,
1045}
1046
1047#[derive(Debug, Clone, Default)]
1049pub struct ManufacturingSnapshot {
1050 pub production_orders: Vec<ProductionOrder>,
1052 pub quality_inspections: Vec<QualityInspection>,
1054 pub cycle_counts: Vec<CycleCount>,
1056 pub bom_components: Vec<BomComponent>,
1058 pub inventory_movements: Vec<InventoryMovement>,
1060 pub production_order_count: usize,
1062 pub quality_inspection_count: usize,
1064 pub cycle_count_count: usize,
1066 pub bom_component_count: usize,
1068 pub inventory_movement_count: usize,
1070}
1071
1072#[derive(Debug, Clone, Default)]
1074pub struct SalesKpiBudgetsSnapshot {
1075 pub sales_quotes: Vec<SalesQuote>,
1077 pub kpis: Vec<ManagementKpi>,
1079 pub budgets: Vec<Budget>,
1081 pub sales_quote_count: usize,
1083 pub kpi_count: usize,
1085 pub budget_line_count: usize,
1087}
1088
1089#[derive(Debug, Clone, Default)]
1091pub struct AnomalyLabels {
1092 pub labels: Vec<LabeledAnomaly>,
1094 pub summary: Option<AnomalySummary>,
1096 pub by_type: HashMap<String, usize>,
1098}
1099
1100#[derive(Debug, Clone, Default)]
1102pub struct BalanceValidationResult {
1103 pub validated: bool,
1105 pub is_balanced: bool,
1107 pub entries_processed: u64,
1109 pub total_debits: rust_decimal::Decimal,
1111 pub total_credits: rust_decimal::Decimal,
1113 pub accounts_tracked: usize,
1115 pub companies_tracked: usize,
1117 pub validation_errors: Vec<ValidationError>,
1119 pub has_unbalanced_entries: bool,
1121}
1122
1123#[derive(Debug, Clone, Default)]
1125pub struct TaxSnapshot {
1126 pub jurisdictions: Vec<TaxJurisdiction>,
1128 pub codes: Vec<TaxCode>,
1130 pub tax_lines: Vec<TaxLine>,
1132 pub tax_returns: Vec<TaxReturn>,
1134 pub tax_provisions: Vec<TaxProvision>,
1136 pub withholding_records: Vec<WithholdingTaxRecord>,
1138 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1140 pub jurisdiction_count: usize,
1142 pub code_count: usize,
1144 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1146 pub tax_posting_journal_entries: Vec<JournalEntry>,
1148}
1149
1150#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1152pub struct IntercompanySnapshot {
1153 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1155 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1157 pub seller_journal_entries: Vec<JournalEntry>,
1159 pub buyer_journal_entries: Vec<JournalEntry>,
1161 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1163 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1165 #[serde(skip)]
1167 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1168 pub matched_pair_count: usize,
1170 pub elimination_entry_count: usize,
1172 pub match_rate: f64,
1174}
1175
1176#[derive(Debug, Clone, Default)]
1178pub struct EsgSnapshot {
1179 pub emissions: Vec<EmissionRecord>,
1181 pub energy: Vec<EnergyConsumption>,
1183 pub water: Vec<WaterUsage>,
1185 pub waste: Vec<WasteRecord>,
1187 pub diversity: Vec<WorkforceDiversityMetric>,
1189 pub pay_equity: Vec<PayEquityMetric>,
1191 pub safety_incidents: Vec<SafetyIncident>,
1193 pub safety_metrics: Vec<SafetyMetric>,
1195 pub governance: Vec<GovernanceMetric>,
1197 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1199 pub materiality: Vec<MaterialityAssessment>,
1201 pub disclosures: Vec<EsgDisclosure>,
1203 pub climate_scenarios: Vec<ClimateScenario>,
1205 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1207 pub emission_count: usize,
1209 pub disclosure_count: usize,
1211}
1212
/// Treasury data carried in the `treasury` field of `EnhancedGenerationResult`.
///
/// Derives `Default`, so a fresh value has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Sweeps belonging to cash pools.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Anomaly labels emitted by the treasury generators.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries associated with the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1240
/// Project accounting data carried in the `project_accounting` field of
/// `EnhancedGenerationResult`.
///
/// Derives `Default`, so a fresh value has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1257
/// Aggregate output returned by `EnhancedOrchestrator::generate`.
///
/// Groups one snapshot per domain together with the journal entries, labels,
/// validation results and run statistics. The type derives `Debug` and
/// `Default` only; it is not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Individual quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Run statistics (counters and timings).
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, when one is available.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, when one is available.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains over vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when one is available.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when one is available.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Chart-of-accounts semantic prior, when one is available.
    pub coa_semantic_prior:
        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
    /// Compliance regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics metadata snapshot.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, when one is available.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity snapshot.
    pub interconnectivity: InterconnectivitySnapshot,
}
1370
/// Interconnectivity data carried in the `interconnectivity` field of
/// `EnhancedGenerationResult`.
///
/// NOTE(review): the tuple layouts below are inferred from the field names;
/// confirm against the code that fills them.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// Vendor tiers; presumably `(vendor id, tier number)` — TODO confirm.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Vendor clusters; presumably `(vendor id, cluster name)` — TODO confirm.
    pub vendor_clusters: Vec<(String, String)>,
    /// Customer value segments; presumably `(customer id, segment)` — TODO confirm.
    pub customer_value_segments: Vec<(String, String)>,
    /// Customer lifecycle stages; presumably `(customer id, stage)` — TODO confirm.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Free-form industry metadata strings.
    pub industry_metadata: Vec<String>,
}
1396
/// Analytics metadata carried in the `analytics_metadata` field of
/// `EnhancedGenerationResult`.
///
/// Derives `Default`, so a fresh value has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparatives.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management reports.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift events.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1409
/// Counters and timings describing one generation run.
///
/// The struct is serializable. Fields marked `#[serde(default)]` fall back to
/// their `Default` value when absent from the deserialized input; the fields
/// without that attribute are required by the derived `Deserialize`. The three
/// `neural_*` options are additionally omitted from serialized output while
/// they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // ---- Core totals ----
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Initialised from `config.companies.len()` at the start of `generate`.
    pub companies_count: usize,
    /// Initialised from `config.global.period_months` at the start of `generate`.
    pub period_months: u32,

    // ---- Master data ----
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // ---- Document flows and subledger invoices ----
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // ---- OCPM ----
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // ---- Audit ----
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // ---- Anomalies and data quality ----
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // ---- Banking ----
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // ---- Graph export ----
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // ---- LLM enrichment (the `_ms` suffix suggests milliseconds — TODO confirm) ----
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,

    // ---- Diffusion enhancement ----
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // ---- Neural hybrid settings (skipped in output while `None`) ----
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // ---- Causal generation ----
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // ---- Sourcing ----
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // ---- Financial reporting ----
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // ---- HR ----
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // ---- Accounting standards ----
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // ---- Manufacturing ----
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // ---- Sales, KPIs and budgets ----
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // ---- Tax ----
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // ---- ESG ----
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // ---- Intercompany ----
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // ---- Additional subledgers ----
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // ---- Treasury instruments ----
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // ---- Project accounting ----
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // ---- Further tax, balance and reconciliation counters ----
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // ---- Cash management ----
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // ---- Events ----
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // ---- Fraud and counterfactuals ----
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // ---- Temporal chains, entity graph and cross-process links ----
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // ---- Disruption, industry and period close ----
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1679
/// Orchestrator that drives the enhanced generation workflow.
///
/// Built through `new` (or one of the `with_*` / `from_fingerprint*`
/// constructors) and run through `generate`.
pub struct EnhancedOrchestrator {
    /// Validated generator configuration.
    config: GeneratorConfig,
    /// Phase configuration; `with_progress` writes its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` until one has been produced.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress container, set by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Guard used for the resource/degradation checks.
    resource_guard: ResourceGuard,
    /// Output directory, set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generators; filled in by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Registry of country packs built from `config.country_packs`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives items through `emit_phase_items`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider built from `config.templates`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Temporal context; `None` when temporal patterns or business days are disabled.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Shard context, set by `set_shard_context`.
    shard_context: Option<crate::shard_context::ShardContext>,
    /// Cached priors; `None` after construction.
    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
}
1720
1721impl EnhancedOrchestrator {
1722 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1724 datasynth_config::validate_config(&config)?;
1725
1726 let seed = config.global.seed.unwrap_or_else(rand::random);
1727
1728 let resource_guard = Self::build_resource_guard(&config, None);
1730
1731 let country_pack_registry = match &config.country_packs {
1733 Some(cp) => {
1734 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1735 .map_err(|e| SynthError::config(e.to_string()))?
1736 }
1737 None => datasynth_core::CountryPackRegistry::builtin_only()
1738 .map_err(|e| SynthError::config(e.to_string()))?,
1739 };
1740
1741 let template_provider = Self::build_template_provider(&config)?;
1745
1746 let temporal_context = Self::build_temporal_context(&config)?;
1750
1751 Ok(Self {
1752 config,
1753 phase_config,
1754 coa: None,
1755 master_data: MasterDataSnapshot::default(),
1756 seed,
1757 multi_progress: None,
1758 resource_guard,
1759 output_path: None,
1760 copula_generators: Vec::new(),
1761 country_pack_registry,
1762 phase_sink: None,
1763 template_provider,
1764 temporal_context,
1765 shard_context: None,
1766 cached_priors: None,
1767 })
1768 }
1769
    /// Stores `ctx` as the orchestrator's shard context, replacing any previous one.
    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
        self.shard_context = Some(ctx);
    }
1778
1779 fn build_temporal_context(
1785 config: &GeneratorConfig,
1786 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1787 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1788
1789 let tp = &config.temporal_patterns;
1790 if !tp.enabled || !tp.business_days.enabled {
1791 return Ok(None);
1792 }
1793
1794 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1795 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1796 let end_date = start_date + chrono::Months::new(config.global.period_months);
1797
1798 let region_code = tp
1799 .calendars
1800 .regions
1801 .first()
1802 .cloned()
1803 .unwrap_or_else(|| "US".to_string());
1804 let region = parse_region_code(®ion_code);
1805
1806 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1807 }
1808
1809 fn build_template_provider(
1817 config: &GeneratorConfig,
1818 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1819 use datasynth_core::templates::{
1820 loader::{MergeStrategy, TemplateLoader},
1821 DefaultTemplateProvider,
1822 };
1823 use std::sync::Arc;
1824
1825 let provider = match &config.templates.path {
1826 None => DefaultTemplateProvider::new(),
1827 Some(path) => {
1828 let data = if path.is_dir() {
1829 TemplateLoader::load_from_directory(path)
1830 } else {
1831 TemplateLoader::load_from_file(path)
1832 }
1833 .map_err(|e| {
1834 SynthError::config(format!(
1835 "Failed to load templates from {}: {e}",
1836 path.display()
1837 ))
1838 })?;
1839 let strategy = match config.templates.merge_strategy {
1840 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1841 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1842 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1843 MergeStrategy::MergePreferFile
1844 }
1845 };
1846 DefaultTemplateProvider::with_templates(data, strategy)
1847 }
1848 };
1849 Ok(Arc::new(provider))
1850 }
1851
    /// Creates an orchestrator for `config` using `PhaseConfig::default()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `new`.
    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
        Self::new(config, PhaseConfig::default())
    }
1856
1857 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1859 self.phase_sink = Some(sink);
1860 self
1861 }
1862
    /// Installs `sink` as the phase sink, replacing any previously installed one.
    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
        self.phase_sink = Some(sink);
    }
1867
1868 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1870 if let Some(ref sink) = self.phase_sink {
1871 for item in items {
1872 if let Ok(value) = serde_json::to_value(item) {
1873 if let Err(e) = sink.emit(phase, type_name, &value) {
1874 warn!(
1875 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1876 );
1877 }
1878 }
1879 }
1880 if let Err(e) = sink.phase_complete(phase) {
1881 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1882 }
1883 }
1884 }
1885
1886 pub fn with_progress(mut self, show: bool) -> Self {
1888 self.phase_config.show_progress = show;
1889 if show {
1890 self.multi_progress = Some(MultiProgress::new());
1891 }
1892 self
1893 }
1894
1895 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1897 let path = path.into();
1898 self.output_path = Some(path.clone());
1899 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1901 self
1902 }
1903
    /// Returns a reference to the orchestrator's country-pack registry.
    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
        &self.country_pack_registry
    }
1908
    /// Looks up the country pack for `country` via the registry's `get_by_str`.
    ///
    /// NOTE(review): the lookup returns a plain reference rather than an `Option`,
    /// so the registry presumably falls back to some default pack for unknown
    /// codes — confirm in `CountryPackRegistry::get_by_str`.
    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
        self.country_pack_registry.get_by_str(country)
    }
1913
1914 fn primary_country_code(&self) -> &str {
1917 self.config
1918 .companies
1919 .first()
1920 .map(|c| c.country.as_str())
1921 .unwrap_or("US")
1922 }
1923
    /// Returns the country pack for the primary country code
    /// (see `primary_country_code`).
    fn primary_pack(&self) -> &datasynth_core::CountryPack {
        self.country_pack_for(self.primary_country_code())
    }
1928
1929 fn resolve_coa_framework(&self) -> CoAFramework {
1931 if self.config.accounting_standards.enabled {
1932 match self.config.accounting_standards.framework {
1933 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1934 return CoAFramework::FrenchPcg;
1935 }
1936 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1937 return CoAFramework::GermanSkr04;
1938 }
1939 _ => {}
1940 }
1941 }
1942 let pack = self.primary_pack();
1944 match pack.accounting.framework.as_str() {
1945 "french_gaap" => CoAFramework::FrenchPcg,
1946 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1947 _ => CoAFramework::UsGaap,
1948 }
1949 }
1950
    /// Returns `true` when at least one copula generator is registered.
    pub fn has_copulas(&self) -> bool {
        !self.copula_generators.is_empty()
    }
1958
1959 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1965 &self.copula_generators
1966 }
1967
1968 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1972 &mut self.copula_generators
1973 }
1974
1975 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1979 self.copula_generators
1980 .iter_mut()
1981 .find(|c| c.name == copula_name)
1982 .map(|c| c.generator.sample())
1983 }
1984
1985 pub fn from_fingerprint(
2008 fingerprint_path: &std::path::Path,
2009 phase_config: PhaseConfig,
2010 scale: f64,
2011 ) -> SynthResult<Self> {
2012 info!("Loading fingerprint from: {}", fingerprint_path.display());
2013
2014 let reader = FingerprintReader::new();
2016 let fingerprint = reader
2017 .read_from_file(fingerprint_path)
2018 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2019
2020 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2021 }
2022
2023 pub fn from_fingerprint_data(
2030 fingerprint: Fingerprint,
2031 phase_config: PhaseConfig,
2032 scale: f64,
2033 ) -> SynthResult<Self> {
2034 info!(
2035 "Synthesizing config from fingerprint (version: {}, tables: {})",
2036 fingerprint.manifest.version,
2037 fingerprint.schema.tables.len()
2038 );
2039
2040 let seed: u64 = rand::random();
2042 info!("Fingerprint synthesis seed: {}", seed);
2043
2044 let options = SynthesisOptions {
2046 scale,
2047 seed: Some(seed),
2048 preserve_correlations: true,
2049 inject_anomalies: true,
2050 };
2051 let synthesizer = ConfigSynthesizer::with_options(options);
2052
2053 let synthesis_result = synthesizer
2055 .synthesize_full(&fingerprint, seed)
2056 .map_err(|e| {
2057 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2058 })?;
2059
2060 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2062 Self::base_config_for_industry(industry)
2063 } else {
2064 Self::base_config_for_industry("manufacturing")
2065 };
2066
2067 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2069
2070 info!(
2072 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2073 fingerprint.schema.tables.len(),
2074 scale,
2075 synthesis_result.copula_generators.len()
2076 );
2077
2078 if !synthesis_result.copula_generators.is_empty() {
2079 for spec in &synthesis_result.copula_generators {
2080 info!(
2081 " Copula '{}' for table '{}': {} columns",
2082 spec.name,
2083 spec.table,
2084 spec.columns.len()
2085 );
2086 }
2087 }
2088
2089 let mut orchestrator = Self::new(config, phase_config)?;
2091
2092 orchestrator.copula_generators = synthesis_result.copula_generators;
2094
2095 Ok(orchestrator)
2096 }
2097
2098 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2100 use datasynth_config::presets::create_preset;
2101 use datasynth_config::TransactionVolume;
2102 use datasynth_core::models::{CoAComplexity, IndustrySector};
2103
2104 let sector = match industry.to_lowercase().as_str() {
2105 "manufacturing" => IndustrySector::Manufacturing,
2106 "retail" => IndustrySector::Retail,
2107 "financial" | "financial_services" => IndustrySector::FinancialServices,
2108 "healthcare" => IndustrySector::Healthcare,
2109 "technology" | "tech" => IndustrySector::Technology,
2110 _ => IndustrySector::Manufacturing,
2111 };
2112
2113 create_preset(
2115 sector,
2116 1, 12, CoAComplexity::Medium,
2119 TransactionVolume::TenK,
2120 )
2121 }
2122
2123 fn apply_config_patch(
2125 mut config: GeneratorConfig,
2126 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2127 ) -> GeneratorConfig {
2128 use datasynth_fingerprint::synthesis::ConfigValue;
2129
2130 for (key, value) in patch.values() {
2131 match (key.as_str(), value) {
2132 ("transactions.count", ConfigValue::Integer(n)) => {
2135 info!(
2136 "Fingerprint suggests {} transactions (apply via company volumes)",
2137 n
2138 );
2139 }
2140 ("global.period_months", ConfigValue::Integer(n)) => {
2141 config.global.period_months = (*n).clamp(1, 120) as u32;
2142 }
2143 ("global.start_date", ConfigValue::String(s)) => {
2144 config.global.start_date = s.clone();
2145 }
2146 ("global.seed", ConfigValue::Integer(n)) => {
2147 config.global.seed = Some(*n as u64);
2148 }
2149 ("fraud.enabled", ConfigValue::Bool(b)) => {
2150 config.fraud.enabled = *b;
2151 }
2152 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2153 config.fraud.fraud_rate = *f;
2154 }
2155 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2156 config.data_quality.enabled = *b;
2157 }
2158 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2160 config.fraud.enabled = *b;
2161 }
2162 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2163 config.fraud.fraud_rate = *f;
2164 }
2165 _ => {
2166 debug!("Ignoring unknown config patch key: {}", key);
2167 }
2168 }
2169 }
2170
2171 config
2172 }
2173
2174 fn build_resource_guard(
2176 config: &GeneratorConfig,
2177 output_path: Option<PathBuf>,
2178 ) -> ResourceGuard {
2179 let mut builder = ResourceGuardBuilder::new();
2180
2181 if config.global.memory_limit_mb > 0 {
2183 builder = builder.memory_limit(config.global.memory_limit_mb);
2184 }
2185
2186 if let Some(path) = output_path {
2188 builder = builder.output_path(path).min_free_disk(100); }
2190
2191 builder = builder.conservative();
2193
2194 builder.build()
2195 }
2196
    /// Runs the resource guard's check and returns the resulting degradation level.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ResourceGuard::check`.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
2204
2205 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2207 let level = self.resource_guard.check()?;
2208
2209 if level != DegradationLevel::Normal {
2210 warn!(
2211 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2212 phase,
2213 level,
2214 self.resource_guard.current_memory_mb(),
2215 self.resource_guard.available_disk_mb()
2216 );
2217 }
2218
2219 Ok(level)
2220 }
2221
    /// Returns the degradation actions currently reported by the resource guard.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
2226
2227 fn check_memory_limit(&self) -> SynthResult<()> {
2229 self.check_resources()?;
2230 Ok(())
2231 }
2232
2233 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2235 info!("Starting enhanced generation workflow");
2236 info!(
2237 "Config: industry={:?}, period_months={}, companies={}",
2238 self.config.global.industry,
2239 self.config.global.period_months,
2240 self.config.companies.len()
2241 );
2242
2243 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2246 datasynth_core::serde_decimal::set_numeric_native(is_native);
2247 struct NumericModeGuard;
2248 impl Drop for NumericModeGuard {
2249 fn drop(&mut self) {
2250 datasynth_core::serde_decimal::set_numeric_native(false);
2251 }
2252 }
2253 let _numeric_guard = if is_native {
2254 Some(NumericModeGuard)
2255 } else {
2256 None
2257 };
2258
2259 let initial_level = self.check_resources_with_log("initial")?;
2261 if initial_level == DegradationLevel::Emergency {
2262 return Err(SynthError::resource(
2263 "Insufficient resources to start generation",
2264 ));
2265 }
2266
2267 let mut stats = EnhancedGenerationStatistics {
2268 companies_count: self.config.companies.len(),
2269 period_months: self.config.global.period_months,
2270 ..Default::default()
2271 };
2272
2273 let coa = self.phase_chart_of_accounts(&mut stats)?;
2275
2276 self.phase_master_data(&mut stats)?;
2278
2279 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2281 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2282 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2283
2284 let (mut document_flows, mut subledger, fa_journal_entries) =
2286 self.phase_document_flows(&mut stats)?;
2287
2288 self.emit_phase_items(
2290 "document_flows",
2291 "PurchaseOrder",
2292 &document_flows.purchase_orders,
2293 );
2294 self.emit_phase_items(
2295 "document_flows",
2296 "GoodsReceipt",
2297 &document_flows.goods_receipts,
2298 );
2299 self.emit_phase_items(
2300 "document_flows",
2301 "VendorInvoice",
2302 &document_flows.vendor_invoices,
2303 );
2304 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2305 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2306
2307 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2309
2310 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2315 .iter()
2316 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2317 .collect();
2318 if !opening_balance_jes.is_empty() {
2319 debug!(
2320 "Prepending {} opening balance JEs to entries",
2321 opening_balance_jes.len()
2322 );
2323 }
2324
2325 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2327
2328 if !opening_balance_jes.is_empty() {
2331 let mut combined = opening_balance_jes;
2332 combined.extend(entries);
2333 entries = combined;
2334 }
2335
2336 if !fa_journal_entries.is_empty() {
2338 debug!(
2339 "Appending {} FA acquisition JEs to main entries",
2340 fa_journal_entries.len()
2341 );
2342 entries.extend(fa_journal_entries);
2343 }
2344
2345 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2347
2348 let actions = self.get_degradation_actions();
2350
2351 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2353
2354 if !sourcing.contracts.is_empty() {
2357 let mut linked_count = 0usize;
2358 let po_vendor_pairs: Vec<(String, String)> = document_flows
2360 .p2p_chains
2361 .iter()
2362 .map(|chain| {
2363 (
2364 chain.purchase_order.vendor_id.clone(),
2365 chain.purchase_order.header.document_id.clone(),
2366 )
2367 })
2368 .collect();
2369
2370 for chain in &mut document_flows.p2p_chains {
2371 if chain.purchase_order.contract_id.is_none() {
2372 if let Some(contract) = sourcing
2373 .contracts
2374 .iter()
2375 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2376 {
2377 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2378 linked_count += 1;
2379 }
2380 }
2381 }
2382
2383 for contract in &mut sourcing.contracts {
2385 let po_ids: Vec<String> = po_vendor_pairs
2386 .iter()
2387 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2388 .map(|(_, po_id)| po_id.clone())
2389 .collect();
2390 if !po_ids.is_empty() {
2391 contract.purchase_order_ids = po_ids;
2392 }
2393 }
2394
2395 if linked_count > 0 {
2396 debug!(
2397 "Linked {} purchase orders to S2C contracts by vendor match",
2398 linked_count
2399 );
2400 }
2401 }
2402
2403 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2405
2406 if !intercompany.seller_journal_entries.is_empty()
2408 || !intercompany.buyer_journal_entries.is_empty()
2409 {
2410 let ic_je_count = intercompany.seller_journal_entries.len()
2411 + intercompany.buyer_journal_entries.len();
2412 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2413 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2414 debug!(
2415 "Appended {} IC journal entries to main entries",
2416 ic_je_count
2417 );
2418 }
2419
2420 if !intercompany.elimination_entries.is_empty() {
2422 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2423 &intercompany.elimination_entries,
2424 );
2425 if !elim_jes.is_empty() {
2426 debug!(
2427 "Appended {} elimination journal entries to main entries",
2428 elim_jes.len()
2429 );
2430 let elim_debit: rust_decimal::Decimal =
2432 elim_jes.iter().map(|je| je.total_debit()).sum();
2433 let elim_credit: rust_decimal::Decimal =
2434 elim_jes.iter().map(|je| je.total_credit()).sum();
2435 let elim_diff = (elim_debit - elim_credit).abs();
2436 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2438 return Err(datasynth_core::error::SynthError::generation(format!(
2439 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2440 elim_debit, elim_credit, elim_diff, tolerance
2441 )));
2442 }
2443 debug!(
2444 "IC elimination balance verified: debits={}, credits={} (diff={})",
2445 elim_debit, elim_credit, elim_diff
2446 );
2447 entries.extend(elim_jes);
2448 }
2449 }
2450
2451 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2453 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2454 document_flows
2455 .customer_invoices
2456 .extend(ic_docs.seller_invoices.iter().cloned());
2457 document_flows
2458 .purchase_orders
2459 .extend(ic_docs.buyer_orders.iter().cloned());
2460 document_flows
2461 .goods_receipts
2462 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2463 document_flows
2464 .vendor_invoices
2465 .extend(ic_docs.buyer_invoices.iter().cloned());
2466 debug!(
2467 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2468 ic_docs.seller_invoices.len(),
2469 ic_docs.buyer_orders.len(),
2470 ic_docs.buyer_goods_receipts.len(),
2471 ic_docs.buyer_invoices.len(),
2472 );
2473 }
2474 }
2475
2476 let hr = self.phase_hr_data(&mut stats)?;
2478
2479 if !hr.payroll_runs.is_empty() {
2481 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2482 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2483 entries.extend(payroll_jes);
2484 }
2485
2486 if !hr.pension_journal_entries.is_empty() {
2488 debug!(
2489 "Generated {} JEs from pension plans",
2490 hr.pension_journal_entries.len()
2491 );
2492 entries.extend(hr.pension_journal_entries.iter().cloned());
2493 }
2494
2495 if !hr.stock_comp_journal_entries.is_empty() {
2497 debug!(
2498 "Generated {} JEs from stock-based compensation",
2499 hr.stock_comp_journal_entries.len()
2500 );
2501 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2502 }
2503
2504 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2506
2507 if !manufacturing_snap.production_orders.is_empty() {
2509 let currency = self
2510 .config
2511 .companies
2512 .first()
2513 .map(|c| c.currency.as_str())
2514 .unwrap_or("USD");
2515 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2516 &manufacturing_snap.production_orders,
2517 &manufacturing_snap.quality_inspections,
2518 currency,
2519 );
2520 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2521 entries.extend(mfg_jes);
2522 }
2523
2524 if !manufacturing_snap.quality_inspections.is_empty() {
2526 let framework = match self.config.accounting_standards.framework {
2527 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2528 _ => "US_GAAP",
2529 };
2530 for company in &self.config.companies {
2531 let company_orders: Vec<_> = manufacturing_snap
2532 .production_orders
2533 .iter()
2534 .filter(|o| o.company_code == company.code)
2535 .cloned()
2536 .collect();
2537 let company_inspections: Vec<_> = manufacturing_snap
2538 .quality_inspections
2539 .iter()
2540 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2541 .cloned()
2542 .collect();
2543 if company_inspections.is_empty() {
2544 continue;
2545 }
2546 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2547 let warranty_result = warranty_gen.generate(
2548 &company.code,
2549 &company_orders,
2550 &company_inspections,
2551 &company.currency,
2552 framework,
2553 );
2554 if !warranty_result.journal_entries.is_empty() {
2555 debug!(
2556 "Generated {} warranty provision JEs for {}",
2557 warranty_result.journal_entries.len(),
2558 company.code
2559 );
2560 entries.extend(warranty_result.journal_entries);
2561 }
2562 }
2563 }
2564
2565 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2567 {
2568 let cogs_currency = self
2569 .config
2570 .companies
2571 .first()
2572 .map(|c| c.currency.as_str())
2573 .unwrap_or("USD");
2574 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2575 &document_flows.deliveries,
2576 &manufacturing_snap.production_orders,
2577 cogs_currency,
2578 );
2579 if !cogs_jes.is_empty() {
2580 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2581 entries.extend(cogs_jes);
2582 }
2583 }
2584
2585 if !manufacturing_snap.inventory_movements.is_empty()
2591 && !subledger.inventory_positions.is_empty()
2592 {
2593 use datasynth_core::models::MovementType as MfgMovementType;
2594 let mut receipt_count = 0usize;
2595 let mut issue_count = 0usize;
2596 for movement in &manufacturing_snap.inventory_movements {
2597 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2599 p.material_id == movement.material_code
2600 && p.company_code == movement.entity_code
2601 }) {
2602 match movement.movement_type {
2603 MfgMovementType::GoodsReceipt => {
2604 pos.add_quantity(
2606 movement.quantity,
2607 movement.value,
2608 movement.movement_date,
2609 );
2610 receipt_count += 1;
2611 }
2612 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2613 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2615 issue_count += 1;
2616 }
2617 _ => {}
2618 }
2619 }
2620 }
2621 debug!(
2622 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2623 manufacturing_snap.inventory_movements.len(),
2624 receipt_count,
2625 issue_count,
2626 );
2627 }
2628
2629 if !entries.is_empty() {
2632 stats.total_entries = entries.len() as u64;
2633 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2634 debug!(
2635 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2636 stats.total_entries, stats.total_line_items
2637 );
2638 }
2639
2640 if self.config.internal_controls.enabled && !entries.is_empty() {
2642 info!("Phase 7b: Applying internal controls to journal entries");
2643 let control_config = ControlGeneratorConfig {
2644 exception_rate: self.config.internal_controls.exception_rate,
2645 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2646 enable_sox_marking: true,
2647 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2648 self.config.internal_controls.sox_materiality_threshold,
2649 )
2650 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2651 ..Default::default()
2652 };
2653 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2654 for entry in &mut entries {
2655 control_gen.apply_controls(entry, &coa);
2656 }
2657 let with_controls = entries
2658 .iter()
2659 .filter(|e| !e.header.control_ids.is_empty())
2660 .count();
2661 info!(
2662 "Applied controls to {} entries ({} with control IDs assigned)",
2663 entries.len(),
2664 with_controls
2665 );
2666 }
2667
2668 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2672 .iter()
2673 .filter(|e| e.header.sod_violation)
2674 .filter_map(|e| {
2675 e.header.sod_conflict_type.map(|ct| {
2676 use datasynth_core::models::{RiskLevel, SodViolation};
2677 let severity = match ct {
2678 datasynth_core::models::SodConflictType::PaymentReleaser
2679 | datasynth_core::models::SodConflictType::RequesterApprover => {
2680 RiskLevel::Critical
2681 }
2682 datasynth_core::models::SodConflictType::PreparerApprover
2683 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2684 | datasynth_core::models::SodConflictType::JournalEntryPoster
2685 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2686 RiskLevel::High
2687 }
2688 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2689 RiskLevel::Medium
2690 }
2691 };
2692 let action = format!(
2693 "SoD conflict {:?} on entry {} ({})",
2694 ct, e.header.document_id, e.header.company_code
2695 );
2696 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2697 })
2698 })
2699 .collect();
2700 if !sod_violations.is_empty() {
2701 info!(
2702 "Phase 7c: Extracted {} SoD violations from {} entries",
2703 sod_violations.len(),
2704 entries.len()
2705 );
2706 }
2707
2708 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2710
2711 {
2719 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2720 if self.config.fraud.enabled && doc_rate > 0.0 {
2721 use datasynth_core::fraud_propagation::{
2722 inject_document_fraud, propagate_documents_to_entries,
2723 };
2724 use datasynth_core::utils::weighted_select;
2725 use datasynth_core::FraudType;
2726 use rand_chacha::rand_core::SeedableRng;
2727
2728 let dist = &self.config.fraud.fraud_type_distribution;
2729 let fraud_type_weights: [(FraudType, f64); 8] = [
2730 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2731 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2732 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2733 (
2734 FraudType::ImproperCapitalization,
2735 dist.expense_capitalization,
2736 ),
2737 (FraudType::SplitTransaction, dist.split_transaction),
2738 (FraudType::TimingAnomaly, dist.timing_anomaly),
2739 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2740 (FraudType::DuplicatePayment, dist.duplicate_payment),
2741 ];
2742 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2743 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2744 if weights_sum <= 0.0 {
2745 FraudType::FictitiousEntry
2746 } else {
2747 *weighted_select(rng, &fraud_type_weights)
2748 }
2749 };
2750
2751 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2752 let mut doc_tagged = 0usize;
2753 macro_rules! inject_into {
2754 ($collection:expr) => {{
2755 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2756 $collection.iter_mut().map(|d| &mut d.header).collect();
2757 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2758 }};
2759 }
2760 inject_into!(document_flows.purchase_orders);
2761 inject_into!(document_flows.goods_receipts);
2762 inject_into!(document_flows.vendor_invoices);
2763 inject_into!(document_flows.payments);
2764 inject_into!(document_flows.sales_orders);
2765 inject_into!(document_flows.deliveries);
2766 inject_into!(document_flows.customer_invoices);
2767 if doc_tagged > 0 {
2768 info!(
2769 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2770 );
2771 }
2772
2773 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2774 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2775 Vec::new();
2776 headers.extend(
2777 document_flows
2778 .purchase_orders
2779 .iter()
2780 .map(|d| d.header.clone()),
2781 );
2782 headers.extend(
2783 document_flows
2784 .goods_receipts
2785 .iter()
2786 .map(|d| d.header.clone()),
2787 );
2788 headers.extend(
2789 document_flows
2790 .vendor_invoices
2791 .iter()
2792 .map(|d| d.header.clone()),
2793 );
2794 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2795 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2796 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2797 headers.extend(
2798 document_flows
2799 .customer_invoices
2800 .iter()
2801 .map(|d| d.header.clone()),
2802 );
2803 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2804 if propagated > 0 {
2805 info!(
2806 "Propagated document-level fraud to {propagated} derived journal entries"
2807 );
2808 }
2809 }
2810 }
2811 }
2812
2813 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2815
2816 {
2834 use datasynth_core::fraud_bias::{
2835 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2836 };
2837 use rand_chacha::rand_core::SeedableRng;
2838 let cfg = FraudBehavioralBiasConfig::default();
2839 if cfg.enabled {
2840 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2841 let mut swept = 0usize;
2842 for entry in entries.iter_mut() {
2843 if entry.header.is_fraud && !entry.header.is_anomaly {
2844 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2845 swept += 1;
2846 }
2847 }
2848 if swept > 0 {
2849 info!(
2850 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2851 (doc-propagated + je_generator intrinsic fraud)"
2852 );
2853 }
2854 }
2855 }
2856
2857 self.emit_phase_items(
2859 "anomaly_injection",
2860 "LabeledAnomaly",
2861 &anomaly_labels.labels,
2862 );
2863
2864 if self.config.fraud.propagate_to_document {
2872 use std::collections::HashMap;
2873 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2886 for je in &entries {
2887 if je.header.is_fraud {
2888 if let Some(ref fraud_type) = je.header.fraud_type {
2889 if let Some(ref reference) = je.header.reference {
2890 fraud_map.insert(reference.clone(), *fraud_type);
2892 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2895 if !bare.is_empty() {
2896 fraud_map.insert(bare.to_string(), *fraud_type);
2897 }
2898 }
2899 }
2900 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2902 }
2903 }
2904 }
2905 if !fraud_map.is_empty() {
2906 let mut propagated = 0usize;
2907 macro_rules! propagate_to {
2909 ($collection:expr) => {
2910 for doc in &mut $collection {
2911 if doc.header.propagate_fraud(&fraud_map) {
2912 propagated += 1;
2913 }
2914 }
2915 };
2916 }
2917 propagate_to!(document_flows.purchase_orders);
2918 propagate_to!(document_flows.goods_receipts);
2919 propagate_to!(document_flows.vendor_invoices);
2920 propagate_to!(document_flows.payments);
2921 propagate_to!(document_flows.sales_orders);
2922 propagate_to!(document_flows.deliveries);
2923 propagate_to!(document_flows.customer_invoices);
2924 if propagated > 0 {
2925 info!(
2926 "Propagated fraud labels to {} document flow records",
2927 propagated
2928 );
2929 }
2930 }
2931 }
2932
2933 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2935
2936 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2938
2939 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2941
2942 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2944
2945 self.phase_tb_drift_correction(&mut entries)?;
2950
2951 let balance_validation = self.phase_balance_validation(&entries)?;
2953
2954 self.validate_coa_coverage(&entries, coa.as_ref())?;
2958
2959 let subledger_reconciliation =
2961 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2962
2963 let (data_quality_stats, quality_issues) =
2965 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2966
2967 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2969
2970 {
2972 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2977 for je in &entries {
2978 if je.header.is_fraud || je.header.is_anomaly {
2979 continue;
2980 }
2981 let diff = (je.total_debit() - je.total_credit()).abs();
2982 if diff > tolerance {
2983 unbalanced_clean += 1;
2984 if unbalanced_clean <= 3 {
2985 warn!(
2986 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2987 je.header.document_id,
2988 je.total_debit(),
2989 je.total_credit(),
2990 diff
2991 );
2992 }
2993 }
2994 }
2995 if unbalanced_clean > 0 {
2996 return Err(datasynth_core::error::SynthError::generation(format!(
2997 "{} non-anomaly JEs are unbalanced (debits != credits). \
2998 First few logged above. Tolerance={}",
2999 unbalanced_clean, tolerance
3000 )));
3001 }
3002 debug!(
3003 "Phase 10c: All {} non-anomaly JEs individually balanced",
3004 entries
3005 .iter()
3006 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3007 .count()
3008 );
3009
3010 let company_codes: Vec<String> = self
3012 .config
3013 .companies
3014 .iter()
3015 .map(|c| c.code.clone())
3016 .collect();
3017 for company_code in &company_codes {
3018 let mut assets = rust_decimal::Decimal::ZERO;
3019 let mut liab_equity = rust_decimal::Decimal::ZERO;
3020
3021 for entry in &entries {
3022 if entry.header.company_code != *company_code {
3023 continue;
3024 }
3025 for line in &entry.lines {
3026 let acct = &line.gl_account;
3027 let net = line.debit_amount - line.credit_amount;
3028 if acct.starts_with('1') {
3030 assets += net;
3031 }
3032 else if acct.starts_with('2') || acct.starts_with('3') {
3034 liab_equity -= net; }
3036 }
3039 }
3040
3041 let bs_diff = (assets - liab_equity).abs();
3042 if bs_diff > tolerance {
3043 warn!(
3044 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3045 revenue/expense closing entries may not fully offset",
3046 company_code, assets, liab_equity, bs_diff
3047 );
3048 } else {
3052 debug!(
3053 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3054 company_code, assets, liab_equity, bs_diff
3055 );
3056 }
3057 }
3058
3059 info!("Phase 10c: All generation-time accounting assertions passed");
3060 }
3061
3062 let audit = self.phase_audit_data(&entries, &mut stats)?;
3064
3065 let mut banking = self.phase_banking_data(&mut stats)?;
3067
3068 if self.phase_config.generate_banking
3073 && !document_flows.payments.is_empty()
3074 && !banking.accounts.is_empty()
3075 {
3076 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3077 if bridge_rate > 0.0 {
3078 let mut bridge =
3079 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3080 self.seed,
3081 );
3082 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3083 &document_flows.payments,
3084 &banking.customers,
3085 &banking.accounts,
3086 bridge_rate,
3087 );
3088 info!(
3089 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3090 bridge_stats.bridged_count,
3091 bridge_stats.transactions_emitted,
3092 bridge_stats.fraud_propagated,
3093 );
3094 let bridged_count = bridged_txns.len();
3095 banking.transactions.extend(bridged_txns);
3096
3097 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3100 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3101 &mut banking.transactions,
3102 );
3103 }
3104
3105 banking.suspicious_count = banking
3107 .transactions
3108 .iter()
3109 .filter(|t| t.is_suspicious)
3110 .count();
3111 stats.banking_transaction_count = banking.transactions.len();
3112 stats.banking_suspicious_count = banking.suspicious_count;
3113 }
3114 }
3115
3116 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3118
3119 self.phase_llm_enrichment(&mut stats);
3121
3122 self.phase_diffusion_enhancement(&entries, &mut stats);
3124
3125 self.phase_causal_overlay(&mut stats);
3127
3128 let mut financial_reporting = self.phase_financial_reporting(
3132 &document_flows,
3133 &entries,
3134 &coa,
3135 &hr,
3136 &audit,
3137 &mut stats,
3138 )?;
3139
3140 {
3142 use datasynth_core::models::StatementType;
3143 for stmt in &financial_reporting.consolidated_statements {
3144 if stmt.statement_type == StatementType::BalanceSheet {
3145 let total_assets: rust_decimal::Decimal = stmt
3146 .line_items
3147 .iter()
3148 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3149 .map(|li| li.amount)
3150 .sum();
3151 let total_le: rust_decimal::Decimal = stmt
3152 .line_items
3153 .iter()
3154 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3155 .map(|li| li.amount)
3156 .sum();
3157 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3158 warn!(
3159 "BS equation imbalance: assets={}, L+E={}",
3160 total_assets, total_le
3161 );
3162 }
3163 }
3164 }
3165 }
3166
3167 let accounting_standards =
3169 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3170
3171 if !accounting_standards.ecl_journal_entries.is_empty() {
3173 debug!(
3174 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3175 accounting_standards.ecl_journal_entries.len()
3176 );
3177 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3178 }
3179
3180 if !accounting_standards.provision_journal_entries.is_empty() {
3182 debug!(
3183 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3184 accounting_standards.provision_journal_entries.len()
3185 );
3186 entries.extend(
3187 accounting_standards
3188 .provision_journal_entries
3189 .iter()
3190 .cloned(),
3191 );
3192 }
3193
3194 let mut ocpm = self.phase_ocpm_events(
3196 &document_flows,
3197 &sourcing,
3198 &hr,
3199 &manufacturing_snap,
3200 &banking,
3201 &audit,
3202 &financial_reporting,
3203 &mut stats,
3204 )?;
3205
3206 if let Some(ref event_log) = ocpm.event_log {
3208 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3209 }
3210
3211 if let Some(ref event_log) = ocpm.event_log {
3213 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3215 std::collections::HashMap::new();
3216 for (idx, event) in event_log.events.iter().enumerate() {
3217 if let Some(ref doc_ref) = event.document_ref {
3218 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3219 }
3220 }
3221
3222 if !doc_index.is_empty() {
3223 let mut annotated = 0usize;
3224 for entry in &mut entries {
3225 let doc_id_str = entry.header.document_id.to_string();
3226 let mut matched_indices: Vec<usize> = Vec::new();
3228 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3229 matched_indices.extend(indices);
3230 }
3231 if let Some(ref reference) = entry.header.reference {
3232 let bare_ref = reference
3233 .find(':')
3234 .map(|i| &reference[i + 1..])
3235 .unwrap_or(reference.as_str());
3236 if let Some(indices) = doc_index.get(bare_ref) {
3237 for &idx in indices {
3238 if !matched_indices.contains(&idx) {
3239 matched_indices.push(idx);
3240 }
3241 }
3242 }
3243 }
3244 if !matched_indices.is_empty() {
3246 for &idx in &matched_indices {
3247 let event = &event_log.events[idx];
3248 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3249 entry.header.ocpm_event_ids.push(event.event_id);
3250 }
3251 for obj_ref in &event.object_refs {
3252 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3253 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3254 }
3255 }
3256 if entry.header.ocpm_case_id.is_none() {
3257 entry.header.ocpm_case_id = event.case_id;
3258 }
3259 }
3260 annotated += 1;
3261 }
3262 }
3263 debug!(
3264 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3265 annotated
3266 );
3267 }
3268 }
3269
3270 if let Some(ref mut event_log) = ocpm.event_log {
3274 let synthesized =
3275 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3276 if synthesized > 0 {
3277 info!(
3278 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3279 );
3280 }
3281
3282 let anomaly_events =
3287 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3288 if anomaly_events > 0 {
3289 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3290 }
3291
3292 let p2p_cfg = &self.config.ocpm.p2p_process;
3297 let any_imperfection = p2p_cfg.rework_probability > 0.0
3298 || p2p_cfg.skip_step_probability > 0.0
3299 || p2p_cfg.out_of_order_probability > 0.0;
3300 if any_imperfection {
3301 use rand_chacha::rand_core::SeedableRng;
3302 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3303 rework_rate: p2p_cfg.rework_probability,
3304 skip_rate: p2p_cfg.skip_step_probability,
3305 out_of_order_rate: p2p_cfg.out_of_order_probability,
3306 };
3307 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3308 let stats =
3309 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3310 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3311 info!(
3312 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3313 stats.rework, stats.skipped, stats.out_of_order
3314 );
3315 }
3316 }
3317 }
3318
3319 let sales_kpi_budgets =
3321 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3322
3323 let treasury =
3327 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3328
3329 if !treasury.journal_entries.is_empty() {
3331 debug!(
3332 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3333 treasury.journal_entries.len()
3334 );
3335 entries.extend(treasury.journal_entries.iter().cloned());
3336 }
3337
3338 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3340
3341 if !tax.tax_posting_journal_entries.is_empty() {
3343 debug!(
3344 "Merging {} tax posting JEs into GL",
3345 tax.tax_posting_journal_entries.len()
3346 );
3347 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3348 }
3349
3350 {
3368 use datasynth_core::fraud_bias::{
3369 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3370 };
3371 use rand_chacha::rand_core::SeedableRng;
3372 let cfg = FraudBehavioralBiasConfig::default();
3373 if cfg.enabled {
3374 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3375 let mut swept = 0usize;
3376 for entry in entries.iter_mut() {
3377 if entry.header.is_fraud && !entry.header.is_anomaly {
3378 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3379 swept += 1;
3380 }
3381 }
3382 if swept > 0 {
3383 info!(
3384 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3385 non-anomaly fraud entries (covers late-added JEs from \
3386 ECL / provisions / treasury / tax / period-close)"
3387 );
3388 }
3389 }
3390 }
3391
3392 {
3396 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3397
3398 let framework_str = {
3399 use datasynth_config::schema::AccountingFrameworkConfig;
3400 match self
3401 .config
3402 .accounting_standards
3403 .framework
3404 .unwrap_or_default()
3405 {
3406 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3407 "IFRS"
3408 }
3409 _ => "US_GAAP",
3410 }
3411 };
3412
3413 let depreciation_total: rust_decimal::Decimal = entries
3415 .iter()
3416 .filter(|je| je.header.document_type == "CL")
3417 .flat_map(|je| je.lines.iter())
3418 .filter(|l| l.gl_account.starts_with("6000"))
3419 .map(|l| l.debit_amount)
3420 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3421
3422 let interest_paid: rust_decimal::Decimal = entries
3424 .iter()
3425 .flat_map(|je| je.lines.iter())
3426 .filter(|l| l.gl_account.starts_with("7100"))
3427 .map(|l| l.debit_amount)
3428 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3429
3430 let tax_paid: rust_decimal::Decimal = entries
3432 .iter()
3433 .flat_map(|je| je.lines.iter())
3434 .filter(|l| l.gl_account.starts_with("8000"))
3435 .map(|l| l.debit_amount)
3436 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3437
3438 let capex: rust_decimal::Decimal = entries
3440 .iter()
3441 .flat_map(|je| je.lines.iter())
3442 .filter(|l| l.gl_account.starts_with("1500"))
3443 .map(|l| l.debit_amount)
3444 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3445
3446 let dividends_paid: rust_decimal::Decimal = entries
3448 .iter()
3449 .flat_map(|je| je.lines.iter())
3450 .filter(|l| l.gl_account == "2170")
3451 .map(|l| l.debit_amount)
3452 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3453
3454 let cf_data = CashFlowSourceData {
3455 depreciation_total,
3456 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3458 delta_ap: rust_decimal::Decimal::ZERO,
3459 delta_inventory: rust_decimal::Decimal::ZERO,
3460 capex,
3461 debt_issuance: rust_decimal::Decimal::ZERO,
3462 debt_repayment: rust_decimal::Decimal::ZERO,
3463 interest_paid,
3464 tax_paid,
3465 dividends_paid,
3466 framework: framework_str.to_string(),
3467 };
3468
3469 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3470 if !enhanced_cf_items.is_empty() {
3471 use datasynth_core::models::StatementType;
3473 let merge_count = enhanced_cf_items.len();
3474 for stmt in financial_reporting
3475 .financial_statements
3476 .iter_mut()
3477 .chain(financial_reporting.consolidated_statements.iter_mut())
3478 .chain(
3479 financial_reporting
3480 .standalone_statements
3481 .values_mut()
3482 .flat_map(|v| v.iter_mut()),
3483 )
3484 {
3485 if stmt.statement_type == StatementType::CashFlowStatement {
3486 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3487 }
3488 }
3489 info!(
3490 "Enhanced cash flow: {} supplementary items merged into CF statements",
3491 merge_count
3492 );
3493 }
3494 }
3495
3496 self.generate_notes_to_financial_statements(
3499 &mut financial_reporting,
3500 &accounting_standards,
3501 &tax,
3502 &hr,
3503 &audit,
3504 &treasury,
3505 );
3506
3507 if self.config.companies.len() >= 2 && !entries.is_empty() {
3511 let companies: Vec<(String, String)> = self
3512 .config
3513 .companies
3514 .iter()
3515 .map(|c| (c.code.clone(), c.name.clone()))
3516 .collect();
3517 let ic_elim: rust_decimal::Decimal =
3518 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3519 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3520 .unwrap_or(NaiveDate::MIN);
3521 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3522 let period_label = format!(
3523 "{}-{:02}",
3524 end_date.year(),
3525 (end_date - chrono::Days::new(1)).month()
3526 );
3527
3528 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3529 let (je_segments, je_recon) =
3530 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3531 if !je_segments.is_empty() {
3532 info!(
3533 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3534 je_segments.len(),
3535 ic_elim,
3536 );
3537 if financial_reporting.segment_reports.is_empty() {
3539 financial_reporting.segment_reports = je_segments;
3540 financial_reporting.segment_reconciliations = vec![je_recon];
3541 } else {
3542 financial_reporting.segment_reports.extend(je_segments);
3543 financial_reporting.segment_reconciliations.push(je_recon);
3544 }
3545 }
3546 }
3547
3548 let esg_snap =
3550 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3551
3552 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3554
3555 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3557
3558 let disruption_events = self.phase_disruption_events(&mut stats)?;
3560
3561 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3563
3564 let (entity_relationship_graph, cross_process_links) =
3566 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3567
3568 let industry_output = self.phase_industry_data(&mut stats);
3570
3571 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3573
3574 if self.config.diffusion.enabled
3592 && (self.config.diffusion.backend == "neural"
3593 || self.config.diffusion.backend == "hybrid")
3594 {
3595 let neural = &self.config.diffusion.neural;
3596 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3597 stats.neural_hybrid_weight = Some(weight);
3598 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3599 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3600 warn!(
3601 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3602 the neural/hybrid training path is not yet shipped. Config \
3603 is captured in stats (weight={weight:.2}, strategy={}, \
3604 columns={}) but no neural training runs. Statistical \
3605 diffusion (backend='statistical') continues to work.",
3606 self.config.diffusion.backend,
3607 neural.hybrid_strategy,
3608 neural.neural_columns.len(),
3609 );
3610 }
3611
3612 self.phase_hypergraph_export(
3614 &coa,
3615 &entries,
3616 &document_flows,
3617 &sourcing,
3618 &hr,
3619 &manufacturing_snap,
3620 &banking,
3621 &audit,
3622 &financial_reporting,
3623 &ocpm,
3624 &compliance_regulations,
3625 &mut stats,
3626 )?;
3627
3628 if self.phase_config.generate_graph_export {
3631 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3632 }
3633
3634 if self.config.streaming.enabled {
3636 info!("Note: streaming config is enabled but batch mode does not use it");
3637 }
3638 if self.config.vendor_network.enabled {
3639 debug!("Vendor network config available; relationship graph generation is partial");
3640 }
3641 if self.config.customer_segmentation.enabled {
3642 debug!("Customer segmentation config available; segment-aware generation is partial");
3643 }
3644
3645 let resource_stats = self.resource_guard.stats();
3647 info!(
3648 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3649 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3650 resource_stats.disk.estimated_bytes_written,
3651 resource_stats.degradation_level
3652 );
3653
3654 if let Some(ref sink) = self.phase_sink {
3656 if let Err(e) = sink.flush() {
3657 warn!("Stream sink flush failed: {e}");
3658 }
3659 }
3660
3661 let lineage = self.build_lineage_graph();
3663
3664 let gate_result = if self.config.quality_gates.enabled {
3666 let profile_name = &self.config.quality_gates.profile;
3667 match datasynth_eval::gates::get_profile(profile_name) {
3668 Some(profile) => {
3669 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3671
3672 if balance_validation.validated {
3674 eval.coherence.balance =
3675 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3676 equation_balanced: balance_validation.is_balanced,
3677 max_imbalance: (balance_validation.total_debits
3678 - balance_validation.total_credits)
3679 .abs(),
3680 periods_evaluated: 1,
3681 periods_imbalanced: if balance_validation.is_balanced {
3682 0
3683 } else {
3684 1
3685 },
3686 period_results: Vec::new(),
3687 companies_evaluated: self.config.companies.len(),
3688 });
3689 }
3690
3691 eval.coherence.passes = balance_validation.is_balanced;
3693 if !balance_validation.is_balanced {
3694 eval.coherence
3695 .failures
3696 .push("Balance sheet equation not satisfied".to_string());
3697 }
3698
3699 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3701 eval.statistical.passes = !entries.is_empty();
3702
3703 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3706
3707 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3708 info!(
3709 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3710 profile_name, result.gates_passed, result.gates_total, result.summary
3711 );
3712 Some(result)
3713 }
3714 None => {
3715 warn!(
3716 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3717 profile_name
3718 );
3719 None
3720 }
3721 }
3722 } else {
3723 None
3724 };
3725
3726 let internal_controls = if self.config.internal_controls.enabled {
3728 InternalControl::standard_controls()
3729 } else {
3730 Vec::new()
3731 };
3732
3733 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3737
3738 let statistical_validation = self.phase_statistical_validation(&entries)?;
3743
3744 let interconnectivity = self.phase_interconnectivity();
3748
3749 let coa_semantic_prior = self
3753 .cached_priors
3754 .as_ref()
3755 .and_then(|p| p.coa_semantic.clone());
3756
3757 Ok(EnhancedGenerationResult {
3758 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3759 master_data: std::mem::take(&mut self.master_data),
3760 document_flows,
3761 subledger,
3762 ocpm,
3763 audit,
3764 banking,
3765 graph_export,
3766 sourcing,
3767 financial_reporting,
3768 hr,
3769 accounting_standards,
3770 manufacturing: manufacturing_snap,
3771 sales_kpi_budgets,
3772 tax,
3773 esg: esg_snap,
3774 treasury,
3775 project_accounting,
3776 process_evolution,
3777 organizational_events,
3778 disruption_events,
3779 intercompany,
3780 journal_entries: entries,
3781 anomaly_labels,
3782 balance_validation,
3783 data_quality_stats,
3784 quality_issues,
3785 statistics: stats,
3786 lineage: Some(lineage),
3787 gate_result,
3788 internal_controls,
3789 sod_violations,
3790 opening_balances,
3791 subledger_reconciliation,
3792 counterfactual_pairs,
3793 red_flags,
3794 collusion_rings,
3795 temporal_vendor_chains,
3796 entity_relationship_graph,
3797 cross_process_links,
3798 industry_output,
3799 coa_semantic_prior,
3800 compliance_regulations,
3801 analytics_metadata,
3802 statistical_validation,
3803 interconnectivity,
3804 })
3805 }
3806
3807 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3811 use rand::{RngExt, SeedableRng};
3812 use rand_chacha::ChaCha8Rng;
3813
3814 let mut snap = InterconnectivitySnapshot::default();
3815 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3816
3817 let vn = &self.config.vendor_network;
3819 if vn.enabled {
3820 let total = self.master_data.vendors.len();
3821 if total > 0 {
3822 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3823 let remaining_after_t1 = total.saturating_sub(tier1_count);
3824 let depth = vn.depth.clamp(1, 3);
3825 let tier2_count = if depth >= 2 {
3826 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3827 (tier1_count * avg).min(remaining_after_t1)
3828 } else {
3829 0
3830 };
3831 let tier3_count = total
3832 .saturating_sub(tier1_count)
3833 .saturating_sub(tier2_count);
3834
3835 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3836 let tier = if idx < tier1_count {
3837 1
3838 } else if idx < tier1_count + tier2_count {
3839 2
3840 } else {
3841 3
3842 };
3843 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3844
3845 let cl = &vn.clusters;
3847 let roll: f64 = rng.random();
3848 let cluster = if roll < cl.reliable_strategic {
3849 "reliable_strategic"
3850 } else if roll < cl.reliable_strategic + cl.standard_operational {
3851 "standard_operational"
3852 } else if roll
3853 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3854 {
3855 "transactional"
3856 } else {
3857 "problematic"
3858 };
3859 snap.vendor_clusters
3860 .push((vendor.vendor_id.clone(), cluster.to_string()));
3861 }
3862 let _ = tier3_count; }
3864 }
3865
3866 let cs = &self.config.customer_segmentation;
3868 if cs.enabled {
3869 let seg = &cs.value_segments;
3870 for customer in &self.master_data.customers {
3871 let roll: f64 = rng.random();
3872 let value_segment = if roll < seg.enterprise.customer_share {
3873 "enterprise"
3874 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3875 "mid_market"
3876 } else if roll
3877 < seg.enterprise.customer_share
3878 + seg.mid_market.customer_share
3879 + seg.smb.customer_share
3880 {
3881 "smb"
3882 } else {
3883 "consumer"
3884 };
3885 snap.customer_value_segments
3886 .push((customer.customer_id.clone(), value_segment.to_string()));
3887
3888 let roll2: f64 = rng.random();
3889 let life = &cs.lifecycle;
3890 let lifecycle = if roll2 < life.prospect_rate {
3891 "prospect"
3892 } else if roll2 < life.prospect_rate + life.new_rate {
3893 "new"
3894 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3895 "growth"
3896 } else if roll2
3897 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3898 {
3899 "mature"
3900 } else if roll2
3901 < life.prospect_rate
3902 + life.new_rate
3903 + life.growth_rate
3904 + life.mature_rate
3905 + life.at_risk_rate
3906 {
3907 "at_risk"
3908 } else if roll2
3909 < life.prospect_rate
3910 + life.new_rate
3911 + life.growth_rate
3912 + life.mature_rate
3913 + life.at_risk_rate
3914 + life.churned_rate
3915 {
3916 "churned"
3917 } else {
3918 "won_back"
3919 };
3920 snap.customer_lifecycle_stages
3921 .push((customer.customer_id.clone(), lifecycle.to_string()));
3922 }
3923 }
3924
3925 let is = &self.config.industry_specific;
3927 if is.enabled {
3928 snap.industry_metadata.push(format!(
3929 "industry_specific.enabled=true (industry={:?})",
3930 self.config.global.industry
3931 ));
3932 }
3933
3934 snap
3935 }
3936
3937 fn phase_chart_of_accounts(
3943 &mut self,
3944 stats: &mut EnhancedGenerationStatistics,
3945 ) -> SynthResult<Arc<ChartOfAccounts>> {
3946 info!("Phase 1: Generating Chart of Accounts");
3947 let coa = self.generate_coa()?;
3948 stats.accounts_count = coa.account_count();
3949 info!(
3950 "Chart of Accounts generated: {} accounts",
3951 stats.accounts_count
3952 );
3953 self.check_resources_with_log("post-coa")?;
3954 Ok(coa)
3955 }
3956
3957 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3959 if self.phase_config.generate_master_data {
3960 info!("Phase 2: Generating Master Data");
3961 self.generate_master_data()?;
3962 stats.vendor_count = self.master_data.vendors.len();
3963 stats.customer_count = self.master_data.customers.len();
3964 stats.material_count = self.master_data.materials.len();
3965 stats.asset_count = self.master_data.assets.len();
3966 stats.employee_count = self.master_data.employees.len();
3967 info!(
3968 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3969 stats.vendor_count, stats.customer_count, stats.material_count,
3970 stats.asset_count, stats.employee_count
3971 );
3972 self.check_resources_with_log("post-master-data")?;
3973 } else {
3974 debug!("Phase 2: Skipped (master data generation disabled)");
3975 }
3976 Ok(())
3977 }
3978
3979 fn phase_document_flows(
3981 &mut self,
3982 stats: &mut EnhancedGenerationStatistics,
3983 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3984 let mut document_flows = DocumentFlowSnapshot::default();
3985 let mut subledger = SubledgerSnapshot::default();
3986 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3989
3990 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3991 info!("Phase 3: Generating Document Flows");
3992 self.generate_document_flows(&mut document_flows)?;
3993 stats.p2p_chain_count = document_flows.p2p_chains.len();
3994 stats.o2c_chain_count = document_flows.o2c_chains.len();
3995 info!(
3996 "Document flows generated: {} P2P chains, {} O2C chains",
3997 stats.p2p_chain_count, stats.o2c_chain_count
3998 );
3999
4000 debug!("Phase 3b: Linking document flows to subledgers");
4002 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4003 stats.ap_invoice_count = subledger.ap_invoices.len();
4004 stats.ar_invoice_count = subledger.ar_invoices.len();
4005 debug!(
4006 "Subledgers linked: {} AP invoices, {} AR invoices",
4007 stats.ap_invoice_count, stats.ar_invoice_count
4008 );
4009
4010 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4015 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4016 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4017 debug!("Payment settlements applied to AP and AR subledgers");
4018
4019 if let Ok(start_date) =
4022 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4023 {
4024 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4025 - chrono::Days::new(1);
4026 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4027 for company in &self.config.companies {
4034 let ar_report = ARAgingReport::from_invoices(
4035 company.code.clone(),
4036 &subledger.ar_invoices,
4037 as_of_date,
4038 );
4039 subledger.ar_aging_reports.push(ar_report);
4040
4041 let ap_report = APAgingReport::from_invoices(
4042 company.code.clone(),
4043 &subledger.ap_invoices,
4044 as_of_date,
4045 );
4046 subledger.ap_aging_reports.push(ap_report);
4047 }
4048 debug!(
4049 "AR/AP aging reports built: {} AR, {} AP",
4050 subledger.ar_aging_reports.len(),
4051 subledger.ap_aging_reports.len()
4052 );
4053
4054 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4056 {
4057 use datasynth_generators::DunningGenerator;
4058 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4059 for company in &self.config.companies {
4060 let currency = company.currency.as_str();
4061 let mut company_invoices: Vec<
4064 datasynth_core::models::subledger::ar::ARInvoice,
4065 > = subledger
4066 .ar_invoices
4067 .iter()
4068 .filter(|inv| inv.company_code == company.code)
4069 .cloned()
4070 .collect();
4071
4072 if company_invoices.is_empty() {
4073 continue;
4074 }
4075
4076 let result = dunning_gen.execute_dunning_run(
4077 &company.code,
4078 as_of_date,
4079 &mut company_invoices,
4080 currency,
4081 );
4082
4083 for updated in &company_invoices {
4085 if let Some(orig) = subledger
4086 .ar_invoices
4087 .iter_mut()
4088 .find(|i| i.invoice_number == updated.invoice_number)
4089 {
4090 orig.dunning_info = updated.dunning_info.clone();
4091 }
4092 }
4093
4094 subledger.dunning_runs.push(result.dunning_run);
4095 subledger.dunning_letters.extend(result.letters);
4096 dunning_journal_entries.extend(result.journal_entries);
4098 }
4099 debug!(
4100 "Dunning runs complete: {} runs, {} letters",
4101 subledger.dunning_runs.len(),
4102 subledger.dunning_letters.len()
4103 );
4104 }
4105 }
4106
4107 self.check_resources_with_log("post-document-flows")?;
4108 } else {
4109 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4110 }
4111
4112 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4114 if !self.master_data.assets.is_empty() {
4115 debug!("Generating FA subledger records");
4116 let company_code = self
4117 .config
4118 .companies
4119 .first()
4120 .map(|c| c.code.as_str())
4121 .unwrap_or("1000");
4122 let currency = self
4123 .config
4124 .companies
4125 .first()
4126 .map(|c| c.currency.as_str())
4127 .unwrap_or("USD");
4128
4129 let mut fa_gen = datasynth_generators::FAGenerator::new(
4130 datasynth_generators::FAGeneratorConfig::default(),
4131 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4132 );
4133
4134 for asset in &self.master_data.assets {
4135 let (record, je) = fa_gen.generate_asset_acquisition(
4136 company_code,
4137 &format!("{:?}", asset.asset_class),
4138 &asset.description,
4139 asset.acquisition_date,
4140 currency,
4141 asset.cost_center.as_deref(),
4142 );
4143 subledger.fa_records.push(record);
4144 fa_journal_entries.push(je);
4145 }
4146
4147 stats.fa_subledger_count = subledger.fa_records.len();
4148 debug!(
4149 "FA subledger records generated: {} (with {} acquisition JEs)",
4150 stats.fa_subledger_count,
4151 fa_journal_entries.len()
4152 );
4153 }
4154
4155 if !self.master_data.materials.is_empty() {
4157 debug!("Generating Inventory subledger records");
4158 let first_company = self.config.companies.first();
4159 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4160 let inv_currency = first_company
4161 .map(|c| c.currency.clone())
4162 .unwrap_or_else(|| "USD".to_string());
4163
4164 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4165 datasynth_generators::InventoryGeneratorConfig::default(),
4166 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4167 inv_currency.clone(),
4168 );
4169
4170 for (i, material) in self.master_data.materials.iter().enumerate() {
4171 let plant = format!("PLANT{:02}", (i % 3) + 1);
4172 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4173 let initial_qty = rust_decimal::Decimal::from(
4174 material
4175 .safety_stock
4176 .to_string()
4177 .parse::<i64>()
4178 .unwrap_or(100),
4179 );
4180
4181 let position = inv_gen.generate_position(
4182 company_code,
4183 &plant,
4184 &storage_loc,
4185 &material.material_id,
4186 &material.description,
4187 initial_qty,
4188 Some(material.standard_cost),
4189 &inv_currency,
4190 );
4191 subledger.inventory_positions.push(position);
4192 }
4193
4194 stats.inventory_subledger_count = subledger.inventory_positions.len();
4195 debug!(
4196 "Inventory subledger records generated: {}",
4197 stats.inventory_subledger_count
4198 );
4199 }
4200
4201 if !subledger.fa_records.is_empty() {
4203 if let Ok(start_date) =
4204 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4205 {
4206 let company_code = self
4207 .config
4208 .companies
4209 .first()
4210 .map(|c| c.code.as_str())
4211 .unwrap_or("1000");
4212 let fiscal_year = start_date.year();
4213 let start_period = start_date.month();
4214 let end_period =
4215 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4216
4217 let depr_cfg = FaDepreciationScheduleConfig {
4218 fiscal_year,
4219 start_period,
4220 end_period,
4221 seed_offset: 800,
4222 };
4223 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4224 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4225 let run_count = runs.len();
4226 subledger.depreciation_runs = runs;
4227 debug!(
4228 "Depreciation runs generated: {} runs for {} periods",
4229 run_count, self.config.global.period_months
4230 );
4231 }
4232 }
4233
4234 if !subledger.inventory_positions.is_empty() {
4236 if let Ok(start_date) =
4237 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4238 {
4239 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4240 - chrono::Days::new(1);
4241
4242 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4243 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4244
4245 for company in &self.config.companies {
4246 let result = inv_val_gen.generate(
4247 &company.code,
4248 &subledger.inventory_positions,
4249 as_of_date,
4250 );
4251 subledger.inventory_valuations.push(result);
4252 }
4253 debug!(
4254 "Inventory valuations generated: {} company reports",
4255 subledger.inventory_valuations.len()
4256 );
4257 }
4258 }
4259
4260 Ok((document_flows, subledger, fa_journal_entries))
4261 }
4262
4263 #[allow(clippy::too_many_arguments)]
4265 fn phase_ocpm_events(
4266 &mut self,
4267 document_flows: &DocumentFlowSnapshot,
4268 sourcing: &SourcingSnapshot,
4269 hr: &HrSnapshot,
4270 manufacturing: &ManufacturingSnapshot,
4271 banking: &BankingSnapshot,
4272 audit: &AuditSnapshot,
4273 financial_reporting: &FinancialReportingSnapshot,
4274 stats: &mut EnhancedGenerationStatistics,
4275 ) -> SynthResult<OcpmSnapshot> {
4276 let degradation = self.check_resources()?;
4277 if degradation >= DegradationLevel::Reduced {
4278 debug!(
4279 "Phase skipped due to resource pressure (degradation: {:?})",
4280 degradation
4281 );
4282 return Ok(OcpmSnapshot::default());
4283 }
4284 if self.phase_config.generate_ocpm_events {
4285 info!("Phase 3c: Generating OCPM Events");
4286 let ocpm_snapshot = self.generate_ocpm_events(
4287 document_flows,
4288 sourcing,
4289 hr,
4290 manufacturing,
4291 banking,
4292 audit,
4293 financial_reporting,
4294 )?;
4295 stats.ocpm_event_count = ocpm_snapshot.event_count;
4296 stats.ocpm_object_count = ocpm_snapshot.object_count;
4297 stats.ocpm_case_count = ocpm_snapshot.case_count;
4298 info!(
4299 "OCPM events generated: {} events, {} objects, {} cases",
4300 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4301 );
4302 self.check_resources_with_log("post-ocpm")?;
4303 Ok(ocpm_snapshot)
4304 } else {
4305 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4306 Ok(OcpmSnapshot::default())
4307 }
4308 }
4309
4310 fn phase_journal_entries(
4312 &mut self,
4313 coa: &Arc<ChartOfAccounts>,
4314 document_flows: &DocumentFlowSnapshot,
4315 _stats: &mut EnhancedGenerationStatistics,
4316 ) -> SynthResult<Vec<JournalEntry>> {
4317 let mut entries = Vec::new();
4318
4319 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4321 debug!("Phase 4a: Generating JEs from document flows");
4322 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4323 debug!("Generated {} JEs from document flows", flow_entries.len());
4324 entries.extend(flow_entries);
4325 }
4326
4327 if self.phase_config.generate_journal_entries {
4329 info!("Phase 4: Generating Journal Entries");
4330 let je_entries = self.generate_journal_entries(coa)?;
4331 info!("Generated {} standalone journal entries", je_entries.len());
4332 entries.extend(je_entries);
4333 } else {
4334 debug!("Phase 4: Skipped (journal entry generation disabled)");
4335 }
4336
4337 if let Some(ctx) = &self.shard_context {
4341 if !ctx.extra_journal_entries.is_empty() {
4342 debug!(
4343 "Phase 4c: appending {} shard-mode IC journal entries",
4344 ctx.extra_journal_entries.len()
4345 );
4346 entries.extend(ctx.extra_journal_entries.iter().cloned());
4347 }
4348 }
4349
4350 if !entries.is_empty() {
4351 self.check_resources_with_log("post-journal-entries")?;
4354 }
4355
4356 Ok(entries)
4357 }
4358
4359 fn phase_anomaly_injection(
4361 &mut self,
4362 entries: &mut [JournalEntry],
4363 actions: &DegradationActions,
4364 stats: &mut EnhancedGenerationStatistics,
4365 ) -> SynthResult<AnomalyLabels> {
4366 if self.phase_config.inject_anomalies
4367 && !entries.is_empty()
4368 && !actions.skip_anomaly_injection
4369 {
4370 info!("Phase 5: Injecting Anomalies");
4371 let result = self.inject_anomalies(entries)?;
4372 stats.anomalies_injected = result.labels.len();
4373 info!("Injected {} anomalies", stats.anomalies_injected);
4374 self.check_resources_with_log("post-anomaly-injection")?;
4375 Ok(result)
4376 } else if actions.skip_anomaly_injection {
4377 warn!("Phase 5: Skipped due to resource degradation");
4378 Ok(AnomalyLabels::default())
4379 } else {
4380 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4381 Ok(AnomalyLabels::default())
4382 }
4383 }
4384
4385 fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4394 let tb_anchor = match &self.cached_priors {
4396 Some(priors) => match &priors.tb_anchor {
4397 Some(anchor) => anchor.clone(),
4398 None => return Ok(()),
4399 },
4400 None => return Ok(()),
4401 };
4402
4403 if !tb_anchor.has_data() {
4404 return Ok(());
4405 }
4406
4407 tracing::info!(
4408 target: "datasynth_runtime::tb_anchor",
4409 accounts = tb_anchor.per_account.len(),
4410 total_assets = tb_anchor.total_assets,
4411 "W8.1 — TB anchor loaded; running drift-correction pass"
4412 );
4413
4414 let tracker_config = BalanceTrackerConfig {
4416 validate_on_each_entry: false,
4417 track_history: false,
4418 fail_on_validation_error: false,
4419 ..Default::default()
4420 };
4421 let currency = self
4422 .config
4423 .companies
4424 .first()
4425 .map(|c| c.currency.clone())
4426 .unwrap_or_else(|| "USD".to_string());
4427
4428 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4429 tracker.set_tb_anchor(tb_anchor.clone());
4430 let _ = tracker.apply_entries(entries);
4431
4432 for company in &self.config.companies {
4436 let code = &company.code;
4437 let drifts = tracker.account_drift(code);
4438 let mut sorted_drifts = drifts.clone();
4439 sorted_drifts.sort_by(|a, b| {
4440 b.1.abs()
4441 .partial_cmp(&a.1.abs())
4442 .unwrap_or(std::cmp::Ordering::Equal)
4443 });
4444 let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4445 let correction_needed = tracker.drift_correction_needed(code);
4446 tracing::info!(
4447 target: "datasynth_runtime::tb_anchor",
4448 company = %code,
4449 anchor_accounts = tb_anchor.per_account.len(),
4450 tracked_accounts = drifts.len(),
4451 aggregate_drift = aggregate_drift,
4452 correction_needed = correction_needed,
4453 "W8.1 SP5.1 — per-company drift summary before correction"
4454 );
4455 for (acc, drift) in sorted_drifts.iter().take(5) {
4456 tracing::info!(
4457 target: "datasynth_runtime::tb_anchor",
4458 company = %code,
4459 account = %acc,
4460 drift = drift,
4461 "W8.1 SP5.1 — top-5 drifted accounts"
4462 );
4463 }
4464 }
4465
4466 let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4468 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4469 .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4470
4471 use rand_chacha::rand_core::SeedableRng as _;
4473 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4474
4475 let mut correction_count = 0usize;
4476 for company in &self.config.companies {
4477 let code = &company.code;
4478 if !tracker.drift_correction_needed(code) {
4479 tracing::debug!(
4480 target: "datasynth_runtime::tb_anchor",
4481 company = %code,
4482 "W8.1 — drift_correction_needed returned false; skipping company"
4483 );
4484 continue;
4485 }
4486 if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4487 tracing::debug!(
4488 target: "datasynth_runtime::tb_anchor",
4489 company = %code,
4490 lines = je.lines.len(),
4491 debit = %je.total_debit(),
4492 credit = %je.total_credit(),
4493 "W8.1 — emitting drift-correction JE"
4494 );
4495 let _ = tracker.apply_entry(&je);
4497 entries.push(je);
4498 correction_count += 1;
4499 }
4500 }
4501
4502 if correction_count > 0 {
4503 tracing::info!(
4504 target: "datasynth_runtime::tb_anchor",
4505 correction_count,
4506 "W8.1 — drift-correction pass emitted {} JE(s)",
4507 correction_count
4508 );
4509 } else {
4510 tracing::debug!(
4511 target: "datasynth_runtime::tb_anchor",
4512 "W8.1 — drift-correction pass: no corrections needed"
4513 );
4514 }
4515
4516 Ok(())
4517 }
4518
4519 fn phase_balance_validation(
4521 &mut self,
4522 entries: &[JournalEntry],
4523 ) -> SynthResult<BalanceValidationResult> {
4524 if self.phase_config.validate_balances && !entries.is_empty() {
4525 debug!("Phase 6: Validating Balances");
4526 let balance_validation = self.validate_journal_entries(entries)?;
4527 if balance_validation.is_balanced {
4528 debug!("Balance validation passed");
4529 } else {
4530 warn!(
4531 "Balance validation found {} errors",
4532 balance_validation.validation_errors.len()
4533 );
4534 }
4535 Ok(balance_validation)
4536 } else {
4537 Ok(BalanceValidationResult::default())
4538 }
4539 }
4540
4541 fn validate_coa_coverage(
4548 &self,
4549 entries: &[JournalEntry],
4550 coa: &ChartOfAccounts,
4551 ) -> SynthResult<()> {
4552 if entries.is_empty() {
4553 return Ok(());
4554 }
4555 let coa_set: std::collections::HashSet<&str> = coa
4556 .accounts
4557 .iter()
4558 .map(|a| a.account_number.as_str())
4559 .collect();
4560 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4561 for je in entries {
4562 for line in je.lines.iter() {
4563 if !coa_set.contains(line.gl_account.as_str()) {
4564 missing.insert(line.gl_account.clone());
4565 }
4566 }
4567 }
4568 if missing.is_empty() {
4569 debug!("COA coverage validation passed");
4570 return Ok(());
4571 }
4572 let msg = format!(
4573 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4574 missing.len(),
4575 missing.iter().take(10).collect::<Vec<_>>()
4576 );
4577 if self.phase_config.validate_coa_coverage_strict {
4578 Err(SynthError::generation(msg))
4579 } else {
4580 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4581 Ok(())
4582 }
4583 }
4584
4585 fn phase_data_quality_injection(
4587 &mut self,
4588 entries: &mut [JournalEntry],
4589 actions: &DegradationActions,
4590 stats: &mut EnhancedGenerationStatistics,
4591 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4592 if self.phase_config.inject_data_quality
4593 && !entries.is_empty()
4594 && !actions.skip_data_quality
4595 {
4596 info!("Phase 7: Injecting Data Quality Variations");
4597 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4598 stats.data_quality_issues = dq_stats.records_with_issues;
4599 info!("Injected {} data quality issues", stats.data_quality_issues);
4600 self.check_resources_with_log("post-data-quality")?;
4601 Ok((dq_stats, quality_issues))
4602 } else if actions.skip_data_quality {
4603 warn!("Phase 7: Skipped due to resource degradation");
4604 Ok((stats_with_denominator(entries.len()), Vec::new()))
4608 } else {
4609 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4610 Ok((stats_with_denominator(entries.len()), Vec::new()))
4611 }
4612 }
4613
4614 fn phase_period_close(
4624 &mut self,
4625 entries: &mut Vec<JournalEntry>,
4626 subledger: &SubledgerSnapshot,
4627 stats: &mut EnhancedGenerationStatistics,
4628 ) -> SynthResult<()> {
4629 if !self.phase_config.generate_period_close || entries.is_empty() {
4630 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4631 return Ok(());
4632 }
4633
4634 info!("Phase 10b: Generating period-close journal entries");
4635
4636 use datasynth_core::accounts::{
4637 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4638 };
4639 use rust_decimal::Decimal;
4640
4641 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4642 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4643 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4644 let close_date = end_date - chrono::Days::new(1);
4646
4647 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4652 .config
4653 .companies
4654 .iter()
4655 .map(|c| c.code.clone())
4656 .collect();
4657
4658 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4660 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4661
4662 let period_months = self.config.global.period_months;
4666 for asset in &subledger.fa_records {
4667 use datasynth_core::models::subledger::fa::AssetStatus;
4669 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4670 continue;
4671 }
4672 let useful_life_months = asset.useful_life_months();
4673 if useful_life_months == 0 {
4674 continue;
4676 }
4677 let salvage_value = asset.salvage_value();
4678 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4679 if depreciable_base == Decimal::ZERO {
4680 continue;
4681 }
4682 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4683 * Decimal::from(period_months))
4684 .round_dp(2);
4685 if period_depr <= Decimal::ZERO {
4686 continue;
4687 }
4688
4689 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4690 depr_header.document_type = "CL".to_string();
4691 depr_header.header_text = Some(format!(
4692 "Depreciation - {} {}",
4693 asset.asset_number, asset.description
4694 ));
4695 depr_header.created_by = "CLOSE_ENGINE".to_string();
4696 depr_header.source = TransactionSource::Automated;
4697 depr_header.business_process = Some(BusinessProcess::R2R);
4698
4699 let doc_id = depr_header.document_id;
4700 let mut depr_je = JournalEntry::new(depr_header);
4701
4702 depr_je.add_line(JournalEntryLine::debit(
4704 doc_id,
4705 1,
4706 expense_accounts::DEPRECIATION.to_string(),
4707 period_depr,
4708 ));
4709 depr_je.add_line(JournalEntryLine::credit(
4711 doc_id,
4712 2,
4713 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4714 period_depr,
4715 ));
4716
4717 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4718 close_jes.push(depr_je);
4719 }
4720
4721 if !subledger.fa_records.is_empty() {
4722 debug!(
4723 "Generated {} depreciation JEs from {} FA records",
4724 close_jes.len(),
4725 subledger.fa_records.len()
4726 );
4727 }
4728
4729 {
4733 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4734 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4735 if let Some(ctx) = &self.temporal_context {
4738 accrual_gen.set_temporal_context(Arc::clone(ctx));
4739 }
4740
4741 let accrual_items: &[(&str, &str, &str)] = &[
4743 ("Accrued Utilities", "6200", "2100"),
4744 ("Accrued Rent", "6300", "2100"),
4745 ("Accrued Interest", "6100", "2150"),
4746 ];
4747
4748 for company_code in &company_codes {
4749 let company_revenue: Decimal = entries
4751 .iter()
4752 .filter(|e| e.header.company_code == *company_code)
4753 .flat_map(|e| e.lines.iter())
4754 .filter(|l| l.gl_account.starts_with('4'))
4755 .map(|l| l.credit_amount - l.debit_amount)
4756 .fold(Decimal::ZERO, |acc, v| acc + v);
4757
4758 if company_revenue <= Decimal::ZERO {
4759 continue;
4760 }
4761
4762 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4764 if accrual_base <= Decimal::ZERO {
4765 continue;
4766 }
4767
4768 for (description, expense_acct, liability_acct) in accrual_items {
4769 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4770 company_code,
4771 description,
4772 accrual_base,
4773 expense_acct,
4774 liability_acct,
4775 close_date,
4776 None,
4777 );
4778 close_jes.push(accrual_je);
4779 if let Some(rev_je) = reversal_je {
4780 close_jes.push(rev_je);
4781 }
4782 }
4783 }
4784
4785 debug!(
4786 "Generated accrual entries for {} companies",
4787 company_codes.len()
4788 );
4789 }
4790
4791 for company_code in &company_codes {
4792 let mut total_revenue = Decimal::ZERO;
4797 let mut total_expenses = Decimal::ZERO;
4798
4799 for entry in entries.iter() {
4800 if entry.header.company_code != *company_code {
4801 continue;
4802 }
4803 for line in &entry.lines {
4804 let category = AccountCategory::from_account(&line.gl_account);
4805 match category {
4806 AccountCategory::Revenue => {
4807 total_revenue += line.credit_amount - line.debit_amount;
4809 }
4810 AccountCategory::Cogs
4811 | AccountCategory::OperatingExpense
4812 | AccountCategory::OtherIncomeExpense
4813 | AccountCategory::Tax => {
4814 total_expenses += line.debit_amount - line.credit_amount;
4816 }
4817 _ => {}
4818 }
4819 }
4820 }
4821
4822 let pre_tax_income = total_revenue - total_expenses;
4823
4824 if pre_tax_income == Decimal::ZERO {
4826 debug!(
4827 "Company {}: no pre-tax income, skipping period close",
4828 company_code
4829 );
4830 continue;
4831 }
4832
4833 if pre_tax_income > Decimal::ZERO {
4835 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4837
4838 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4839 tax_header.document_type = "CL".to_string();
4840 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4841 tax_header.created_by = "CLOSE_ENGINE".to_string();
4842 tax_header.source = TransactionSource::Automated;
4843 tax_header.business_process = Some(BusinessProcess::R2R);
4844
4845 let doc_id = tax_header.document_id;
4846 let mut tax_je = JournalEntry::new(tax_header);
4847
4848 tax_je.add_line(JournalEntryLine::debit(
4850 doc_id,
4851 1,
4852 tax_accounts::TAX_EXPENSE.to_string(),
4853 tax_amount,
4854 ));
4855 tax_je.add_line(JournalEntryLine::credit(
4857 doc_id,
4858 2,
4859 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4860 tax_amount,
4861 ));
4862
4863 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4864 close_jes.push(tax_je);
4865 } else {
4866 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4869 if dta_amount > Decimal::ZERO {
4870 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4871 dta_header.document_type = "CL".to_string();
4872 dta_header.header_text =
4873 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4874 dta_header.created_by = "CLOSE_ENGINE".to_string();
4875 dta_header.source = TransactionSource::Automated;
4876 dta_header.business_process = Some(BusinessProcess::R2R);
4877
4878 let doc_id = dta_header.document_id;
4879 let mut dta_je = JournalEntry::new(dta_header);
4880
4881 dta_je.add_line(JournalEntryLine::debit(
4883 doc_id,
4884 1,
4885 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4886 dta_amount,
4887 ));
4888 dta_je.add_line(JournalEntryLine::credit(
4891 doc_id,
4892 2,
4893 tax_accounts::TAX_EXPENSE.to_string(),
4894 dta_amount,
4895 ));
4896
4897 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4898 close_jes.push(dta_je);
4899 debug!(
4900 "Company {}: loss year — recognised DTA of {}",
4901 company_code, dta_amount
4902 );
4903 }
4904 }
4905
4906 let tax_provision = if pre_tax_income > Decimal::ZERO {
4912 (pre_tax_income * tax_rate).round_dp(2)
4913 } else {
4914 Decimal::ZERO
4915 };
4916 let net_income = pre_tax_income - tax_provision;
4917
4918 if net_income > Decimal::ZERO {
4919 use datasynth_generators::DividendGenerator;
4920 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4922 let currency_str = self
4923 .config
4924 .companies
4925 .iter()
4926 .find(|c| c.code == *company_code)
4927 .map(|c| c.currency.as_str())
4928 .unwrap_or("USD");
4929 let div_result = div_gen.generate(
4930 company_code,
4931 close_date,
4932 Decimal::new(1, 0), dividend_amount,
4934 currency_str,
4935 );
4936 let div_je_count = div_result.journal_entries.len();
4937 close_jes.extend(div_result.journal_entries);
4938 debug!(
4939 "Company {}: declared dividend of {} ({} JEs)",
4940 company_code, dividend_amount, div_je_count
4941 );
4942 }
4943
4944 if net_income != Decimal::ZERO {
4949 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4950 close_header.document_type = "CL".to_string();
4951 close_header.header_text =
4952 Some(format!("Income statement close - {}", company_code));
4953 close_header.created_by = "CLOSE_ENGINE".to_string();
4954 close_header.source = TransactionSource::Automated;
4955 close_header.business_process = Some(BusinessProcess::R2R);
4956
4957 let doc_id = close_header.document_id;
4958 let mut close_je = JournalEntry::new(close_header);
4959
4960 let abs_net_income = net_income.abs();
4961
4962 if net_income > Decimal::ZERO {
4963 close_je.add_line(JournalEntryLine::debit(
4965 doc_id,
4966 1,
4967 equity_accounts::INCOME_SUMMARY.to_string(),
4968 abs_net_income,
4969 ));
4970 close_je.add_line(JournalEntryLine::credit(
4971 doc_id,
4972 2,
4973 equity_accounts::RETAINED_EARNINGS.to_string(),
4974 abs_net_income,
4975 ));
4976 } else {
4977 close_je.add_line(JournalEntryLine::debit(
4979 doc_id,
4980 1,
4981 equity_accounts::RETAINED_EARNINGS.to_string(),
4982 abs_net_income,
4983 ));
4984 close_je.add_line(JournalEntryLine::credit(
4985 doc_id,
4986 2,
4987 equity_accounts::INCOME_SUMMARY.to_string(),
4988 abs_net_income,
4989 ));
4990 }
4991
4992 debug_assert!(
4993 close_je.is_balanced(),
4994 "Income statement closing JE must be balanced"
4995 );
4996 close_jes.push(close_je);
4997 }
4998 }
4999
5000 let close_count = close_jes.len();
5001 if close_count > 0 {
5002 info!("Generated {} period-close journal entries", close_count);
5003 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5004 entries.extend(close_jes);
5005 stats.period_close_je_count = close_count;
5006
5007 stats.total_entries = entries.len() as u64;
5009 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5010 } else {
5011 debug!("No period-close entries generated (no income statement activity)");
5012 }
5013
5014 Ok(())
5015 }
5016
5017 fn phase_audit_data(
5019 &mut self,
5020 entries: &[JournalEntry],
5021 stats: &mut EnhancedGenerationStatistics,
5022 ) -> SynthResult<AuditSnapshot> {
5023 if self.phase_config.generate_audit {
5024 info!("Phase 8: Generating Audit Data");
5025 let audit_snapshot = self.generate_audit_data(entries)?;
5026 stats.audit_engagement_count = audit_snapshot.engagements.len();
5027 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5028 stats.audit_evidence_count = audit_snapshot.evidence.len();
5029 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5030 stats.audit_finding_count = audit_snapshot.findings.len();
5031 stats.audit_judgment_count = audit_snapshot.judgments.len();
5032 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5033 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5034 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5035 stats.audit_sample_count = audit_snapshot.samples.len();
5036 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5037 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5038 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5039 stats.audit_related_party_count = audit_snapshot.related_parties.len();
5040 stats.audit_related_party_transaction_count =
5041 audit_snapshot.related_party_transactions.len();
5042 info!(
5043 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5044 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5045 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5046 {} RP transactions",
5047 stats.audit_engagement_count,
5048 stats.audit_workpaper_count,
5049 stats.audit_evidence_count,
5050 stats.audit_risk_count,
5051 stats.audit_finding_count,
5052 stats.audit_judgment_count,
5053 stats.audit_confirmation_count,
5054 stats.audit_procedure_step_count,
5055 stats.audit_sample_count,
5056 stats.audit_analytical_result_count,
5057 stats.audit_ia_function_count,
5058 stats.audit_ia_report_count,
5059 stats.audit_related_party_count,
5060 stats.audit_related_party_transaction_count,
5061 );
5062 self.check_resources_with_log("post-audit")?;
5063 Ok(audit_snapshot)
5064 } else {
5065 debug!("Phase 8: Skipped (audit generation disabled)");
5066 Ok(AuditSnapshot::default())
5067 }
5068 }
5069
5070 fn phase_banking_data(
5072 &mut self,
5073 stats: &mut EnhancedGenerationStatistics,
5074 ) -> SynthResult<BankingSnapshot> {
5075 if self.phase_config.generate_banking {
5076 info!("Phase 9: Generating Banking KYC/AML Data");
5077 let banking_snapshot = self.generate_banking_data()?;
5078 stats.banking_customer_count = banking_snapshot.customers.len();
5079 stats.banking_account_count = banking_snapshot.accounts.len();
5080 stats.banking_transaction_count = banking_snapshot.transactions.len();
5081 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5082 info!(
5083 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5084 stats.banking_customer_count, stats.banking_account_count,
5085 stats.banking_transaction_count, stats.banking_suspicious_count
5086 );
5087 self.check_resources_with_log("post-banking")?;
5088 Ok(banking_snapshot)
5089 } else {
5090 debug!("Phase 9: Skipped (banking generation disabled)");
5091 Ok(BankingSnapshot::default())
5092 }
5093 }
5094
5095 fn phase_graph_export(
5097 &mut self,
5098 entries: &[JournalEntry],
5099 coa: &Arc<ChartOfAccounts>,
5100 stats: &mut EnhancedGenerationStatistics,
5101 ) -> SynthResult<GraphExportSnapshot> {
5102 if self.phase_config.generate_graph_export && !entries.is_empty() {
5103 info!("Phase 10: Exporting Accounting Network Graphs");
5104 match self.export_graphs(entries, coa, stats) {
5105 Ok(snapshot) => {
5106 info!(
5107 "Graph export complete: {} graphs ({} nodes, {} edges)",
5108 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5109 );
5110 Ok(snapshot)
5111 }
5112 Err(e) => {
5113 warn!("Phase 10: Graph export failed: {}", e);
5114 Ok(GraphExportSnapshot::default())
5115 }
5116 }
5117 } else {
5118 debug!("Phase 10: Skipped (graph export disabled or no entries)");
5119 Ok(GraphExportSnapshot::default())
5120 }
5121 }
5122
5123 #[allow(clippy::too_many_arguments)]
5125 fn phase_hypergraph_export(
5126 &self,
5127 coa: &Arc<ChartOfAccounts>,
5128 entries: &[JournalEntry],
5129 document_flows: &DocumentFlowSnapshot,
5130 sourcing: &SourcingSnapshot,
5131 hr: &HrSnapshot,
5132 manufacturing: &ManufacturingSnapshot,
5133 banking: &BankingSnapshot,
5134 audit: &AuditSnapshot,
5135 financial_reporting: &FinancialReportingSnapshot,
5136 ocpm: &OcpmSnapshot,
5137 compliance: &ComplianceRegulationsSnapshot,
5138 stats: &mut EnhancedGenerationStatistics,
5139 ) -> SynthResult<()> {
5140 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5141 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5142 match self.export_hypergraph(
5143 coa,
5144 entries,
5145 document_flows,
5146 sourcing,
5147 hr,
5148 manufacturing,
5149 banking,
5150 audit,
5151 financial_reporting,
5152 ocpm,
5153 compliance,
5154 stats,
5155 ) {
5156 Ok(info) => {
5157 info!(
5158 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5159 info.node_count, info.edge_count, info.hyperedge_count
5160 );
5161 }
5162 Err(e) => {
5163 warn!("Phase 10b: Hypergraph export failed: {}", e);
5164 }
5165 }
5166 } else {
5167 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5168 }
5169 Ok(())
5170 }
5171
5172 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5178 if !self.config.llm.enabled {
5179 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5180 return;
5181 }
5182
5183 info!("Phase 11: Starting LLM Enrichment");
5184 let start = std::time::Instant::now();
5185
5186 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5187 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5190 let schema_provider = &self.config.llm.provider;
5191 let api_key_env = match schema_provider.as_str() {
5192 "openai" => Some("OPENAI_API_KEY"),
5193 "anthropic" => Some("ANTHROPIC_API_KEY"),
5194 "custom" => Some("LLM_API_KEY"),
5195 _ => None,
5196 };
5197 if let Some(key_env) = api_key_env {
5198 if std::env::var(key_env).is_ok() {
5199 let llm_config = datasynth_core::llm::LlmConfig {
5200 model: self.config.llm.model.clone(),
5201 api_key_env: key_env.to_string(),
5202 ..datasynth_core::llm::LlmConfig::default()
5203 };
5204 match HttpLlmProvider::new(llm_config) {
5205 Ok(p) => Arc::new(p),
5206 Err(e) => {
5207 warn!(
5208 "Failed to create HttpLlmProvider: {}; falling back to mock",
5209 e
5210 );
5211 Arc::new(MockLlmProvider::new(self.seed))
5212 }
5213 }
5214 } else {
5215 Arc::new(MockLlmProvider::new(self.seed))
5216 }
5217 } else {
5218 Arc::new(MockLlmProvider::new(self.seed))
5219 }
5220 };
5221 let industry = format!("{:?}", self.config.global.industry);
5225
5226 let vendor_enricher =
5227 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5228 let max_vendors = self
5229 .config
5230 .llm
5231 .max_vendor_enrichments
5232 .min(self.master_data.vendors.len());
5233 let mut vendors_enriched = 0usize;
5234 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5235 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5236 Ok(name) => {
5237 vendor.name = name;
5238 vendors_enriched += 1;
5239 }
5240 Err(e) => warn!(
5241 "LLM vendor enrichment failed for {}: {}",
5242 vendor.vendor_id, e
5243 ),
5244 }
5245 }
5246
5247 let mut customers_enriched = 0usize;
5248 if self.config.llm.enrich_customers {
5249 let customer_enricher =
5250 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5251 &provider,
5252 ));
5253 let max_customers = self
5254 .config
5255 .llm
5256 .max_customer_enrichments
5257 .min(self.master_data.customers.len());
5258 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5259 match customer_enricher.enrich_customer_name(
5260 &industry,
5261 "general",
5262 &customer.country,
5263 ) {
5264 Ok(name) => {
5265 customer.name = name;
5266 customers_enriched += 1;
5267 }
5268 Err(e) => warn!(
5269 "LLM customer enrichment failed for {}: {}",
5270 customer.customer_id, e
5271 ),
5272 }
5273 }
5274 }
5275
5276 let mut materials_enriched = 0usize;
5277 if self.config.llm.enrich_materials {
5278 let material_enricher =
5279 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5280 &provider,
5281 ));
5282 let max_materials = self
5283 .config
5284 .llm
5285 .max_material_enrichments
5286 .min(self.master_data.materials.len());
5287 for material in self.master_data.materials.iter_mut().take(max_materials) {
5288 let material_type = format!("{:?}", material.material_type);
5289 match material_enricher.enrich_material_description(&material_type, &industry) {
5290 Ok(desc) => {
5291 material.description = desc;
5292 materials_enriched += 1;
5293 }
5294 Err(e) => warn!(
5295 "LLM material enrichment failed for {}: {}",
5296 material.material_id, e
5297 ),
5298 }
5299 }
5300 }
5301
5302 (vendors_enriched, customers_enriched, materials_enriched)
5303 }));
5304
5305 match result {
5306 Ok((v, c, m)) => {
5307 stats.llm_vendors_enriched = v;
5308 stats.llm_customers_enriched = c;
5309 stats.llm_materials_enriched = m;
5310 let elapsed = start.elapsed();
5311 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5312 info!(
5313 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5314 v, c, m, stats.llm_enrichment_ms
5315 );
5316 }
5317 Err(_) => {
5318 let elapsed = start.elapsed();
5319 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5320 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5321 }
5322 }
5323 }
5324
5325 fn phase_diffusion_enhancement(
5337 &self,
5338 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5339 stats: &mut EnhancedGenerationStatistics,
5340 ) {
5341 if !self.config.diffusion.enabled {
5342 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5343 return;
5344 }
5345
5346 info!("Phase 12: Starting Diffusion Enhancement");
5347 let start = std::time::Instant::now();
5348
5349 let backend_choice = self.config.diffusion.backend.as_str();
5350 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5351
5352 if use_neural {
5353 #[cfg(feature = "neural")]
5354 {
5355 match self.run_neural_diffusion_phase(entries) {
5356 Ok(sample_count) => {
5357 stats.diffusion_samples_generated = sample_count;
5358 let elapsed = start.elapsed();
5359 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5360 info!(
5361 "Phase 12 complete ({}): {} samples in {}ms",
5362 backend_choice, sample_count, stats.diffusion_enhancement_ms
5363 );
5364 return;
5365 }
5366 Err(e) => {
5367 warn!(
5368 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5369 );
5370 }
5372 }
5373 }
5374 #[cfg(not(feature = "neural"))]
5375 {
5376 warn!(
5377 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5378 not compiled in — falling back to statistical. Rebuild with \
5379 `--features neural` (or `neural-cuda` for GPU) to enable.",
5380 backend_choice
5381 );
5382 }
5383 } else if !matches!(backend_choice, "statistical" | "") {
5384 warn!(
5385 "Phase 12: unknown backend '{}', falling back to statistical",
5386 backend_choice
5387 );
5388 }
5389
5390 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5392 let means = vec![5000.0, 3.0, 2.0];
5393 let stds = vec![2000.0, 1.5, 1.0];
5394
5395 let diffusion_config = DiffusionConfig {
5396 n_steps: self.config.diffusion.n_steps,
5397 seed: self.seed,
5398 ..Default::default()
5399 };
5400
5401 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5402 let n_samples = self.config.diffusion.sample_size;
5403 let n_features = 3;
5404 backend.generate(n_samples, n_features, self.seed).len()
5405 }));
5406
5407 match result {
5408 Ok(sample_count) => {
5409 stats.diffusion_samples_generated = sample_count;
5410 let elapsed = start.elapsed();
5411 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5412 info!(
5413 "Phase 12 complete (statistical): {} samples in {}ms",
5414 sample_count, stats.diffusion_enhancement_ms
5415 );
5416 }
5417 Err(_) => {
5418 let elapsed = start.elapsed();
5419 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5420 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5421 }
5422 }
5423 }
5424
/// Runs the neural diffusion backend and returns the number of generated samples.
///
/// Up to the first 5000 journal entries are turned into three-feature
/// training rows: total of positive debit amounts, line count, and the
/// required approval levels (1.0 when the entry has no approval workflow).
/// If `diffusion.neural.checkpoint_path` is set, the backend is loaded from
/// that checkpoint. Otherwise a score network is trained on the rows.
///
/// # Errors
///
/// Returns a generation error when `entries` is empty, when the checkpoint
/// cannot be loaded, or when training fails.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // One training row per journal entry: [debit total, line count, approval levels].
    let featurize = |je: &JournalEntry| -> Vec<f64> {
        use rust_decimal::prelude::ToPrimitive;

        let debit_total: f64 = je
            .lines
            .iter()
            .filter(|line| line.debit_amount > rust_decimal::Decimal::ZERO)
            .map(|line| line.debit_amount.to_f64().unwrap_or(0.0))
            .sum();
        let approval_level = match je.header.approval_workflow.as_ref() {
            Some(workflow) => workflow.required_levels as f64,
            None => 1.0,
        };
        vec![debit_total, je.lines.len() as f64, approval_level]
    };
    let training_data: Vec<Vec<f64>> = entries.iter().take(5000).map(featurize).collect();

    let n_features = training_data.first().map_or(3, Vec::len);

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        Some(ckpt_path) => {
            let path = std::path::Path::new(ckpt_path);
            info!(
                " Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        None => {
            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};

            info!(
                " Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                " Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    Ok(backend
        .generate(cfg.sample_size, n_features, self.seed)
        .len())
}
5512
5513 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5520 if !self.config.causal.enabled {
5521 debug!("Phase 13: Skipped (causal generation disabled)");
5522 return;
5523 }
5524
5525 info!("Phase 13: Starting Causal Overlay");
5526 let start = std::time::Instant::now();
5527
5528 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5529 let graph = match self.config.causal.template.as_str() {
5531 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5532 _ => CausalGraph::fraud_detection_template(),
5533 };
5534
5535 let scm = StructuralCausalModel::new(graph.clone())
5536 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5537
5538 let n_samples = self.config.causal.sample_size;
5539 let samples = scm
5540 .generate(n_samples, self.seed)
5541 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5542
5543 let validation_passed = if self.config.causal.validate {
5545 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5546 if report.valid {
5547 info!(
5548 "Causal validation passed: all {} checks OK",
5549 report.checks.len()
5550 );
5551 } else {
5552 warn!(
5553 "Causal validation: {} violations detected: {:?}",
5554 report.violations.len(),
5555 report.violations
5556 );
5557 }
5558 Some(report.valid)
5559 } else {
5560 None
5561 };
5562
5563 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5564 }));
5565
5566 match result {
5567 Ok(Ok((sample_count, validation_passed))) => {
5568 stats.causal_samples_generated = sample_count;
5569 stats.causal_validation_passed = validation_passed;
5570 let elapsed = start.elapsed();
5571 stats.causal_generation_ms = elapsed.as_millis() as u64;
5572 info!(
5573 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5574 sample_count, stats.causal_generation_ms, validation_passed,
5575 );
5576 }
5577 Ok(Err(e)) => {
5578 let elapsed = start.elapsed();
5579 stats.causal_generation_ms = elapsed.as_millis() as u64;
5580 warn!("Phase 13: Causal generation failed: {}", e);
5581 }
5582 Err(_) => {
5583 let elapsed = start.elapsed();
5584 stats.causal_generation_ms = elapsed.as_millis() as u64;
5585 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5586 }
5587 }
5588 }
5589
5590 fn phase_sourcing_data(
5592 &mut self,
5593 stats: &mut EnhancedGenerationStatistics,
5594 ) -> SynthResult<SourcingSnapshot> {
5595 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5596 debug!("Phase 14: Skipped (sourcing generation disabled)");
5597 return Ok(SourcingSnapshot::default());
5598 }
5599 let degradation = self.check_resources()?;
5600 if degradation >= DegradationLevel::Reduced {
5601 debug!(
5602 "Phase skipped due to resource pressure (degradation: {:?})",
5603 degradation
5604 );
5605 return Ok(SourcingSnapshot::default());
5606 }
5607
5608 info!("Phase 14: Generating S2C Sourcing Data");
5609 let seed = self.seed;
5610
5611 let vendor_ids: Vec<String> = self
5613 .master_data
5614 .vendors
5615 .iter()
5616 .map(|v| v.vendor_id.clone())
5617 .collect();
5618 if vendor_ids.is_empty() {
5619 debug!("Phase 14: Skipped (no vendors available)");
5620 return Ok(SourcingSnapshot::default());
5621 }
5622
5623 let categories: Vec<(String, String)> = vec![
5624 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5625 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5626 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5627 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5628 ("CAT-LOG".to_string(), "Logistics".to_string()),
5629 ];
5630 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5631 .iter()
5632 .map(|(id, name)| {
5633 (
5634 id.clone(),
5635 name.clone(),
5636 rust_decimal::Decimal::from(100_000),
5637 )
5638 })
5639 .collect();
5640
5641 let company_code = self
5642 .config
5643 .companies
5644 .first()
5645 .map(|c| c.code.as_str())
5646 .unwrap_or("1000");
5647 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5648 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5649 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5650 let fiscal_year = start_date.year() as u16;
5651 let owner_ids: Vec<String> = self
5652 .master_data
5653 .employees
5654 .iter()
5655 .take(5)
5656 .map(|e| e.employee_id.clone())
5657 .collect();
5658 let owner_id = owner_ids
5659 .first()
5660 .map(std::string::String::as_str)
5661 .unwrap_or("BUYER-001");
5662
5663 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5665 let spend_analyses =
5666 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5667
5668 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5670 let sourcing_projects = if owner_ids.is_empty() {
5671 Vec::new()
5672 } else {
5673 project_gen.generate(
5674 company_code,
5675 &categories_with_spend,
5676 &owner_ids,
5677 start_date,
5678 self.config.global.period_months,
5679 )
5680 };
5681 stats.sourcing_project_count = sourcing_projects.len();
5682
5683 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5685 let mut qual_gen = QualificationGenerator::new(seed + 2);
5686 let qualifications = qual_gen.generate(
5687 company_code,
5688 &qual_vendor_ids,
5689 sourcing_projects.first().map(|p| p.project_id.as_str()),
5690 owner_id,
5691 start_date,
5692 );
5693
5694 let mut rfx_gen = RfxGenerator::new(seed + 3);
5696 let rfx_events: Vec<RfxEvent> = sourcing_projects
5697 .iter()
5698 .map(|proj| {
5699 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5700 rfx_gen.generate(
5701 company_code,
5702 &proj.project_id,
5703 &proj.category_id,
5704 &qualified_vids,
5705 owner_id,
5706 start_date,
5707 50000.0,
5708 )
5709 })
5710 .collect();
5711 stats.rfx_event_count = rfx_events.len();
5712
5713 let mut bid_gen = BidGenerator::new(seed + 4);
5715 let mut all_bids = Vec::new();
5716 for rfx in &rfx_events {
5717 let bidder_count = vendor_ids.len().clamp(2, 5);
5718 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5719 let bids = bid_gen.generate(rfx, &responding, start_date);
5720 all_bids.extend(bids);
5721 }
5722 stats.bid_count = all_bids.len();
5723
5724 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5726 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5727 .iter()
5728 .map(|rfx| {
5729 let rfx_bids: Vec<SupplierBid> = all_bids
5730 .iter()
5731 .filter(|b| b.rfx_id == rfx.rfx_id)
5732 .cloned()
5733 .collect();
5734 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5735 })
5736 .collect();
5737
5738 let mut contract_gen = ContractGenerator::new(seed + 6);
5740 let contracts: Vec<ProcurementContract> = bid_evaluations
5741 .iter()
5742 .zip(rfx_events.iter())
5743 .filter_map(|(eval, rfx)| {
5744 eval.ranked_bids.first().and_then(|winner| {
5745 all_bids
5746 .iter()
5747 .find(|b| b.bid_id == winner.bid_id)
5748 .map(|winning_bid| {
5749 contract_gen.generate_from_bid(
5750 winning_bid,
5751 Some(&rfx.sourcing_project_id),
5752 &rfx.category_id,
5753 owner_id,
5754 start_date,
5755 )
5756 })
5757 })
5758 })
5759 .collect();
5760 stats.contract_count = contracts.len();
5761
5762 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5764 let catalog_items = catalog_gen.generate(&contracts);
5765 stats.catalog_item_count = catalog_items.len();
5766
5767 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5769 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5770 .iter()
5771 .fold(
5772 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5773 |mut acc, c| {
5774 acc.entry(c.vendor_id.clone()).or_default().push(c);
5775 acc
5776 },
5777 )
5778 .into_iter()
5779 .collect();
5780 let scorecards = scorecard_gen.generate(
5781 company_code,
5782 &vendor_contracts,
5783 start_date,
5784 end_date,
5785 owner_id,
5786 );
5787 stats.scorecard_count = scorecards.len();
5788
5789 let mut sourcing_projects = sourcing_projects;
5792 for project in &mut sourcing_projects {
5793 project.rfx_ids = rfx_events
5795 .iter()
5796 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5797 .map(|rfx| rfx.rfx_id.clone())
5798 .collect();
5799
5800 project.contract_id = contracts
5802 .iter()
5803 .find(|c| {
5804 c.sourcing_project_id
5805 .as_deref()
5806 .is_some_and(|sp| sp == project.project_id)
5807 })
5808 .map(|c| c.contract_id.clone());
5809
5810 project.spend_analysis_id = spend_analyses
5812 .iter()
5813 .find(|sa| sa.category_id == project.category_id)
5814 .map(|sa| sa.category_id.clone());
5815 }
5816
5817 info!(
5818 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5819 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5820 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5821 );
5822 self.check_resources_with_log("post-sourcing")?;
5823
5824 Ok(SourcingSnapshot {
5825 spend_analyses,
5826 sourcing_projects,
5827 qualifications,
5828 rfx_events,
5829 bids: all_bids,
5830 bid_evaluations,
5831 contracts,
5832 catalog_items,
5833 scorecards,
5834 })
5835 }
5836
5837 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5843 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5844
5845 let parent_code = self
5846 .config
5847 .companies
5848 .first()
5849 .map(|c| c.code.clone())
5850 .unwrap_or_else(|| "PARENT".to_string());
5851
5852 let mut group = GroupStructure::new(parent_code);
5853
5854 for company in self.config.companies.iter().skip(1) {
5855 let sub =
5856 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5857 group.add_subsidiary(sub);
5858 }
5859
5860 group
5861 }
5862
5863 fn phase_intercompany(
5865 &mut self,
5866 journal_entries: &[JournalEntry],
5867 stats: &mut EnhancedGenerationStatistics,
5868 ) -> SynthResult<IntercompanySnapshot> {
5869 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5871 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5872 return Ok(IntercompanySnapshot::default());
5873 }
5874
5875 if self.config.companies.len() < 2 {
5877 debug!(
5878 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5879 self.config.companies.len()
5880 );
5881 return Ok(IntercompanySnapshot::default());
5882 }
5883
5884 info!("Phase 14b: Generating Intercompany Transactions");
5885
5886 let group_structure = self.build_group_structure();
5889 debug!(
5890 "Group structure built: parent={}, subsidiaries={}",
5891 group_structure.parent_entity,
5892 group_structure.subsidiaries.len()
5893 );
5894
5895 let seed = self.seed;
5896 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5897 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5898 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5899
5900 let parent_code = self.config.companies[0].code.clone();
5903 let mut ownership_structure =
5904 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5905
5906 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5907 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5908 format!("REL{:03}", i + 1),
5909 parent_code.clone(),
5910 company.code.clone(),
5911 rust_decimal::Decimal::from(100), start_date,
5913 );
5914 ownership_structure.add_relationship(relationship);
5915 }
5916
5917 let tp_method = match self.config.intercompany.transfer_pricing_method {
5919 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5920 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5921 }
5922 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5923 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5924 }
5925 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5926 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5927 }
5928 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5929 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5930 }
5931 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5932 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5933 }
5934 };
5935
5936 let ic_currency = self
5938 .config
5939 .companies
5940 .first()
5941 .map(|c| c.currency.clone())
5942 .unwrap_or_else(|| "USD".to_string());
5943 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5944 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5945 transfer_pricing_method: tp_method,
5946 markup_percent: rust_decimal::Decimal::from_f64_retain(
5947 self.config.intercompany.markup_percent,
5948 )
5949 .unwrap_or(rust_decimal::Decimal::from(5)),
5950 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5951 default_currency: ic_currency,
5952 ..Default::default()
5953 };
5954
5955 let mut ic_generator = datasynth_generators::ICGenerator::new(
5957 ic_gen_config,
5958 ownership_structure.clone(),
5959 seed + 50,
5960 );
5961
5962 let transactions_per_day = 3;
5965 let matched_pairs = ic_generator.generate_transactions_for_period(
5966 start_date,
5967 end_date,
5968 transactions_per_day,
5969 );
5970
5971 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5973 debug!(
5974 "Generated {} IC seller invoices, {} IC buyer POs",
5975 ic_doc_chains.seller_invoices.len(),
5976 ic_doc_chains.buyer_orders.len()
5977 );
5978
5979 let mut seller_entries = Vec::new();
5981 let mut buyer_entries = Vec::new();
5982 let fiscal_year = start_date.year();
5983
5984 for pair in &matched_pairs {
5985 let fiscal_period = pair.posting_date.month();
5986 let (seller_je, buyer_je) =
5987 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5988 seller_entries.push(seller_je);
5989 buyer_entries.push(buyer_je);
5990 }
5991
5992 let matching_config = datasynth_generators::ICMatchingConfig {
5994 base_currency: self
5995 .config
5996 .companies
5997 .first()
5998 .map(|c| c.currency.clone())
5999 .unwrap_or_else(|| "USD".to_string()),
6000 ..Default::default()
6001 };
6002 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6003 matching_engine.load_matched_pairs(&matched_pairs);
6004 let matching_result = matching_engine.run_matching(end_date);
6005
6006 let mut elimination_entries = Vec::new();
6008 if self.config.intercompany.generate_eliminations {
6009 let elim_config = datasynth_generators::EliminationConfig {
6010 consolidation_entity: "GROUP".to_string(),
6011 base_currency: self
6012 .config
6013 .companies
6014 .first()
6015 .map(|c| c.currency.clone())
6016 .unwrap_or_else(|| "USD".to_string()),
6017 ..Default::default()
6018 };
6019
6020 let mut elim_generator =
6021 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6022
6023 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6024 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6025 matching_result
6026 .matched_balances
6027 .iter()
6028 .chain(matching_result.unmatched_balances.iter())
6029 .cloned()
6030 .collect();
6031
6032 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6044 std::collections::HashMap::new();
6045 let mut equity_amounts: std::collections::HashMap<
6046 String,
6047 std::collections::HashMap<String, rust_decimal::Decimal>,
6048 > = std::collections::HashMap::new();
6049 {
6050 use rust_decimal::Decimal;
6051 let hundred = Decimal::from(100u32);
6052 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
6056 for sub in &group_structure.subsidiaries {
6057 let net_assets = {
6058 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6059 if na > Decimal::ZERO {
6060 na
6061 } else {
6062 Decimal::from(1_000_000u64)
6063 }
6064 };
6065 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6067 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6068
6069 let mut eq_map = std::collections::HashMap::new();
6072 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6073 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6074 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6075 equity_amounts.insert(sub.entity_code.clone(), eq_map);
6076 }
6077 }
6078
6079 let journal = elim_generator.generate_eliminations(
6080 &fiscal_period,
6081 end_date,
6082 &all_balances,
6083 &matched_pairs,
6084 &investment_amounts,
6085 &equity_amounts,
6086 );
6087
6088 elimination_entries = journal.entries.clone();
6089 }
6090
6091 let matched_pair_count = matched_pairs.len();
6092 let elimination_entry_count = elimination_entries.len();
6093 let match_rate = matching_result.match_rate;
6094
6095 stats.ic_matched_pair_count = matched_pair_count;
6096 stats.ic_elimination_count = elimination_entry_count;
6097 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6098
6099 info!(
6100 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6101 matched_pair_count,
6102 stats.ic_transaction_count,
6103 seller_entries.len(),
6104 buyer_entries.len(),
6105 elimination_entry_count,
6106 match_rate * 100.0
6107 );
6108 self.check_resources_with_log("post-intercompany")?;
6109
6110 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6114 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6115 use rust_decimal::Decimal;
6116
6117 let eight_pct = Decimal::new(8, 2); group_structure
6120 .subsidiaries
6121 .iter()
6122 .filter(|sub| {
6123 sub.nci_percentage > Decimal::ZERO
6124 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6125 })
6126 .map(|sub| {
6127 let net_assets_from_jes =
6131 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6132
6133 let net_assets = if net_assets_from_jes > Decimal::ZERO {
6134 net_assets_from_jes.round_dp(2)
6135 } else {
6136 Decimal::from(1_000_000u64)
6138 };
6139
6140 let net_income = (net_assets * eight_pct).round_dp(2);
6142
6143 NciMeasurement::compute(
6144 sub.entity_code.clone(),
6145 sub.nci_percentage,
6146 net_assets,
6147 net_income,
6148 )
6149 })
6150 .collect()
6151 };
6152
6153 if !nci_measurements.is_empty() {
6154 info!(
6155 "NCI measurements: {} subsidiaries with non-controlling interests",
6156 nci_measurements.len()
6157 );
6158 }
6159
6160 Ok(IntercompanySnapshot {
6161 group_structure: Some(group_structure),
6162 matched_pairs,
6163 seller_journal_entries: seller_entries,
6164 buyer_journal_entries: buyer_entries,
6165 elimination_entries,
6166 nci_measurements,
6167 ic_document_chains: Some(ic_doc_chains),
6168 matched_pair_count,
6169 elimination_entry_count,
6170 match_rate,
6171 })
6172 }
6173
6174 fn phase_financial_reporting(
6176 &mut self,
6177 document_flows: &DocumentFlowSnapshot,
6178 journal_entries: &[JournalEntry],
6179 coa: &Arc<ChartOfAccounts>,
6180 _hr: &HrSnapshot,
6181 _audit: &AuditSnapshot,
6182 stats: &mut EnhancedGenerationStatistics,
6183 ) -> SynthResult<FinancialReportingSnapshot> {
6184 let fs_enabled = self.phase_config.generate_financial_statements
6185 || self.config.financial_reporting.enabled;
6186 let br_enabled = self.phase_config.generate_bank_reconciliation;
6187
6188 if !fs_enabled && !br_enabled {
6189 debug!("Phase 15: Skipped (financial reporting disabled)");
6190 return Ok(FinancialReportingSnapshot::default());
6191 }
6192
6193 info!("Phase 15: Generating Financial Reporting Data");
6194
6195 let seed = self.seed;
6196 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6197 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6198
6199 let mut financial_statements = Vec::new();
6200 let mut bank_reconciliations = Vec::new();
6201 let mut trial_balances = Vec::new();
6202 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6203 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6204 Vec::new();
6205 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6207 std::collections::HashMap::new();
6208 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6210 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6212
6213 if fs_enabled {
6221 let has_journal_entries = !journal_entries.is_empty();
6222
6223 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6226 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6228
6229 let elimination_entries: Vec<&JournalEntry> = journal_entries
6231 .iter()
6232 .filter(|je| je.header.is_elimination)
6233 .collect();
6234
6235 for period in 0..self.config.global.period_months {
6237 let period_start = start_date + chrono::Months::new(period);
6238 let period_end =
6239 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6240 let fiscal_year = period_end.year() as u16;
6241 let fiscal_period = period_end.month() as u8;
6242 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6243
6244 let mut entity_tb_map: std::collections::HashMap<
6247 String,
6248 std::collections::HashMap<String, rust_decimal::Decimal>,
6249 > = std::collections::HashMap::new();
6250
6251 for (company_idx, company) in self.config.companies.iter().enumerate() {
6253 let company_code = company.code.as_str();
6254 let currency = company.currency.as_str();
6255 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6258 let mut company_fs_gen =
6259 FinancialStatementGenerator::new(seed + company_seed_offset);
6260
6261 if has_journal_entries {
6262 let tb_entries = Self::build_cumulative_trial_balance(
6263 journal_entries,
6264 coa,
6265 company_code,
6266 start_date,
6267 period_end,
6268 fiscal_year,
6269 fiscal_period,
6270 );
6271
6272 let entity_cat_map =
6274 entity_tb_map.entry(company_code.to_string()).or_default();
6275 for tb_entry in &tb_entries {
6276 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6277 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6278 }
6279
6280 let stmts = company_fs_gen.generate(
6281 company_code,
6282 currency,
6283 &tb_entries,
6284 period_start,
6285 period_end,
6286 fiscal_year,
6287 fiscal_period,
6288 None,
6289 "SYS-AUTOCLOSE",
6290 );
6291
6292 let mut entity_stmts = Vec::new();
6293 for stmt in stmts {
6294 if stmt.statement_type == StatementType::CashFlowStatement {
6295 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6296 let cf_items = Self::build_cash_flow_from_trial_balances(
6297 &tb_entries,
6298 None,
6299 net_income,
6300 );
6301 entity_stmts.push(FinancialStatement {
6302 cash_flow_items: cf_items,
6303 ..stmt
6304 });
6305 } else {
6306 entity_stmts.push(stmt);
6307 }
6308 }
6309
6310 financial_statements.extend(entity_stmts.clone());
6312
6313 standalone_statements
6315 .entry(company_code.to_string())
6316 .or_default()
6317 .extend(entity_stmts);
6318
6319 if company_idx == 0 {
6322 trial_balances.push(PeriodTrialBalance {
6323 fiscal_year,
6324 fiscal_period,
6325 period_start,
6326 period_end,
6327 entries: tb_entries,
6328 });
6329 }
6330 } else {
6331 let tb_entries = Self::build_trial_balance_from_entries(
6333 journal_entries,
6334 coa,
6335 company_code,
6336 fiscal_year,
6337 fiscal_period,
6338 );
6339
6340 let stmts = company_fs_gen.generate(
6341 company_code,
6342 currency,
6343 &tb_entries,
6344 period_start,
6345 period_end,
6346 fiscal_year,
6347 fiscal_period,
6348 None,
6349 "SYS-AUTOCLOSE",
6350 );
6351 financial_statements.extend(stmts.clone());
6352 standalone_statements
6353 .entry(company_code.to_string())
6354 .or_default()
6355 .extend(stmts);
6356
6357 if company_idx == 0 && !tb_entries.is_empty() {
6358 trial_balances.push(PeriodTrialBalance {
6359 fiscal_year,
6360 fiscal_period,
6361 period_start,
6362 period_end,
6363 entries: tb_entries,
6364 });
6365 }
6366 }
6367 }
6368
6369 let group_currency = self
6372 .config
6373 .companies
6374 .first()
6375 .map(|c| c.currency.as_str())
6376 .unwrap_or("USD");
6377
6378 let period_eliminations: Vec<JournalEntry> = elimination_entries
6380 .iter()
6381 .filter(|je| {
6382 je.header.fiscal_year == fiscal_year
6383 && je.header.fiscal_period == fiscal_period
6384 })
6385 .map(|je| (*je).clone())
6386 .collect();
6387
6388 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6389 &entity_tb_map,
6390 &period_eliminations,
6391 &period_label,
6392 );
6393
6394 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6397 .line_items
6398 .iter()
6399 .map(|li| {
6400 let net = li.post_elimination_total;
6401 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6402 (net, rust_decimal::Decimal::ZERO)
6403 } else {
6404 (rust_decimal::Decimal::ZERO, -net)
6405 };
6406 datasynth_generators::TrialBalanceEntry {
6407 account_code: li.account_category.clone(),
6408 account_name: li.account_category.clone(),
6409 category: li.account_category.clone(),
6410 debit_balance: debit,
6411 credit_balance: credit,
6412 }
6413 })
6414 .collect();
6415
6416 let mut cons_stmts = cons_gen.generate(
6417 "GROUP",
6418 group_currency,
6419 &cons_tb,
6420 period_start,
6421 period_end,
6422 fiscal_year,
6423 fiscal_period,
6424 None,
6425 "SYS-AUTOCLOSE",
6426 );
6427
6428 let bs_categories: &[&str] = &[
6432 "CASH",
6433 "RECEIVABLES",
6434 "INVENTORY",
6435 "FIXEDASSETS",
6436 "PAYABLES",
6437 "ACCRUEDLIABILITIES",
6438 "LONGTERMDEBT",
6439 "EQUITY",
6440 ];
6441 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6442 cons_line_items.into_iter().partition(|li| {
6443 let upper = li.label.to_uppercase();
6444 bs_categories.iter().any(|c| upper == *c)
6445 });
6446
6447 for stmt in &mut cons_stmts {
6448 stmt.is_consolidated = true;
6449 match stmt.statement_type {
6450 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6451 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6452 _ => {} }
6454 }
6455
6456 consolidated_statements.extend(cons_stmts);
6457 consolidation_schedules.push(schedule);
6458 }
6459
6460 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6466 info!(
6467 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6468 stats.financial_statement_count,
6469 consolidated_statements.len(),
6470 has_journal_entries
6471 );
6472
6473 let entity_seeds: Vec<SegmentSeed> = self
6478 .config
6479 .companies
6480 .iter()
6481 .map(|c| SegmentSeed {
6482 code: c.code.clone(),
6483 name: c.name.clone(),
6484 currency: c.currency.clone(),
6485 })
6486 .collect();
6487
6488 let mut seg_gen = SegmentGenerator::new(seed + 30);
6489
6490 for period in 0..self.config.global.period_months {
6495 let period_end =
6496 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6497 let fiscal_year = period_end.year() as u16;
6498 let fiscal_period = period_end.month() as u8;
6499 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6500
6501 use datasynth_core::models::StatementType;
6502
6503 let cons_is = consolidated_statements.iter().find(|s| {
6505 s.fiscal_year == fiscal_year
6506 && s.fiscal_period == fiscal_period
6507 && s.statement_type == StatementType::IncomeStatement
6508 });
6509 let cons_bs = consolidated_statements.iter().find(|s| {
6510 s.fiscal_year == fiscal_year
6511 && s.fiscal_period == fiscal_period
6512 && s.statement_type == StatementType::BalanceSheet
6513 });
6514
6515 let is_stmt = cons_is.or_else(|| {
6517 financial_statements.iter().find(|s| {
6518 s.fiscal_year == fiscal_year
6519 && s.fiscal_period == fiscal_period
6520 && s.statement_type == StatementType::IncomeStatement
6521 })
6522 });
6523 let bs_stmt = cons_bs.or_else(|| {
6524 financial_statements.iter().find(|s| {
6525 s.fiscal_year == fiscal_year
6526 && s.fiscal_period == fiscal_period
6527 && s.statement_type == StatementType::BalanceSheet
6528 })
6529 });
6530
6531 let consolidated_revenue = is_stmt
6532 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6533 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6535
6536 let consolidated_profit = is_stmt
6537 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6538 .map(|li| li.amount)
6539 .unwrap_or(rust_decimal::Decimal::ZERO);
6540
6541 let consolidated_assets = bs_stmt
6542 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6543 .map(|li| li.amount)
6544 .unwrap_or(rust_decimal::Decimal::ZERO);
6545
6546 if consolidated_revenue == rust_decimal::Decimal::ZERO
6548 && consolidated_assets == rust_decimal::Decimal::ZERO
6549 {
6550 continue;
6551 }
6552
6553 let group_code = self
6554 .config
6555 .companies
6556 .first()
6557 .map(|c| c.code.as_str())
6558 .unwrap_or("GROUP");
6559
6560 let total_depr: rust_decimal::Decimal = journal_entries
6563 .iter()
6564 .filter(|je| je.header.document_type == "CL")
6565 .flat_map(|je| je.lines.iter())
6566 .filter(|l| l.gl_account.starts_with("6000"))
6567 .map(|l| l.debit_amount)
6568 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6569 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6570 Some(total_depr)
6571 } else {
6572 None
6573 };
6574
6575 let (segs, recon) = seg_gen.generate(
6576 group_code,
6577 &period_label,
6578 consolidated_revenue,
6579 consolidated_profit,
6580 consolidated_assets,
6581 &entity_seeds,
6582 depr_param,
6583 );
6584 segment_reports.extend(segs);
6585 segment_reconciliations.push(recon);
6586 }
6587
6588 info!(
6589 "Segment reports generated: {} segments, {} reconciliations",
6590 segment_reports.len(),
6591 segment_reconciliations.len()
6592 );
6593 }
6594
6595 if br_enabled && !document_flows.payments.is_empty() {
6597 let employee_ids: Vec<String> = self
6598 .master_data
6599 .employees
6600 .iter()
6601 .map(|e| e.employee_id.clone())
6602 .collect();
6603 let mut br_gen =
6604 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6605
6606 for company in &self.config.companies {
6608 let company_payments: Vec<PaymentReference> = document_flows
6609 .payments
6610 .iter()
6611 .filter(|p| p.header.company_code == company.code)
6612 .map(|p| PaymentReference {
6613 id: p.header.document_id.clone(),
6614 amount: if p.is_vendor { p.amount } else { -p.amount },
6615 date: p.header.document_date,
6616 reference: p
6617 .check_number
6618 .clone()
6619 .or_else(|| p.wire_reference.clone())
6620 .unwrap_or_else(|| p.header.document_id.clone()),
6621 })
6622 .collect();
6623
6624 if company_payments.is_empty() {
6625 continue;
6626 }
6627
6628 let bank_account_id = format!("{}-MAIN", company.code);
6629
6630 for period in 0..self.config.global.period_months {
6632 let period_start = start_date + chrono::Months::new(period);
6633 let period_end =
6634 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6635
6636 let period_payments: Vec<PaymentReference> = company_payments
6637 .iter()
6638 .filter(|p| p.date >= period_start && p.date <= period_end)
6639 .cloned()
6640 .collect();
6641
6642 let recon = br_gen.generate(
6643 &company.code,
6644 &bank_account_id,
6645 period_start,
6646 period_end,
6647 &company.currency,
6648 &period_payments,
6649 );
6650 bank_reconciliations.push(recon);
6651 }
6652 }
6653 info!(
6654 "Bank reconciliations generated: {} reconciliations",
6655 bank_reconciliations.len()
6656 );
6657 }
6658
6659 stats.bank_reconciliation_count = bank_reconciliations.len();
6660 self.check_resources_with_log("post-financial-reporting")?;
6661
6662 if !trial_balances.is_empty() {
6663 info!(
6664 "Period-close trial balances captured: {} periods",
6665 trial_balances.len()
6666 );
6667 }
6668
6669 let notes_to_financial_statements = Vec::new();
6673
6674 Ok(FinancialReportingSnapshot {
6675 financial_statements,
6676 standalone_statements,
6677 consolidated_statements,
6678 consolidation_schedules,
6679 bank_reconciliations,
6680 trial_balances,
6681 segment_reports,
6682 segment_reconciliations,
6683 notes_to_financial_statements,
6684 })
6685 }
6686
6687 fn generate_notes_to_financial_statements(
6694 &self,
6695 financial_reporting: &mut FinancialReportingSnapshot,
6696 accounting_standards: &AccountingStandardsSnapshot,
6697 tax: &TaxSnapshot,
6698 hr: &HrSnapshot,
6699 audit: &AuditSnapshot,
6700 treasury: &TreasurySnapshot,
6701 ) {
6702 use datasynth_config::schema::AccountingFrameworkConfig;
6703 use datasynth_core::models::StatementType;
6704 use datasynth_generators::period_close::notes_generator::{
6705 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6706 };
6707
6708 let seed = self.seed;
6709 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6710 {
6711 Ok(d) => d,
6712 Err(_) => return,
6713 };
6714
6715 let mut notes_gen = NotesGenerator::new(seed + 4235);
6716
6717 for company in &self.config.companies {
6718 let last_period_end = start_date
6719 + chrono::Months::new(self.config.global.period_months)
6720 - chrono::Days::new(1);
6721 let fiscal_year = last_period_end.year() as u16;
6722
6723 let entity_is = financial_reporting
6725 .standalone_statements
6726 .get(&company.code)
6727 .and_then(|stmts| {
6728 stmts.iter().find(|s| {
6729 s.fiscal_year == fiscal_year
6730 && s.statement_type == StatementType::IncomeStatement
6731 })
6732 });
6733 let entity_bs = financial_reporting
6734 .standalone_statements
6735 .get(&company.code)
6736 .and_then(|stmts| {
6737 stmts.iter().find(|s| {
6738 s.fiscal_year == fiscal_year
6739 && s.statement_type == StatementType::BalanceSheet
6740 })
6741 });
6742
6743 let revenue_amount = entity_is
6745 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6746 .map(|li| li.amount);
6747 let ppe_gross = entity_bs
6748 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6749 .map(|li| li.amount);
6750
6751 let framework = match self
6752 .config
6753 .accounting_standards
6754 .framework
6755 .unwrap_or_default()
6756 {
6757 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6758 "IFRS".to_string()
6759 }
6760 _ => "US GAAP".to_string(),
6761 };
6762
6763 let (entity_dta, entity_dtl) = {
6766 let mut dta = rust_decimal::Decimal::ZERO;
6767 let mut dtl = rust_decimal::Decimal::ZERO;
6768 for rf in &tax.deferred_tax.rollforwards {
6769 if rf.entity_code == company.code {
6770 dta += rf.closing_dta;
6771 dtl += rf.closing_dtl;
6772 }
6773 }
6774 (
6775 if dta > rust_decimal::Decimal::ZERO {
6776 Some(dta)
6777 } else {
6778 None
6779 },
6780 if dtl > rust_decimal::Decimal::ZERO {
6781 Some(dtl)
6782 } else {
6783 None
6784 },
6785 )
6786 };
6787
6788 let entity_provisions: Vec<_> = accounting_standards
6791 .provisions
6792 .iter()
6793 .filter(|p| p.entity_code == company.code)
6794 .collect();
6795 let provision_count = entity_provisions.len();
6796 let total_provisions = if provision_count > 0 {
6797 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6798 } else {
6799 None
6800 };
6801
6802 let entity_pension_plan_count = hr
6804 .pension_plans
6805 .iter()
6806 .filter(|p| p.entity_code == company.code)
6807 .count();
6808 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6809 let sum: rust_decimal::Decimal = hr
6810 .pension_disclosures
6811 .iter()
6812 .filter(|d| {
6813 hr.pension_plans
6814 .iter()
6815 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6816 })
6817 .map(|d| d.net_pension_liability)
6818 .sum();
6819 let plan_assets_sum: rust_decimal::Decimal = hr
6820 .pension_plan_assets
6821 .iter()
6822 .filter(|a| {
6823 hr.pension_plans
6824 .iter()
6825 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6826 })
6827 .map(|a| a.fair_value_closing)
6828 .sum();
6829 if entity_pension_plan_count > 0 {
6830 Some(sum + plan_assets_sum)
6831 } else {
6832 None
6833 }
6834 };
6835 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6836 let sum: rust_decimal::Decimal = hr
6837 .pension_plan_assets
6838 .iter()
6839 .filter(|a| {
6840 hr.pension_plans
6841 .iter()
6842 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6843 })
6844 .map(|a| a.fair_value_closing)
6845 .sum();
6846 if entity_pension_plan_count > 0 {
6847 Some(sum)
6848 } else {
6849 None
6850 }
6851 };
6852
6853 let rp_count = audit.related_party_transactions.len();
6856 let se_count = audit.subsequent_events.len();
6857 let adjusting_count = audit
6858 .subsequent_events
6859 .iter()
6860 .filter(|e| {
6861 matches!(
6862 e.classification,
6863 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6864 )
6865 })
6866 .count();
6867
6868 let ctx = NotesGeneratorContext {
6869 entity_code: company.code.clone(),
6870 framework,
6871 period: format!("FY{}", fiscal_year),
6872 period_end: last_period_end,
6873 currency: company.currency.clone(),
6874 revenue_amount,
6875 total_ppe_gross: ppe_gross,
6876 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6877 deferred_tax_asset: entity_dta,
6879 deferred_tax_liability: entity_dtl,
6880 provision_count,
6882 total_provisions,
6883 pension_plan_count: entity_pension_plan_count,
6885 total_dbo: entity_total_dbo,
6886 total_plan_assets: entity_total_plan_assets,
6887 related_party_transaction_count: rp_count,
6889 subsequent_event_count: se_count,
6890 adjusting_event_count: adjusting_count,
6891 ..NotesGeneratorContext::default()
6892 };
6893
6894 let entity_notes = notes_gen.generate(&ctx);
6895 let standard_note_count = entity_notes.len() as u32;
6896 info!(
6897 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6898 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6899 );
6900 financial_reporting
6901 .notes_to_financial_statements
6902 .extend(entity_notes);
6903
6904 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6906 .debt_instruments
6907 .iter()
6908 .filter(|d| d.entity_id == company.code)
6909 .map(|d| {
6910 (
6911 format!("{:?}", d.instrument_type),
6912 d.principal,
6913 d.maturity_date.to_string(),
6914 )
6915 })
6916 .collect();
6917
6918 let hedge_count = treasury.hedge_relationships.len();
6919 let effective_hedges = treasury
6920 .hedge_relationships
6921 .iter()
6922 .filter(|h| h.is_effective)
6923 .count();
6924 let total_notional: rust_decimal::Decimal = treasury
6925 .hedging_instruments
6926 .iter()
6927 .map(|h| h.notional_amount)
6928 .sum();
6929 let total_fair_value: rust_decimal::Decimal = treasury
6930 .hedging_instruments
6931 .iter()
6932 .map(|h| h.fair_value)
6933 .sum();
6934
6935 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6937 .provisions
6938 .iter()
6939 .filter(|p| p.entity_code == company.code)
6940 .map(|p| p.id.as_str())
6941 .collect();
6942 let provision_movements: Vec<(
6943 String,
6944 rust_decimal::Decimal,
6945 rust_decimal::Decimal,
6946 rust_decimal::Decimal,
6947 )> = accounting_standards
6948 .provision_movements
6949 .iter()
6950 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6951 .map(|m| {
6952 let prov_type = accounting_standards
6953 .provisions
6954 .iter()
6955 .find(|p| p.id == m.provision_id)
6956 .map(|p| format!("{:?}", p.provision_type))
6957 .unwrap_or_else(|| "Unknown".to_string());
6958 (prov_type, m.opening, m.additions, m.closing)
6959 })
6960 .collect();
6961
6962 let enhanced_ctx = EnhancedNotesContext {
6963 entity_code: company.code.clone(),
6964 period: format!("FY{}", fiscal_year),
6965 currency: company.currency.clone(),
6966 finished_goods_value: rust_decimal::Decimal::ZERO,
6968 wip_value: rust_decimal::Decimal::ZERO,
6969 raw_materials_value: rust_decimal::Decimal::ZERO,
6970 debt_instruments,
6971 hedge_count,
6972 effective_hedges,
6973 total_notional,
6974 total_fair_value,
6975 provision_movements,
6976 };
6977
6978 let enhanced_notes =
6979 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6980 if !enhanced_notes.is_empty() {
6981 info!(
6982 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6983 company.code,
6984 enhanced_notes.len(),
6985 enhanced_ctx.debt_instruments.len(),
6986 hedge_count,
6987 enhanced_ctx.provision_movements.len(),
6988 );
6989 financial_reporting
6990 .notes_to_financial_statements
6991 .extend(enhanced_notes);
6992 }
6993 }
6994 }
6995
6996 fn build_trial_balance_from_entries(
7002 journal_entries: &[JournalEntry],
7003 coa: &ChartOfAccounts,
7004 company_code: &str,
7005 fiscal_year: u16,
7006 fiscal_period: u8,
7007 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7008 use rust_decimal::Decimal;
7009
7010 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7012 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7013
7014 for je in journal_entries {
7015 if je.header.company_code != company_code
7017 || je.header.fiscal_year != fiscal_year
7018 || je.header.fiscal_period != fiscal_period
7019 {
7020 continue;
7021 }
7022
7023 for line in &je.lines {
7024 let acct = &line.gl_account;
7025 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7026 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7027 }
7028 }
7029
7030 let mut all_accounts: Vec<&String> = account_debits
7032 .keys()
7033 .chain(account_credits.keys())
7034 .collect::<std::collections::HashSet<_>>()
7035 .into_iter()
7036 .collect();
7037 all_accounts.sort();
7038
7039 let mut entries = Vec::new();
7040
7041 for acct_number in all_accounts {
7042 let debit = account_debits
7043 .get(acct_number)
7044 .copied()
7045 .unwrap_or(Decimal::ZERO);
7046 let credit = account_credits
7047 .get(acct_number)
7048 .copied()
7049 .unwrap_or(Decimal::ZERO);
7050
7051 if debit.is_zero() && credit.is_zero() {
7052 continue;
7053 }
7054
7055 let account_name = coa
7057 .get_account(acct_number)
7058 .map(|gl| gl.short_description.clone())
7059 .unwrap_or_else(|| format!("Account {acct_number}"));
7060
7061 let category = Self::category_from_account_code(acct_number);
7066
7067 entries.push(datasynth_generators::TrialBalanceEntry {
7068 account_code: acct_number.clone(),
7069 account_name,
7070 category,
7071 debit_balance: debit,
7072 credit_balance: credit,
7073 });
7074 }
7075
7076 entries
7077 }
7078
7079 fn build_cumulative_trial_balance(
7086 journal_entries: &[JournalEntry],
7087 coa: &ChartOfAccounts,
7088 company_code: &str,
7089 start_date: NaiveDate,
7090 period_end: NaiveDate,
7091 fiscal_year: u16,
7092 fiscal_period: u8,
7093 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7094 use rust_decimal::Decimal;
7095
7096 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7098 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7099
7100 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7102 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7103
7104 for je in journal_entries {
7105 if je.header.company_code != company_code {
7106 continue;
7107 }
7108
7109 for line in &je.lines {
7110 let acct = &line.gl_account;
7111 let category = Self::category_from_account_code(acct);
7112 let is_bs_account = matches!(
7113 category.as_str(),
7114 "Cash"
7115 | "Receivables"
7116 | "Inventory"
7117 | "FixedAssets"
7118 | "Payables"
7119 | "AccruedLiabilities"
7120 | "LongTermDebt"
7121 | "Equity"
7122 );
7123
7124 if is_bs_account {
7125 if je.header.document_date <= period_end
7127 && je.header.document_date >= start_date
7128 {
7129 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7130 line.debit_amount;
7131 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7132 line.credit_amount;
7133 }
7134 } else {
7135 if je.header.fiscal_year == fiscal_year
7137 && je.header.fiscal_period == fiscal_period
7138 {
7139 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7140 line.debit_amount;
7141 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7142 line.credit_amount;
7143 }
7144 }
7145 }
7146 }
7147
7148 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7150 all_accounts.extend(bs_debits.keys().cloned());
7151 all_accounts.extend(bs_credits.keys().cloned());
7152 all_accounts.extend(is_debits.keys().cloned());
7153 all_accounts.extend(is_credits.keys().cloned());
7154
7155 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7156 sorted_accounts.sort();
7157
7158 let mut entries = Vec::new();
7159
7160 for acct_number in &sorted_accounts {
7161 let category = Self::category_from_account_code(acct_number);
7162 let is_bs_account = matches!(
7163 category.as_str(),
7164 "Cash"
7165 | "Receivables"
7166 | "Inventory"
7167 | "FixedAssets"
7168 | "Payables"
7169 | "AccruedLiabilities"
7170 | "LongTermDebt"
7171 | "Equity"
7172 );
7173
7174 let (debit, credit) = if is_bs_account {
7175 (
7176 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7177 bs_credits
7178 .get(acct_number)
7179 .copied()
7180 .unwrap_or(Decimal::ZERO),
7181 )
7182 } else {
7183 (
7184 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7185 is_credits
7186 .get(acct_number)
7187 .copied()
7188 .unwrap_or(Decimal::ZERO),
7189 )
7190 };
7191
7192 if debit.is_zero() && credit.is_zero() {
7193 continue;
7194 }
7195
7196 let account_name = coa
7197 .get_account(acct_number)
7198 .map(|gl| gl.short_description.clone())
7199 .unwrap_or_else(|| format!("Account {acct_number}"));
7200
7201 entries.push(datasynth_generators::TrialBalanceEntry {
7202 account_code: acct_number.clone(),
7203 account_name,
7204 category,
7205 debit_balance: debit,
7206 credit_balance: credit,
7207 });
7208 }
7209
7210 entries
7211 }
7212
7213 fn build_cash_flow_from_trial_balances(
7218 current_tb: &[datasynth_generators::TrialBalanceEntry],
7219 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7220 net_income: rust_decimal::Decimal,
7221 ) -> Vec<CashFlowItem> {
7222 use rust_decimal::Decimal;
7223
7224 let aggregate =
7226 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7227 let mut map: HashMap<String, Decimal> = HashMap::new();
7228 for entry in tb {
7229 let net = entry.debit_balance - entry.credit_balance;
7230 *map.entry(entry.category.clone()).or_default() += net;
7231 }
7232 map
7233 };
7234
7235 let current = aggregate(current_tb);
7236 let prior = prior_tb.map(aggregate);
7237
7238 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7240 *map.get(key).unwrap_or(&Decimal::ZERO)
7241 };
7242
7243 let change = |key: &str| -> Decimal {
7245 let curr = get(¤t, key);
7246 match &prior {
7247 Some(p) => curr - get(p, key),
7248 None => curr,
7249 }
7250 };
7251
7252 let fixed_asset_change = change("FixedAssets");
7255 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7256 -fixed_asset_change
7257 } else {
7258 Decimal::ZERO
7259 };
7260
7261 let ar_change = change("Receivables");
7263 let inventory_change = change("Inventory");
7264 let ap_change = change("Payables");
7266 let accrued_change = change("AccruedLiabilities");
7267
7268 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7269 + (-ap_change)
7270 + (-accrued_change);
7271
7272 let capex = if fixed_asset_change > Decimal::ZERO {
7274 -fixed_asset_change
7275 } else {
7276 Decimal::ZERO
7277 };
7278 let investing_cf = capex;
7279
7280 let debt_change = -change("LongTermDebt");
7282 let equity_change = -change("Equity");
7283 let financing_cf = debt_change + equity_change;
7284
7285 let net_change = operating_cf + investing_cf + financing_cf;
7286
7287 vec![
7288 CashFlowItem {
7289 item_code: "CF-NI".to_string(),
7290 label: "Net Income".to_string(),
7291 category: CashFlowCategory::Operating,
7292 amount: net_income,
7293 amount_prior: None,
7294 sort_order: 1,
7295 is_total: false,
7296 },
7297 CashFlowItem {
7298 item_code: "CF-DEP".to_string(),
7299 label: "Depreciation & Amortization".to_string(),
7300 category: CashFlowCategory::Operating,
7301 amount: depreciation_addback,
7302 amount_prior: None,
7303 sort_order: 2,
7304 is_total: false,
7305 },
7306 CashFlowItem {
7307 item_code: "CF-AR".to_string(),
7308 label: "Change in Accounts Receivable".to_string(),
7309 category: CashFlowCategory::Operating,
7310 amount: -ar_change,
7311 amount_prior: None,
7312 sort_order: 3,
7313 is_total: false,
7314 },
7315 CashFlowItem {
7316 item_code: "CF-AP".to_string(),
7317 label: "Change in Accounts Payable".to_string(),
7318 category: CashFlowCategory::Operating,
7319 amount: -ap_change,
7320 amount_prior: None,
7321 sort_order: 4,
7322 is_total: false,
7323 },
7324 CashFlowItem {
7325 item_code: "CF-INV".to_string(),
7326 label: "Change in Inventory".to_string(),
7327 category: CashFlowCategory::Operating,
7328 amount: -inventory_change,
7329 amount_prior: None,
7330 sort_order: 5,
7331 is_total: false,
7332 },
7333 CashFlowItem {
7334 item_code: "CF-OP".to_string(),
7335 label: "Net Cash from Operating Activities".to_string(),
7336 category: CashFlowCategory::Operating,
7337 amount: operating_cf,
7338 amount_prior: None,
7339 sort_order: 6,
7340 is_total: true,
7341 },
7342 CashFlowItem {
7343 item_code: "CF-CAPEX".to_string(),
7344 label: "Capital Expenditures".to_string(),
7345 category: CashFlowCategory::Investing,
7346 amount: capex,
7347 amount_prior: None,
7348 sort_order: 7,
7349 is_total: false,
7350 },
7351 CashFlowItem {
7352 item_code: "CF-INV-T".to_string(),
7353 label: "Net Cash from Investing Activities".to_string(),
7354 category: CashFlowCategory::Investing,
7355 amount: investing_cf,
7356 amount_prior: None,
7357 sort_order: 8,
7358 is_total: true,
7359 },
7360 CashFlowItem {
7361 item_code: "CF-DEBT".to_string(),
7362 label: "Net Borrowings / (Repayments)".to_string(),
7363 category: CashFlowCategory::Financing,
7364 amount: debt_change,
7365 amount_prior: None,
7366 sort_order: 9,
7367 is_total: false,
7368 },
7369 CashFlowItem {
7370 item_code: "CF-EQ".to_string(),
7371 label: "Equity Changes".to_string(),
7372 category: CashFlowCategory::Financing,
7373 amount: equity_change,
7374 amount_prior: None,
7375 sort_order: 10,
7376 is_total: false,
7377 },
7378 CashFlowItem {
7379 item_code: "CF-FIN-T".to_string(),
7380 label: "Net Cash from Financing Activities".to_string(),
7381 category: CashFlowCategory::Financing,
7382 amount: financing_cf,
7383 amount_prior: None,
7384 sort_order: 11,
7385 is_total: true,
7386 },
7387 CashFlowItem {
7388 item_code: "CF-NET".to_string(),
7389 label: "Net Change in Cash".to_string(),
7390 category: CashFlowCategory::Operating,
7391 amount: net_change,
7392 amount_prior: None,
7393 sort_order: 12,
7394 is_total: true,
7395 },
7396 ]
7397 }
7398
7399 fn calculate_net_income_from_tb(
7403 tb: &[datasynth_generators::TrialBalanceEntry],
7404 ) -> rust_decimal::Decimal {
7405 use rust_decimal::Decimal;
7406
7407 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7408 for entry in tb {
7409 let net = entry.debit_balance - entry.credit_balance;
7410 *aggregated.entry(entry.category.clone()).or_default() += net;
7411 }
7412
7413 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7414 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7415 let opex = *aggregated
7416 .get("OperatingExpenses")
7417 .unwrap_or(&Decimal::ZERO);
7418 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7419 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7420
7421 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7424 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7426 operating_income - tax
7427 }
7428
/// Maps an account code to a reporting category using its first two
/// characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12-14   | `Inventory`          |
/// | 15-19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21-24   | `AccruedLiabilities` |
/// | 25-29   | `LongTermDebt`       |
/// | 30-39   | `Equity`             |
/// | 40-44   | `Revenue`            |
/// | 50-52   | `CostOfSales`        |
/// | 60-69   | `OperatingExpenses`  |
/// | 70-74   | `OtherIncome`        |
/// | 80-89   | `OtherExpenses`      |
///
/// Every other input, including codes shorter than two characters, maps to
/// `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();

    // Tuple fields are evaluated left to right: (first char, second char).
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Covers 60-69 as well as every unmapped or malformed prefix.
        _ => "OperatingExpenses",
    };

    String::from(category)
}
7457
7458 fn phase_hr_data(
7460 &mut self,
7461 stats: &mut EnhancedGenerationStatistics,
7462 ) -> SynthResult<HrSnapshot> {
7463 if !self.phase_config.generate_hr {
7464 debug!("Phase 16: Skipped (HR generation disabled)");
7465 return Ok(HrSnapshot::default());
7466 }
7467
7468 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7469
7470 let seed = self.seed;
7471 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7472 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7473 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7474 let company_code = self
7475 .config
7476 .companies
7477 .first()
7478 .map(|c| c.code.as_str())
7479 .unwrap_or("1000");
7480 let currency = self
7481 .config
7482 .companies
7483 .first()
7484 .map(|c| c.currency.as_str())
7485 .unwrap_or("USD");
7486
7487 let employee_ids: Vec<String> = self
7488 .master_data
7489 .employees
7490 .iter()
7491 .map(|e| e.employee_id.clone())
7492 .collect();
7493
7494 if employee_ids.is_empty() {
7495 debug!("Phase 16: Skipped (no employees available)");
7496 return Ok(HrSnapshot::default());
7497 }
7498
7499 let cost_center_ids: Vec<String> = self
7502 .master_data
7503 .employees
7504 .iter()
7505 .filter_map(|e| e.cost_center.clone())
7506 .collect::<std::collections::HashSet<_>>()
7507 .into_iter()
7508 .collect();
7509
7510 let mut snapshot = HrSnapshot::default();
7511
7512 if self.config.hr.payroll.enabled {
7514 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7515 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7516
7517 let payroll_pack = self.primary_pack();
7519
7520 payroll_gen.set_country_pack(payroll_pack.clone());
7523
7524 let employees_with_salary: Vec<(
7525 String,
7526 rust_decimal::Decimal,
7527 Option<String>,
7528 Option<String>,
7529 )> = self
7530 .master_data
7531 .employees
7532 .iter()
7533 .map(|e| {
7534 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7537 e.base_salary
7538 } else {
7539 rust_decimal::Decimal::from(60_000)
7540 };
7541 (
7542 e.employee_id.clone(),
7543 annual, e.cost_center.clone(),
7545 e.department_id.clone(),
7546 )
7547 })
7548 .collect();
7549
7550 let change_history = &self.master_data.employee_change_history;
7553 let has_changes = !change_history.is_empty();
7554 if has_changes {
7555 debug!(
7556 "Payroll will incorporate {} employee change events",
7557 change_history.len()
7558 );
7559 }
7560
7561 for month in 0..self.config.global.period_months {
7562 let period_start = start_date + chrono::Months::new(month);
7563 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7564 let (run, items) = if has_changes {
7565 payroll_gen.generate_with_changes(
7566 company_code,
7567 &employees_with_salary,
7568 period_start,
7569 period_end,
7570 currency,
7571 change_history,
7572 )
7573 } else {
7574 payroll_gen.generate(
7575 company_code,
7576 &employees_with_salary,
7577 period_start,
7578 period_end,
7579 currency,
7580 )
7581 };
7582 snapshot.payroll_runs.push(run);
7583 snapshot.payroll_run_count += 1;
7584 snapshot.payroll_line_item_count += items.len();
7585 snapshot.payroll_line_items.extend(items);
7586 }
7587 }
7588
7589 if self.config.hr.time_attendance.enabled {
7591 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7592 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7593 if let Some(ctx) = &self.temporal_context {
7597 time_gen.set_temporal_context(Arc::clone(ctx));
7598 }
7599 let entries = time_gen.generate(
7600 &employee_ids,
7601 start_date,
7602 end_date,
7603 &self.config.hr.time_attendance,
7604 );
7605 snapshot.time_entry_count = entries.len();
7606 snapshot.time_entries = entries;
7607 }
7608
7609 if self.config.hr.expenses.enabled {
7611 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7612 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7613 expense_gen.set_country_pack(self.primary_pack().clone());
7614 if let Some(ctx) = &self.temporal_context {
7617 expense_gen.set_temporal_context(Arc::clone(ctx));
7618 }
7619 let company_currency = self
7620 .config
7621 .companies
7622 .first()
7623 .map(|c| c.currency.as_str())
7624 .unwrap_or("USD");
7625 let reports = expense_gen.generate_with_currency(
7626 &employee_ids,
7627 start_date,
7628 end_date,
7629 &self.config.hr.expenses,
7630 company_currency,
7631 );
7632 snapshot.expense_report_count = reports.len();
7633 snapshot.expense_reports = reports;
7634 }
7635
7636 if self.config.hr.payroll.enabled {
7638 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7639 let employee_pairs: Vec<(String, String)> = self
7640 .master_data
7641 .employees
7642 .iter()
7643 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7644 .collect();
7645 let enrollments =
7646 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7647 snapshot.benefit_enrollment_count = enrollments.len();
7648 snapshot.benefit_enrollments = enrollments;
7649 }
7650
7651 if self.phase_config.generate_hr {
7653 let entity_name = self
7654 .config
7655 .companies
7656 .first()
7657 .map(|c| c.name.as_str())
7658 .unwrap_or("Entity");
7659 let period_months = self.config.global.period_months;
7660 let period_label = {
7661 let y = start_date.year();
7662 let m = start_date.month();
7663 if period_months >= 12 {
7664 format!("FY{y}")
7665 } else {
7666 format!("{y}-{m:02}")
7667 }
7668 };
7669 let reporting_date =
7670 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7671
7672 let avg_salary: Option<rust_decimal::Decimal> = {
7677 let employee_count = employee_ids.len();
7678 if self.config.hr.payroll.enabled
7679 && employee_count > 0
7680 && !snapshot.payroll_runs.is_empty()
7681 {
7682 let total_gross: rust_decimal::Decimal = snapshot
7684 .payroll_runs
7685 .iter()
7686 .filter(|r| r.company_code == company_code)
7687 .map(|r| r.total_gross)
7688 .sum();
7689 if total_gross > rust_decimal::Decimal::ZERO {
7690 let annual_total = if period_months > 0 && period_months < 12 {
7692 total_gross * rust_decimal::Decimal::from(12u32)
7693 / rust_decimal::Decimal::from(period_months)
7694 } else {
7695 total_gross
7696 };
7697 Some(
7698 (annual_total / rust_decimal::Decimal::from(employee_count))
7699 .round_dp(2),
7700 )
7701 } else {
7702 None
7703 }
7704 } else {
7705 None
7706 }
7707 };
7708
7709 let mut pension_gen =
7710 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7711 let pension_snap = pension_gen.generate(
7712 company_code,
7713 entity_name,
7714 &period_label,
7715 reporting_date,
7716 employee_ids.len(),
7717 currency,
7718 avg_salary,
7719 period_months,
7720 );
7721 snapshot.pension_plan_count = pension_snap.plans.len();
7722 snapshot.pension_plans = pension_snap.plans;
7723 snapshot.pension_obligations = pension_snap.obligations;
7724 snapshot.pension_plan_assets = pension_snap.plan_assets;
7725 snapshot.pension_disclosures = pension_snap.disclosures;
7726 snapshot.pension_journal_entries = pension_snap.journal_entries;
7731 }
7732
7733 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7735 let period_months = self.config.global.period_months;
7736 let period_label = {
7737 let y = start_date.year();
7738 let m = start_date.month();
7739 if period_months >= 12 {
7740 format!("FY{y}")
7741 } else {
7742 format!("{y}-{m:02}")
7743 }
7744 };
7745 let reporting_date =
7746 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7747
7748 let mut stock_comp_gen =
7749 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7750 let stock_snap = stock_comp_gen.generate(
7751 company_code,
7752 &employee_ids,
7753 start_date,
7754 &period_label,
7755 reporting_date,
7756 currency,
7757 );
7758 snapshot.stock_grant_count = stock_snap.grants.len();
7759 snapshot.stock_grants = stock_snap.grants;
7760 snapshot.stock_comp_expenses = stock_snap.expenses;
7761 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7762 }
7763
7764 stats.payroll_run_count = snapshot.payroll_run_count;
7765 stats.time_entry_count = snapshot.time_entry_count;
7766 stats.expense_report_count = snapshot.expense_report_count;
7767 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7768 stats.pension_plan_count = snapshot.pension_plan_count;
7769 stats.stock_grant_count = snapshot.stock_grant_count;
7770
7771 info!(
7772 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7773 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7774 snapshot.time_entry_count, snapshot.expense_report_count,
7775 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7776 snapshot.stock_grant_count
7777 );
7778 self.check_resources_with_log("post-hr")?;
7779
7780 Ok(snapshot)
7781 }
7782
7783 fn phase_accounting_standards(
7785 &mut self,
7786 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7787 journal_entries: &[JournalEntry],
7788 stats: &mut EnhancedGenerationStatistics,
7789 ) -> SynthResult<AccountingStandardsSnapshot> {
7790 if !self.phase_config.generate_accounting_standards {
7791 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7792 return Ok(AccountingStandardsSnapshot::default());
7793 }
7794 info!("Phase 17: Generating Accounting Standards Data");
7795
7796 let seed = self.seed;
7797 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7798 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7799 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7800 let company_code = self
7801 .config
7802 .companies
7803 .first()
7804 .map(|c| c.code.as_str())
7805 .unwrap_or("1000");
7806 let currency = self
7807 .config
7808 .companies
7809 .first()
7810 .map(|c| c.currency.as_str())
7811 .unwrap_or("USD");
7812
7813 let framework = match self.config.accounting_standards.framework {
7818 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7819 datasynth_standards::framework::AccountingFramework::UsGaap
7820 }
7821 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7822 datasynth_standards::framework::AccountingFramework::Ifrs
7823 }
7824 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7825 datasynth_standards::framework::AccountingFramework::DualReporting
7826 }
7827 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7828 datasynth_standards::framework::AccountingFramework::FrenchGaap
7829 }
7830 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7831 datasynth_standards::framework::AccountingFramework::GermanGaap
7832 }
7833 None => {
7834 let pack = self.primary_pack();
7836 let pack_fw = pack.accounting.framework.as_str();
7837 match pack_fw {
7838 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7839 "dual_reporting" => {
7840 datasynth_standards::framework::AccountingFramework::DualReporting
7841 }
7842 "french_gaap" => {
7843 datasynth_standards::framework::AccountingFramework::FrenchGaap
7844 }
7845 "german_gaap" | "hgb" => {
7846 datasynth_standards::framework::AccountingFramework::GermanGaap
7847 }
7848 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7850 }
7851 }
7852 };
7853
7854 let mut snapshot = AccountingStandardsSnapshot::default();
7855
7856 if self.config.accounting_standards.revenue_recognition.enabled {
7858 let customer_ids: Vec<String> = self
7859 .master_data
7860 .customers
7861 .iter()
7862 .map(|c| c.customer_id.clone())
7863 .collect();
7864
7865 if !customer_ids.is_empty() {
7866 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7867 let contracts = rev_gen.generate(
7868 company_code,
7869 &customer_ids,
7870 start_date,
7871 end_date,
7872 currency,
7873 &self.config.accounting_standards.revenue_recognition,
7874 framework,
7875 );
7876 snapshot.revenue_contract_count = contracts.len();
7877 snapshot.contracts = contracts;
7878 }
7879 }
7880
7881 if self.config.accounting_standards.impairment.enabled {
7883 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7884 .master_data
7885 .assets
7886 .iter()
7887 .map(|a| {
7888 (
7889 a.asset_id.clone(),
7890 a.description.clone(),
7891 a.acquisition_cost,
7892 )
7893 })
7894 .collect();
7895
7896 if !asset_data.is_empty() {
7897 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7898 let tests = imp_gen.generate(
7899 company_code,
7900 &asset_data,
7901 end_date,
7902 &self.config.accounting_standards.impairment,
7903 framework,
7904 );
7905 snapshot.impairment_test_count = tests.len();
7906 snapshot.impairment_tests = tests;
7907 }
7908 }
7909
7910 if self
7912 .config
7913 .accounting_standards
7914 .business_combinations
7915 .enabled
7916 {
7917 let bc_config = &self.config.accounting_standards.business_combinations;
7918 let framework_str = match framework {
7919 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7920 _ => "US_GAAP",
7921 };
7922 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7923 let bc_snap = bc_gen.generate(
7924 company_code,
7925 currency,
7926 start_date,
7927 end_date,
7928 bc_config.acquisition_count,
7929 framework_str,
7930 );
7931 snapshot.business_combination_count = bc_snap.combinations.len();
7932 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7933 snapshot.business_combinations = bc_snap.combinations;
7934 }
7935
7936 if self
7938 .config
7939 .accounting_standards
7940 .expected_credit_loss
7941 .enabled
7942 {
7943 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7944 let framework_str = match framework {
7945 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7946 _ => "ASC_326",
7947 };
7948
7949 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7952
7953 let mut ecl_gen = EclGenerator::new(seed + 43);
7954
7955 let bucket_exposures: Vec<(
7957 datasynth_core::models::subledger::ar::AgingBucket,
7958 rust_decimal::Decimal,
7959 )> = if ar_aging_reports.is_empty() {
7960 use datasynth_core::models::subledger::ar::AgingBucket;
7962 vec![
7963 (
7964 AgingBucket::Current,
7965 rust_decimal::Decimal::from(500_000_u32),
7966 ),
7967 (
7968 AgingBucket::Days1To30,
7969 rust_decimal::Decimal::from(120_000_u32),
7970 ),
7971 (
7972 AgingBucket::Days31To60,
7973 rust_decimal::Decimal::from(45_000_u32),
7974 ),
7975 (
7976 AgingBucket::Days61To90,
7977 rust_decimal::Decimal::from(15_000_u32),
7978 ),
7979 (
7980 AgingBucket::Over90Days,
7981 rust_decimal::Decimal::from(8_000_u32),
7982 ),
7983 ]
7984 } else {
7985 use datasynth_core::models::subledger::ar::AgingBucket;
7986 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7988 std::collections::HashMap::new();
7989 for report in ar_aging_reports {
7990 for (bucket, amount) in &report.bucket_totals {
7991 *totals.entry(*bucket).or_default() += amount;
7992 }
7993 }
7994 AgingBucket::all()
7995 .into_iter()
7996 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7997 .collect()
7998 };
7999
8000 let ecl_snap = ecl_gen.generate(
8001 company_code,
8002 end_date,
8003 &bucket_exposures,
8004 ecl_config,
8005 &period_label,
8006 framework_str,
8007 );
8008
8009 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8010 snapshot.ecl_models = ecl_snap.ecl_models;
8011 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8012 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8013 }
8014
8015 {
8017 let framework_str = match framework {
8018 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8019 _ => "US_GAAP",
8020 };
8021
8022 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8027 .max(rust_decimal::Decimal::from(100_000_u32));
8028
8029 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8030
8031 let mut prov_gen = ProvisionGenerator::new(seed + 44);
8032 let prov_snap = prov_gen.generate(
8033 company_code,
8034 currency,
8035 revenue_proxy,
8036 end_date,
8037 &period_label,
8038 framework_str,
8039 None, );
8041
8042 snapshot.provision_count = prov_snap.provisions.len();
8043 snapshot.provisions = prov_snap.provisions;
8044 snapshot.provision_movements = prov_snap.movements;
8045 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8046 snapshot.provision_journal_entries = prov_snap.journal_entries;
8047 }
8048
8049 {
8053 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8054
8055 let presentation_currency = self
8056 .config
8057 .global
8058 .presentation_currency
8059 .clone()
8060 .unwrap_or_else(|| self.config.global.group_currency.clone());
8061
8062 let mut rate_table = FxRateTable::new(&presentation_currency);
8065
8066 let base_rates = base_rates_usd();
8070 for (ccy, rate) in &base_rates {
8071 rate_table.add_rate(FxRate::new(
8072 ccy,
8073 "USD",
8074 RateType::Closing,
8075 end_date,
8076 *rate,
8077 "SYNTHETIC",
8078 ));
8079 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8082 rate_table.add_rate(FxRate::new(
8083 ccy,
8084 "USD",
8085 RateType::Average,
8086 end_date,
8087 avg,
8088 "SYNTHETIC",
8089 ));
8090 }
8091
8092 let mut translation_results = Vec::new();
8093 for company in &self.config.companies {
8094 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8097 .max(rust_decimal::Decimal::from(100_000_u32));
8098
8099 let func_ccy = company
8100 .functional_currency
8101 .clone()
8102 .unwrap_or_else(|| company.currency.clone());
8103
8104 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8105 &company.code,
8106 &func_ccy,
8107 &presentation_currency,
8108 &ias21_period_label,
8109 end_date,
8110 company_revenue,
8111 &rate_table,
8112 );
8113 translation_results.push(result);
8114 }
8115
8116 snapshot.currency_translation_count = translation_results.len();
8117 snapshot.currency_translation_results = translation_results;
8118 }
8119
8120 stats.revenue_contract_count = snapshot.revenue_contract_count;
8121 stats.impairment_test_count = snapshot.impairment_test_count;
8122 stats.business_combination_count = snapshot.business_combination_count;
8123 stats.ecl_model_count = snapshot.ecl_model_count;
8124 stats.provision_count = snapshot.provision_count;
8125
8126 if self.config.accounting_standards.leases.enabled {
8130 use datasynth_generators::standards::LeaseGenerator;
8131 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8132 .unwrap_or_else(|_| {
8133 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8134 });
8135 let framework =
8136 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8137 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8138 for company in &self.config.companies {
8139 let leases = lease_gen.generate(
8140 &company.code,
8141 start_date,
8142 &self.config.accounting_standards.leases,
8143 framework,
8144 );
8145 snapshot.lease_count += leases.len();
8146 snapshot.leases.extend(leases);
8147 }
8148 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8149 }
8150
8151 if self.config.accounting_standards.fair_value.enabled {
8155 use datasynth_generators::standards::FairValueGenerator;
8156 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8157 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8158 + chrono::Months::new(self.config.global.period_months);
8159 let framework =
8160 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8161 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8162 for company in &self.config.companies {
8163 let measurements = fv_gen.generate(
8164 &company.code,
8165 end_date,
8166 &company.currency,
8167 &self.config.accounting_standards.fair_value,
8168 framework,
8169 );
8170 snapshot.fair_value_measurement_count += measurements.len();
8171 snapshot.fair_value_measurements.extend(measurements);
8172 }
8173 info!(
8174 "v3.3.1 fair value measurements: {}",
8175 snapshot.fair_value_measurement_count
8176 );
8177 }
8178
8179 if self.config.accounting_standards.generate_differences
8183 && matches!(
8184 self.config.accounting_standards.framework,
8185 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8186 )
8187 {
8188 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8189 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8190 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8191 + chrono::Months::new(self.config.global.period_months);
8192 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8193 for company in &self.config.companies {
8194 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8195 snapshot.framework_difference_count += records.len();
8196 snapshot.framework_differences.extend(records);
8197 snapshot.framework_reconciliations.push(reconciliation);
8198 }
8199 info!(
8200 "v3.3.1 framework reconciliation: {} differences across {} entities",
8201 snapshot.framework_difference_count,
8202 snapshot.framework_reconciliations.len()
8203 );
8204 }
8205
8206 info!(
8207 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8208 snapshot.revenue_contract_count,
8209 snapshot.impairment_test_count,
8210 snapshot.business_combination_count,
8211 snapshot.ecl_model_count,
8212 snapshot.provision_count,
8213 snapshot.currency_translation_count,
8214 snapshot.lease_count,
8215 snapshot.fair_value_measurement_count,
8216 snapshot.framework_difference_count,
8217 );
8218 self.check_resources_with_log("post-accounting-standards")?;
8219
8220 Ok(snapshot)
8221 }
8222
8223 fn resolve_accounting_framework(
8227 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8228 ) -> datasynth_standards::framework::AccountingFramework {
8229 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8230 use datasynth_standards::framework::AccountingFramework as Fw;
8231 match cfg {
8232 Some(Cfg::Ifrs) => Fw::Ifrs,
8233 Some(Cfg::DualReporting) => Fw::DualReporting,
8234 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8235 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8236 _ => Fw::UsGaap,
8237 }
8238 }
8239
8240 fn phase_manufacturing(
8242 &mut self,
8243 stats: &mut EnhancedGenerationStatistics,
8244 ) -> SynthResult<ManufacturingSnapshot> {
8245 if !self.phase_config.generate_manufacturing {
8246 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8247 return Ok(ManufacturingSnapshot::default());
8248 }
8249 info!("Phase 18: Generating Manufacturing Data");
8250
8251 let seed = self.seed;
8252 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8253 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8254 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8255 let company_code = self
8256 .config
8257 .companies
8258 .first()
8259 .map(|c| c.code.as_str())
8260 .unwrap_or("1000");
8261
8262 let material_data: Vec<(String, String)> = self
8263 .master_data
8264 .materials
8265 .iter()
8266 .map(|m| (m.material_id.clone(), m.description.clone()))
8267 .collect();
8268
8269 if material_data.is_empty() {
8270 debug!("Phase 18: Skipped (no materials available)");
8271 return Ok(ManufacturingSnapshot::default());
8272 }
8273
8274 let mut snapshot = ManufacturingSnapshot::default();
8275
8276 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8278 if let Some(ctx) = &self.temporal_context {
8280 prod_gen.set_temporal_context(Arc::clone(ctx));
8281 }
8282 let production_orders = prod_gen.generate(
8283 company_code,
8284 &material_data,
8285 start_date,
8286 end_date,
8287 &self.config.manufacturing.production_orders,
8288 &self.config.manufacturing.costing,
8289 &self.config.manufacturing.routing,
8290 );
8291 snapshot.production_order_count = production_orders.len();
8292
8293 let inspection_data: Vec<(String, String, String)> = production_orders
8295 .iter()
8296 .map(|po| {
8297 (
8298 po.order_id.clone(),
8299 po.material_id.clone(),
8300 po.material_description.clone(),
8301 )
8302 })
8303 .collect();
8304
8305 snapshot.production_orders = production_orders;
8306
8307 if !inspection_data.is_empty() {
8308 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8309 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8310 snapshot.quality_inspection_count = inspections.len();
8311 snapshot.quality_inspections = inspections;
8312 }
8313
8314 let storage_locations: Vec<(String, String)> = material_data
8316 .iter()
8317 .enumerate()
8318 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8319 .collect();
8320
8321 let employee_ids: Vec<String> = self
8322 .master_data
8323 .employees
8324 .iter()
8325 .map(|e| e.employee_id.clone())
8326 .collect();
8327 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8328 .with_employee_pool(employee_ids);
8329 let mut cycle_count_total = 0usize;
8330 for month in 0..self.config.global.period_months {
8331 let count_date = start_date + chrono::Months::new(month);
8332 let items_per_count = storage_locations.len().clamp(10, 50);
8333 let cc = cc_gen.generate(
8334 company_code,
8335 &storage_locations,
8336 count_date,
8337 items_per_count,
8338 );
8339 snapshot.cycle_counts.push(cc);
8340 cycle_count_total += 1;
8341 }
8342 snapshot.cycle_count_count = cycle_count_total;
8343
8344 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8346 let bom_components = bom_gen.generate(company_code, &material_data);
8347 snapshot.bom_component_count = bom_components.len();
8348 snapshot.bom_components = bom_components;
8349
8350 let currency = self
8352 .config
8353 .companies
8354 .first()
8355 .map(|c| c.currency.as_str())
8356 .unwrap_or("USD");
8357 let production_order_ids: Vec<String> = snapshot
8358 .production_orders
8359 .iter()
8360 .map(|po| po.order_id.clone())
8361 .collect();
8362 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8363 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8364 company_code,
8365 &material_data,
8366 start_date,
8367 end_date,
8368 2,
8369 currency,
8370 &production_order_ids,
8371 );
8372 snapshot.inventory_movement_count = inventory_movements.len();
8373 snapshot.inventory_movements = inventory_movements;
8374
8375 stats.production_order_count = snapshot.production_order_count;
8376 stats.quality_inspection_count = snapshot.quality_inspection_count;
8377 stats.cycle_count_count = snapshot.cycle_count_count;
8378 stats.bom_component_count = snapshot.bom_component_count;
8379 stats.inventory_movement_count = snapshot.inventory_movement_count;
8380
8381 info!(
8382 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8383 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8384 snapshot.bom_component_count, snapshot.inventory_movement_count
8385 );
8386 self.check_resources_with_log("post-manufacturing")?;
8387
8388 Ok(snapshot)
8389 }
8390
8391 fn phase_sales_kpi_budgets(
8393 &mut self,
8394 coa: &Arc<ChartOfAccounts>,
8395 financial_reporting: &FinancialReportingSnapshot,
8396 stats: &mut EnhancedGenerationStatistics,
8397 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8398 if !self.phase_config.generate_sales_kpi_budgets {
8399 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8400 return Ok(SalesKpiBudgetsSnapshot::default());
8401 }
8402 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8403
8404 let seed = self.seed;
8405 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8406 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8407 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8408 let company_code = self
8409 .config
8410 .companies
8411 .first()
8412 .map(|c| c.code.as_str())
8413 .unwrap_or("1000");
8414
8415 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8416
8417 if self.config.sales_quotes.enabled {
8419 let customer_data: Vec<(String, String)> = self
8420 .master_data
8421 .customers
8422 .iter()
8423 .map(|c| (c.customer_id.clone(), c.name.clone()))
8424 .collect();
8425 let material_data: Vec<(String, String)> = self
8426 .master_data
8427 .materials
8428 .iter()
8429 .map(|m| (m.material_id.clone(), m.description.clone()))
8430 .collect();
8431
8432 if !customer_data.is_empty() && !material_data.is_empty() {
8433 let employee_ids: Vec<String> = self
8434 .master_data
8435 .employees
8436 .iter()
8437 .map(|e| e.employee_id.clone())
8438 .collect();
8439 let customer_ids: Vec<String> = self
8440 .master_data
8441 .customers
8442 .iter()
8443 .map(|c| c.customer_id.clone())
8444 .collect();
8445 let company_currency = self
8446 .config
8447 .companies
8448 .first()
8449 .map(|c| c.currency.as_str())
8450 .unwrap_or("USD");
8451
8452 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8453 .with_pools(employee_ids, customer_ids);
8454 let quotes = quote_gen.generate_with_currency(
8455 company_code,
8456 &customer_data,
8457 &material_data,
8458 start_date,
8459 end_date,
8460 &self.config.sales_quotes,
8461 company_currency,
8462 );
8463 snapshot.sales_quote_count = quotes.len();
8464 snapshot.sales_quotes = quotes;
8465 }
8466 }
8467
8468 if self.config.financial_reporting.management_kpis.enabled {
8470 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8471 let mut kpis = kpi_gen.generate(
8472 company_code,
8473 start_date,
8474 end_date,
8475 &self.config.financial_reporting.management_kpis,
8476 );
8477
8478 {
8480 use rust_decimal::Decimal;
8481
8482 if let Some(income_stmt) =
8483 financial_reporting.financial_statements.iter().find(|fs| {
8484 fs.statement_type == StatementType::IncomeStatement
8485 && fs.company_code == company_code
8486 })
8487 {
8488 let total_revenue: Decimal = income_stmt
8490 .line_items
8491 .iter()
8492 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8493 .map(|li| li.amount)
8494 .sum();
8495 let total_cogs: Decimal = income_stmt
8496 .line_items
8497 .iter()
8498 .filter(|li| {
8499 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8500 && !li.is_total
8501 })
8502 .map(|li| li.amount.abs())
8503 .sum();
8504 let total_opex: Decimal = income_stmt
8505 .line_items
8506 .iter()
8507 .filter(|li| {
8508 li.section.contains("Expense")
8509 && !li.is_total
8510 && !li.section.contains("Cost")
8511 })
8512 .map(|li| li.amount.abs())
8513 .sum();
8514
8515 if total_revenue > Decimal::ZERO {
8516 let hundred = Decimal::from(100);
8517 let gross_margin_pct =
8518 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8519 let operating_income = total_revenue - total_cogs - total_opex;
8520 let op_margin_pct =
8521 (operating_income * hundred / total_revenue).round_dp(2);
8522
8523 for kpi in &mut kpis {
8525 if kpi.name == "Gross Margin" {
8526 kpi.value = gross_margin_pct;
8527 } else if kpi.name == "Operating Margin" {
8528 kpi.value = op_margin_pct;
8529 }
8530 }
8531 }
8532 }
8533
8534 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8536 fs.statement_type == StatementType::BalanceSheet
8537 && fs.company_code == company_code
8538 }) {
8539 let current_assets: Decimal = bs
8540 .line_items
8541 .iter()
8542 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8543 .map(|li| li.amount)
8544 .sum();
8545 let current_liabilities: Decimal = bs
8546 .line_items
8547 .iter()
8548 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8549 .map(|li| li.amount.abs())
8550 .sum();
8551
8552 if current_liabilities > Decimal::ZERO {
8553 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8554 for kpi in &mut kpis {
8555 if kpi.name == "Current Ratio" {
8556 kpi.value = current_ratio;
8557 }
8558 }
8559 }
8560 }
8561 }
8562
8563 snapshot.kpi_count = kpis.len();
8564 snapshot.kpis = kpis;
8565 }
8566
8567 if self.config.financial_reporting.budgets.enabled {
8569 let account_data: Vec<(String, String)> = coa
8570 .accounts
8571 .iter()
8572 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8573 .collect();
8574
8575 if !account_data.is_empty() {
8576 let fiscal_year = start_date.year() as u32;
8577 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8578 let budget = budget_gen.generate(
8579 company_code,
8580 fiscal_year,
8581 &account_data,
8582 &self.config.financial_reporting.budgets,
8583 );
8584 snapshot.budget_line_count = budget.line_items.len();
8585 snapshot.budgets.push(budget);
8586 }
8587 }
8588
8589 stats.sales_quote_count = snapshot.sales_quote_count;
8590 stats.kpi_count = snapshot.kpi_count;
8591 stats.budget_line_count = snapshot.budget_line_count;
8592
8593 info!(
8594 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8595 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8596 );
8597 self.check_resources_with_log("post-sales-kpi-budgets")?;
8598
8599 Ok(snapshot)
8600 }
8601
8602 fn compute_pre_tax_income(
8609 company_code: &str,
8610 journal_entries: &[JournalEntry],
8611 ) -> rust_decimal::Decimal {
8612 use datasynth_core::accounts::AccountCategory;
8613 use rust_decimal::Decimal;
8614
8615 let mut total_revenue = Decimal::ZERO;
8616 let mut total_expenses = Decimal::ZERO;
8617
8618 for je in journal_entries {
8619 if je.header.company_code != company_code {
8620 continue;
8621 }
8622 for line in &je.lines {
8623 let cat = AccountCategory::from_account(&line.gl_account);
8624 match cat {
8625 AccountCategory::Revenue => {
8626 total_revenue += line.credit_amount - line.debit_amount;
8627 }
8628 AccountCategory::Cogs
8629 | AccountCategory::OperatingExpense
8630 | AccountCategory::OtherIncomeExpense => {
8631 total_expenses += line.debit_amount - line.credit_amount;
8632 }
8633 _ => {}
8634 }
8635 }
8636 }
8637
8638 let pti = (total_revenue - total_expenses).round_dp(2);
8639 if pti == rust_decimal::Decimal::ZERO {
8640 rust_decimal::Decimal::from(1_000_000u32)
8643 } else {
8644 pti
8645 }
8646 }
8647
8648 fn phase_tax_generation(
8650 &mut self,
8651 document_flows: &DocumentFlowSnapshot,
8652 journal_entries: &[JournalEntry],
8653 stats: &mut EnhancedGenerationStatistics,
8654 ) -> SynthResult<TaxSnapshot> {
8655 if !self.phase_config.generate_tax {
8656 debug!("Phase 20: Skipped (tax generation disabled)");
8657 return Ok(TaxSnapshot::default());
8658 }
8659 info!("Phase 20: Generating Tax Data");
8660
8661 let seed = self.seed;
8662 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8663 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8664 let fiscal_year = start_date.year();
8665 let company_code = self
8666 .config
8667 .companies
8668 .first()
8669 .map(|c| c.code.as_str())
8670 .unwrap_or("1000");
8671
8672 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8673 seed + 370,
8674 self.config.tax.clone(),
8675 );
8676
8677 let pack = self.primary_pack().clone();
8678 let (jurisdictions, codes) =
8679 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8680
8681 let mut provisions = Vec::new();
8683 if self.config.tax.provisions.enabled {
8684 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8685 for company in &self.config.companies {
8686 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8687 let statutory_rate = rust_decimal::Decimal::new(
8688 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8689 2,
8690 );
8691 let provision = provision_gen.generate(
8692 &company.code,
8693 start_date,
8694 pre_tax_income,
8695 statutory_rate,
8696 );
8697 provisions.push(provision);
8698 }
8699 }
8700
8701 let mut tax_lines = Vec::new();
8703 if !codes.is_empty() {
8704 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8705 datasynth_generators::TaxLineGeneratorConfig::default(),
8706 codes.clone(),
8707 seed + 372,
8708 );
8709
8710 let buyer_country = self
8713 .config
8714 .companies
8715 .first()
8716 .map(|c| c.country.as_str())
8717 .unwrap_or("US");
8718 for vi in &document_flows.vendor_invoices {
8719 let lines = tax_line_gen.generate_for_document(
8720 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8721 &vi.header.document_id,
8722 buyer_country, buyer_country,
8724 vi.payable_amount,
8725 vi.header.document_date,
8726 None,
8727 );
8728 tax_lines.extend(lines);
8729 }
8730
8731 for ci in &document_flows.customer_invoices {
8733 let lines = tax_line_gen.generate_for_document(
8734 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8735 &ci.header.document_id,
8736 buyer_country, buyer_country,
8738 ci.total_gross_amount,
8739 ci.header.document_date,
8740 None,
8741 );
8742 tax_lines.extend(lines);
8743 }
8744 }
8745
8746 let deferred_tax = {
8748 let companies: Vec<(&str, &str)> = self
8749 .config
8750 .companies
8751 .iter()
8752 .map(|c| (c.code.as_str(), c.country.as_str()))
8753 .collect();
8754 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8755 deferred_gen.generate(&companies, start_date, journal_entries)
8756 };
8757
8758 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8761 std::collections::HashMap::new();
8762 for vi in &document_flows.vendor_invoices {
8763 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8764 }
8765 for ci in &document_flows.customer_invoices {
8766 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8767 }
8768
8769 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8771 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8772 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8773 &tax_lines,
8774 company_code,
8775 &doc_dates,
8776 end_date,
8777 );
8778 debug!("Generated {} tax posting JEs", jes.len());
8779 jes
8780 } else {
8781 Vec::new()
8782 };
8783
8784 let snapshot = TaxSnapshot {
8785 jurisdiction_count: jurisdictions.len(),
8786 code_count: codes.len(),
8787 jurisdictions,
8788 codes,
8789 tax_provisions: provisions,
8790 tax_lines,
8791 tax_returns: Vec::new(),
8792 withholding_records: Vec::new(),
8793 tax_anomaly_labels: Vec::new(),
8794 deferred_tax,
8795 tax_posting_journal_entries,
8796 };
8797
8798 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8799 stats.tax_code_count = snapshot.code_count;
8800 stats.tax_provision_count = snapshot.tax_provisions.len();
8801 stats.tax_line_count = snapshot.tax_lines.len();
8802
8803 info!(
8804 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8805 snapshot.jurisdiction_count,
8806 snapshot.code_count,
8807 snapshot.tax_provisions.len(),
8808 snapshot.deferred_tax.temporary_differences.len(),
8809 snapshot.deferred_tax.journal_entries.len(),
8810 snapshot.tax_posting_journal_entries.len(),
8811 );
8812 self.check_resources_with_log("post-tax")?;
8813
8814 Ok(snapshot)
8815 }
8816
8817 fn phase_esg_generation(
8819 &mut self,
8820 document_flows: &DocumentFlowSnapshot,
8821 manufacturing: &ManufacturingSnapshot,
8822 stats: &mut EnhancedGenerationStatistics,
8823 ) -> SynthResult<EsgSnapshot> {
8824 if !self.phase_config.generate_esg {
8825 debug!("Phase 21: Skipped (ESG generation disabled)");
8826 return Ok(EsgSnapshot::default());
8827 }
8828 let degradation = self.check_resources()?;
8829 if degradation >= DegradationLevel::Reduced {
8830 debug!(
8831 "Phase skipped due to resource pressure (degradation: {:?})",
8832 degradation
8833 );
8834 return Ok(EsgSnapshot::default());
8835 }
8836 info!("Phase 21: Generating ESG Data");
8837
8838 let seed = self.seed;
8839 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8840 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8841 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8842 let entity_id = self
8843 .config
8844 .companies
8845 .first()
8846 .map(|c| c.code.as_str())
8847 .unwrap_or("1000");
8848
8849 let esg_cfg = &self.config.esg;
8850 let mut snapshot = EsgSnapshot::default();
8851
8852 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8854 esg_cfg.environmental.energy.clone(),
8855 seed + 80,
8856 );
8857 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8858
8859 let facility_count = esg_cfg.environmental.energy.facility_count;
8861 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8862 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8863
8864 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8866 seed + 82,
8867 esg_cfg.environmental.waste.diversion_target,
8868 facility_count,
8869 );
8870 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8871
8872 let mut emission_gen =
8874 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8875
8876 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8878 .iter()
8879 .map(|e| datasynth_generators::EnergyInput {
8880 facility_id: e.facility_id.clone(),
8881 energy_type: match e.energy_source {
8882 EnergySourceType::NaturalGas => {
8883 datasynth_generators::EnergyInputType::NaturalGas
8884 }
8885 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8886 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8887 _ => datasynth_generators::EnergyInputType::Electricity,
8888 },
8889 consumption_kwh: e.consumption_kwh,
8890 period: e.period,
8891 })
8892 .collect();
8893
8894 if !manufacturing.production_orders.is_empty() {
8896 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8897 &manufacturing.production_orders,
8898 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8901 if !mfg_energy.is_empty() {
8902 info!(
8903 "ESG: {} energy inputs derived from {} production orders",
8904 mfg_energy.len(),
8905 manufacturing.production_orders.len(),
8906 );
8907 energy_inputs.extend(mfg_energy);
8908 }
8909 }
8910
8911 let mut emissions = Vec::new();
8912 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8913 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8914
8915 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8917 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8918 for payment in &document_flows.payments {
8919 if payment.is_vendor {
8920 *totals
8921 .entry(payment.business_partner_id.clone())
8922 .or_default() += payment.amount;
8923 }
8924 }
8925 totals
8926 };
8927 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8928 .master_data
8929 .vendors
8930 .iter()
8931 .map(|v| {
8932 let spend = vendor_payment_totals
8933 .get(&v.vendor_id)
8934 .copied()
8935 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8936 datasynth_generators::VendorSpendInput {
8937 vendor_id: v.vendor_id.clone(),
8938 category: format!("{:?}", v.vendor_type).to_lowercase(),
8939 spend,
8940 country: v.country.clone(),
8941 }
8942 })
8943 .collect();
8944 if !vendor_spend.is_empty() {
8945 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8946 entity_id,
8947 &vendor_spend,
8948 start_date,
8949 end_date,
8950 ));
8951 }
8952
8953 let headcount = self.master_data.employees.len() as u32;
8955 if headcount > 0 {
8956 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8957 emissions.extend(emission_gen.generate_scope3_business_travel(
8958 entity_id,
8959 travel_spend,
8960 start_date,
8961 ));
8962 emissions
8963 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8964 }
8965
8966 snapshot.emission_count = emissions.len();
8967 snapshot.emissions = emissions;
8968 snapshot.energy = energy_records;
8969
8970 let mut workforce_gen =
8972 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8973 let total_headcount = headcount.max(100);
8974 snapshot.diversity =
8975 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8976 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8977
8978 if !self.master_data.employees.is_empty() {
8980 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8981 entity_id,
8982 &self.master_data.employees,
8983 end_date,
8984 );
8985 if !hr_diversity.is_empty() {
8986 info!(
8987 "ESG: {} diversity metrics derived from {} actual employees",
8988 hr_diversity.len(),
8989 self.master_data.employees.len(),
8990 );
8991 snapshot.diversity.extend(hr_diversity);
8992 }
8993 }
8994
8995 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8996 entity_id,
8997 facility_count,
8998 start_date,
8999 end_date,
9000 );
9001
9002 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
9005 entity_id,
9006 &snapshot.safety_incidents,
9007 total_hours,
9008 start_date,
9009 );
9010 snapshot.safety_metrics = vec![safety_metric];
9011
9012 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9014 seed + 85,
9015 esg_cfg.governance.board_size,
9016 esg_cfg.governance.independence_target,
9017 );
9018 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9019
9020 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9022 esg_cfg.supply_chain_esg.clone(),
9023 seed + 86,
9024 );
9025 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9026 .master_data
9027 .vendors
9028 .iter()
9029 .map(|v| datasynth_generators::VendorInput {
9030 vendor_id: v.vendor_id.clone(),
9031 country: v.country.clone(),
9032 industry: format!("{:?}", v.vendor_type).to_lowercase(),
9033 quality_score: None,
9034 })
9035 .collect();
9036 snapshot.supplier_assessments =
9037 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9038
9039 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9041 seed + 87,
9042 esg_cfg.reporting.clone(),
9043 esg_cfg.climate_scenarios.clone(),
9044 );
9045 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9046 snapshot.disclosures = disclosure_gen.generate_disclosures(
9047 entity_id,
9048 &snapshot.materiality,
9049 start_date,
9050 end_date,
9051 );
9052 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9053 snapshot.disclosure_count = snapshot.disclosures.len();
9054
9055 if esg_cfg.anomaly_rate > 0.0 {
9057 let mut anomaly_injector =
9058 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9059 let mut labels = Vec::new();
9060 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9061 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9062 labels.extend(
9063 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9064 );
9065 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9066 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9067 snapshot.anomaly_labels = labels;
9068 }
9069
9070 stats.esg_emission_count = snapshot.emission_count;
9071 stats.esg_disclosure_count = snapshot.disclosure_count;
9072
9073 info!(
9074 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9075 snapshot.emission_count,
9076 snapshot.disclosure_count,
9077 snapshot.supplier_assessments.len()
9078 );
9079 self.check_resources_with_log("post-esg")?;
9080
9081 Ok(snapshot)
9082 }
9083
9084 fn phase_treasury_data(
9086 &mut self,
9087 document_flows: &DocumentFlowSnapshot,
9088 subledger: &SubledgerSnapshot,
9089 intercompany: &IntercompanySnapshot,
9090 stats: &mut EnhancedGenerationStatistics,
9091 ) -> SynthResult<TreasurySnapshot> {
9092 if !self.phase_config.generate_treasury {
9093 debug!("Phase 22: Skipped (treasury generation disabled)");
9094 return Ok(TreasurySnapshot::default());
9095 }
9096 let degradation = self.check_resources()?;
9097 if degradation >= DegradationLevel::Reduced {
9098 debug!(
9099 "Phase skipped due to resource pressure (degradation: {:?})",
9100 degradation
9101 );
9102 return Ok(TreasurySnapshot::default());
9103 }
9104 info!("Phase 22: Generating Treasury Data");
9105
9106 let seed = self.seed;
9107 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9108 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9109 let currency = self
9110 .config
9111 .companies
9112 .first()
9113 .map(|c| c.currency.as_str())
9114 .unwrap_or("USD");
9115 let entity_id = self
9116 .config
9117 .companies
9118 .first()
9119 .map(|c| c.code.as_str())
9120 .unwrap_or("1000");
9121
9122 let mut snapshot = TreasurySnapshot::default();
9123
9124 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9126 self.config.treasury.debt.clone(),
9127 seed + 90,
9128 );
9129 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9130
9131 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9133 self.config.treasury.hedging.clone(),
9134 seed + 91,
9135 );
9136 for debt in &snapshot.debt_instruments {
9137 if debt.rate_type == InterestRateType::Variable {
9138 let swap = hedge_gen.generate_ir_swap(
9139 currency,
9140 debt.principal,
9141 debt.origination_date,
9142 debt.maturity_date,
9143 );
9144 snapshot.hedging_instruments.push(swap);
9145 }
9146 }
9147
9148 {
9151 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9152 for payment in &document_flows.payments {
9153 if payment.currency != currency {
9154 let entry = fx_map
9155 .entry(payment.currency.clone())
9156 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9157 entry.0 += payment.amount;
9158 if payment.header.document_date > entry.1 {
9160 entry.1 = payment.header.document_date;
9161 }
9162 }
9163 }
9164 if !fx_map.is_empty() {
9165 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9166 .into_iter()
9167 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9168 datasynth_generators::treasury::FxExposure {
9169 currency_pair: format!("{foreign_ccy}/{currency}"),
9170 foreign_currency: foreign_ccy,
9171 net_amount,
9172 settlement_date,
9173 description: "AP payment FX exposure".to_string(),
9174 }
9175 })
9176 .collect();
9177 let (fx_instruments, fx_relationships) =
9178 hedge_gen.generate(start_date, &fx_exposures);
9179 snapshot.hedging_instruments.extend(fx_instruments);
9180 snapshot.hedge_relationships.extend(fx_relationships);
9181 }
9182 }
9183
9184 if self.config.treasury.anomaly_rate > 0.0 {
9186 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9187 seed + 92,
9188 self.config.treasury.anomaly_rate,
9189 );
9190 let mut labels = Vec::new();
9191 labels.extend(
9192 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9193 );
9194 snapshot.treasury_anomaly_labels = labels;
9195 }
9196
9197 if self.config.treasury.cash_positioning.enabled {
9199 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9200
9201 for payment in &document_flows.payments {
9203 cash_flows.push(datasynth_generators::treasury::CashFlow {
9204 date: payment.header.document_date,
9205 account_id: format!("{entity_id}-MAIN"),
9206 amount: payment.amount,
9207 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9208 });
9209 }
9210
9211 for chain in &document_flows.o2c_chains {
9213 if let Some(ref receipt) = chain.customer_receipt {
9214 cash_flows.push(datasynth_generators::treasury::CashFlow {
9215 date: receipt.header.document_date,
9216 account_id: format!("{entity_id}-MAIN"),
9217 amount: receipt.amount,
9218 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9219 });
9220 }
9221 for receipt in &chain.remainder_receipts {
9223 cash_flows.push(datasynth_generators::treasury::CashFlow {
9224 date: receipt.header.document_date,
9225 account_id: format!("{entity_id}-MAIN"),
9226 amount: receipt.amount,
9227 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9228 });
9229 }
9230 }
9231
9232 if !cash_flows.is_empty() {
9233 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9234 self.config.treasury.cash_positioning.clone(),
9235 seed + 93,
9236 );
9237 let account_id = format!("{entity_id}-MAIN");
9238 snapshot.cash_positions = cash_gen.generate(
9239 entity_id,
9240 &account_id,
9241 currency,
9242 &cash_flows,
9243 start_date,
9244 start_date + chrono::Months::new(self.config.global.period_months),
9245 rust_decimal::Decimal::new(1_000_000, 0), );
9247 }
9248 }
9249
9250 if self.config.treasury.cash_forecasting.enabled {
9252 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9253
9254 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9256 .ar_invoices
9257 .iter()
9258 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9259 .map(|inv| {
9260 let days_past_due = if inv.due_date < end_date {
9261 (end_date - inv.due_date).num_days().max(0) as u32
9262 } else {
9263 0
9264 };
9265 datasynth_generators::treasury::ArAgingItem {
9266 expected_date: inv.due_date,
9267 amount: inv.amount_remaining,
9268 days_past_due,
9269 document_id: inv.invoice_number.clone(),
9270 }
9271 })
9272 .collect();
9273
9274 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9276 .ap_invoices
9277 .iter()
9278 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9279 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9280 payment_date: inv.due_date,
9281 amount: inv.amount_remaining,
9282 document_id: inv.invoice_number.clone(),
9283 })
9284 .collect();
9285
9286 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9287 self.config.treasury.cash_forecasting.clone(),
9288 seed + 94,
9289 );
9290 let forecast = forecast_gen.generate(
9291 entity_id,
9292 currency,
9293 end_date,
9294 &ar_items,
9295 &ap_items,
9296 &[], );
9298 snapshot.cash_forecasts.push(forecast);
9299 }
9300
9301 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9303 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9304 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9305 self.config.treasury.cash_pooling.clone(),
9306 seed + 95,
9307 );
9308
9309 let account_ids: Vec<String> = snapshot
9311 .cash_positions
9312 .iter()
9313 .map(|cp| cp.bank_account_id.clone())
9314 .collect::<std::collections::HashSet<_>>()
9315 .into_iter()
9316 .collect();
9317
9318 if let Some(pool) =
9319 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9320 {
9321 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9323 for cp in &snapshot.cash_positions {
9324 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9325 }
9326
9327 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9328 latest_balances
9329 .into_iter()
9330 .filter(|(id, _)| pool.participant_accounts.contains(id))
9331 .map(
9332 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9333 account_id: id,
9334 balance,
9335 },
9336 )
9337 .collect();
9338
9339 let sweeps =
9340 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9341 snapshot.cash_pool_sweeps = sweeps;
9342 snapshot.cash_pools.push(pool);
9343 }
9344 }
9345
9346 if self.config.treasury.bank_guarantees.enabled {
9348 let vendor_names: Vec<String> = self
9349 .master_data
9350 .vendors
9351 .iter()
9352 .map(|v| v.name.clone())
9353 .collect();
9354 if !vendor_names.is_empty() {
9355 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9356 self.config.treasury.bank_guarantees.clone(),
9357 seed + 96,
9358 );
9359 snapshot.bank_guarantees =
9360 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9361 }
9362 }
9363
9364 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9366 let entity_ids: Vec<String> = self
9367 .config
9368 .companies
9369 .iter()
9370 .map(|c| c.code.clone())
9371 .collect();
9372 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9373 .matched_pairs
9374 .iter()
9375 .map(|mp| {
9376 (
9377 mp.seller_company.clone(),
9378 mp.buyer_company.clone(),
9379 mp.amount,
9380 )
9381 })
9382 .collect();
9383 if entity_ids.len() >= 2 {
9384 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9385 self.config.treasury.netting.clone(),
9386 seed + 97,
9387 );
9388 snapshot.netting_runs = netting_gen.generate(
9389 &entity_ids,
9390 currency,
9391 start_date,
9392 self.config.global.period_months,
9393 &ic_amounts,
9394 );
9395 }
9396 }
9397
9398 {
9400 use datasynth_generators::treasury::TreasuryAccounting;
9401
9402 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9403 let mut treasury_jes = Vec::new();
9404
9405 if !snapshot.debt_instruments.is_empty() {
9407 let debt_jes =
9408 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9409 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9410 treasury_jes.extend(debt_jes);
9411 }
9412
9413 if !snapshot.hedging_instruments.is_empty() {
9415 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9416 &snapshot.hedging_instruments,
9417 &snapshot.hedge_relationships,
9418 end_date,
9419 entity_id,
9420 );
9421 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9422 treasury_jes.extend(hedge_jes);
9423 }
9424
9425 if !snapshot.cash_pool_sweeps.is_empty() {
9427 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9428 &snapshot.cash_pool_sweeps,
9429 entity_id,
9430 );
9431 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9432 treasury_jes.extend(sweep_jes);
9433 }
9434
9435 if !treasury_jes.is_empty() {
9436 debug!("Total treasury journal entries: {}", treasury_jes.len());
9437 }
9438 snapshot.journal_entries = treasury_jes;
9439 }
9440
9441 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9442 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9443 stats.cash_position_count = snapshot.cash_positions.len();
9444 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9445 stats.cash_pool_count = snapshot.cash_pools.len();
9446
9447 info!(
9448 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9449 snapshot.debt_instruments.len(),
9450 snapshot.hedging_instruments.len(),
9451 snapshot.cash_positions.len(),
9452 snapshot.cash_forecasts.len(),
9453 snapshot.cash_pools.len(),
9454 snapshot.bank_guarantees.len(),
9455 snapshot.netting_runs.len(),
9456 snapshot.journal_entries.len(),
9457 );
9458 self.check_resources_with_log("post-treasury")?;
9459
9460 Ok(snapshot)
9461 }
9462
9463 fn phase_project_accounting(
9465 &mut self,
9466 document_flows: &DocumentFlowSnapshot,
9467 hr: &HrSnapshot,
9468 stats: &mut EnhancedGenerationStatistics,
9469 ) -> SynthResult<ProjectAccountingSnapshot> {
9470 if !self.phase_config.generate_project_accounting {
9471 debug!("Phase 23: Skipped (project accounting disabled)");
9472 return Ok(ProjectAccountingSnapshot::default());
9473 }
9474 let degradation = self.check_resources()?;
9475 if degradation >= DegradationLevel::Reduced {
9476 debug!(
9477 "Phase skipped due to resource pressure (degradation: {:?})",
9478 degradation
9479 );
9480 return Ok(ProjectAccountingSnapshot::default());
9481 }
9482 info!("Phase 23: Generating Project Accounting Data");
9483
9484 let seed = self.seed;
9485 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9486 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9487 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9488 let company_code = self
9489 .config
9490 .companies
9491 .first()
9492 .map(|c| c.code.as_str())
9493 .unwrap_or("1000");
9494
9495 let mut snapshot = ProjectAccountingSnapshot::default();
9496
9497 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9499 self.config.project_accounting.clone(),
9500 seed + 95,
9501 );
9502 let pool = project_gen.generate(company_code, start_date, end_date);
9503 snapshot.projects = pool.projects.clone();
9504
9505 {
9507 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9508 Vec::new();
9509
9510 for te in &hr.time_entries {
9512 let total_hours = te.hours_regular + te.hours_overtime;
9513 if total_hours > 0.0 {
9514 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9515 id: te.entry_id.clone(),
9516 entity_id: company_code.to_string(),
9517 date: te.date,
9518 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9519 .unwrap_or(rust_decimal::Decimal::ZERO),
9520 source_type: CostSourceType::TimeEntry,
9521 hours: Some(
9522 rust_decimal::Decimal::from_f64_retain(total_hours)
9523 .unwrap_or(rust_decimal::Decimal::ZERO),
9524 ),
9525 });
9526 }
9527 }
9528
9529 for er in &hr.expense_reports {
9531 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9532 id: er.report_id.clone(),
9533 entity_id: company_code.to_string(),
9534 date: er.submission_date,
9535 amount: er.total_amount,
9536 source_type: CostSourceType::ExpenseReport,
9537 hours: None,
9538 });
9539 }
9540
9541 for po in &document_flows.purchase_orders {
9543 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9544 id: po.header.document_id.clone(),
9545 entity_id: company_code.to_string(),
9546 date: po.header.document_date,
9547 amount: po.total_net_amount,
9548 source_type: CostSourceType::PurchaseOrder,
9549 hours: None,
9550 });
9551 }
9552
9553 for vi in &document_flows.vendor_invoices {
9555 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9556 id: vi.header.document_id.clone(),
9557 entity_id: company_code.to_string(),
9558 date: vi.header.document_date,
9559 amount: vi.payable_amount,
9560 source_type: CostSourceType::VendorInvoice,
9561 hours: None,
9562 });
9563 }
9564
9565 if !source_docs.is_empty() && !pool.projects.is_empty() {
9566 let mut cost_gen =
9567 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9568 self.config.project_accounting.cost_allocation.clone(),
9569 seed + 99,
9570 );
9571 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9572 }
9573 }
9574
9575 if self.config.project_accounting.change_orders.enabled {
9577 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9578 self.config.project_accounting.change_orders.clone(),
9579 seed + 96,
9580 );
9581 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9582 }
9583
9584 if self.config.project_accounting.milestones.enabled {
9586 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9587 self.config.project_accounting.milestones.clone(),
9588 seed + 97,
9589 );
9590 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9591 }
9592
9593 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9595 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9596 self.config.project_accounting.earned_value.clone(),
9597 seed + 98,
9598 );
9599 snapshot.earned_value_metrics =
9600 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9601 }
9602
9603 if self.config.project_accounting.revenue_recognition.enabled
9605 && !snapshot.projects.is_empty()
9606 && !snapshot.cost_lines.is_empty()
9607 {
9608 use datasynth_generators::project_accounting::RevenueGenerator;
9609 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9610 let avg_contract_value =
9611 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9612 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9613
9614 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9617 snapshot
9618 .projects
9619 .iter()
9620 .filter(|p| {
9621 matches!(
9622 p.project_type,
9623 datasynth_core::models::ProjectType::Customer
9624 )
9625 })
9626 .map(|p| {
9627 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9628 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9629 } else {
9631 avg_contract_value
9632 };
9633 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9635 })
9636 .collect();
9637
9638 if !contract_values.is_empty() {
9639 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9640 snapshot.revenue_records = rev_gen.generate(
9641 &snapshot.projects,
9642 &snapshot.cost_lines,
9643 &contract_values,
9644 start_date,
9645 end_date,
9646 );
9647 debug!(
9648 "Generated {} revenue recognition records for {} customer projects",
9649 snapshot.revenue_records.len(),
9650 contract_values.len()
9651 );
9652 }
9653 }
9654
9655 stats.project_count = snapshot.projects.len();
9656 stats.project_change_order_count = snapshot.change_orders.len();
9657 stats.project_cost_line_count = snapshot.cost_lines.len();
9658
9659 info!(
9660 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9661 snapshot.projects.len(),
9662 snapshot.change_orders.len(),
9663 snapshot.milestones.len(),
9664 snapshot.earned_value_metrics.len()
9665 );
9666 self.check_resources_with_log("post-project-accounting")?;
9667
9668 Ok(snapshot)
9669 }
9670
9671 fn phase_evolution_events(
9673 &mut self,
9674 stats: &mut EnhancedGenerationStatistics,
9675 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9676 if !self.phase_config.generate_evolution_events {
9677 debug!("Phase 24: Skipped (evolution events disabled)");
9678 return Ok((Vec::new(), Vec::new()));
9679 }
9680 info!("Phase 24: Generating Process Evolution + Organizational Events");
9681
9682 let seed = self.seed;
9683 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9684 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9685 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9686
9687 let mut proc_gen =
9689 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9690 seed + 100,
9691 );
9692 let process_events = proc_gen.generate_events(start_date, end_date);
9693
9694 let company_codes: Vec<String> = self
9696 .config
9697 .companies
9698 .iter()
9699 .map(|c| c.code.clone())
9700 .collect();
9701 let mut org_gen =
9702 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9703 seed + 101,
9704 );
9705 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9706
9707 stats.process_evolution_event_count = process_events.len();
9708 stats.organizational_event_count = org_events.len();
9709
9710 info!(
9711 "Evolution events generated: {} process evolution, {} organizational",
9712 process_events.len(),
9713 org_events.len()
9714 );
9715 self.check_resources_with_log("post-evolution-events")?;
9716
9717 Ok((process_events, org_events))
9718 }
9719
9720 fn phase_disruption_events(
9723 &self,
9724 stats: &mut EnhancedGenerationStatistics,
9725 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9726 if !self.config.organizational_events.enabled {
9727 debug!("Phase 24b: Skipped (organizational events disabled)");
9728 return Ok(Vec::new());
9729 }
9730 info!("Phase 24b: Generating Disruption Events");
9731
9732 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9733 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9734 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9735
9736 let company_codes: Vec<String> = self
9737 .config
9738 .companies
9739 .iter()
9740 .map(|c| c.code.clone())
9741 .collect();
9742
9743 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9744 let events = gen.generate(start_date, end_date, &company_codes);
9745
9746 stats.disruption_event_count = events.len();
9747 info!("Disruption events generated: {} events", events.len());
9748 self.check_resources_with_log("post-disruption-events")?;
9749
9750 Ok(events)
9751 }
9752
9753 fn phase_counterfactuals(
9760 &self,
9761 journal_entries: &[JournalEntry],
9762 stats: &mut EnhancedGenerationStatistics,
9763 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9764 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9765 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9766 return Ok(Vec::new());
9767 }
9768 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9769
9770 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9771
9772 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9773
9774 let specs = [
9776 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9777 CounterfactualSpec::ShiftDate { days: -14 },
9778 CounterfactualSpec::SelfApprove,
9779 CounterfactualSpec::SplitTransaction { split_count: 3 },
9780 ];
9781
9782 let pairs: Vec<_> = journal_entries
9783 .iter()
9784 .enumerate()
9785 .map(|(i, je)| {
9786 let spec = &specs[i % specs.len()];
9787 gen.generate(je, spec)
9788 })
9789 .collect();
9790
9791 stats.counterfactual_pair_count = pairs.len();
9792 info!(
9793 "Counterfactual pairs generated: {} pairs from {} journal entries",
9794 pairs.len(),
9795 journal_entries.len()
9796 );
9797 self.check_resources_with_log("post-counterfactuals")?;
9798
9799 Ok(pairs)
9800 }
9801
9802 fn phase_red_flags(
9809 &self,
9810 anomaly_labels: &AnomalyLabels,
9811 document_flows: &DocumentFlowSnapshot,
9812 stats: &mut EnhancedGenerationStatistics,
9813 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9814 if !self.config.fraud.enabled {
9815 debug!("Phase 26: Skipped (fraud generation disabled)");
9816 return Ok(Vec::new());
9817 }
9818 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9819
9820 use datasynth_generators::fraud::RedFlagGenerator;
9821
9822 let generator = RedFlagGenerator::new();
9823 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9824
9825 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9827 .labels
9828 .iter()
9829 .filter(|label| label.anomaly_type.is_intentional())
9830 .map(|label| label.document_id.as_str())
9831 .collect();
9832
9833 let mut flags = Vec::new();
9834
9835 for chain in &document_flows.p2p_chains {
9837 let doc_id = &chain.purchase_order.header.document_id;
9838 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9839 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9840 }
9841
9842 for chain in &document_flows.o2c_chains {
9844 let doc_id = &chain.sales_order.header.document_id;
9845 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9846 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9847 }
9848
9849 stats.red_flag_count = flags.len();
9850 info!(
9851 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9852 flags.len(),
9853 document_flows.p2p_chains.len(),
9854 document_flows.o2c_chains.len(),
9855 fraud_doc_ids.len()
9856 );
9857 self.check_resources_with_log("post-red-flags")?;
9858
9859 Ok(flags)
9860 }
9861
9862 fn phase_collusion_rings(
9868 &mut self,
9869 stats: &mut EnhancedGenerationStatistics,
9870 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9871 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9872 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9873 return Ok(Vec::new());
9874 }
9875 info!("Phase 26b: Generating Collusion Rings");
9876
9877 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9878 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9879 let months = self.config.global.period_months;
9880
9881 let employee_ids: Vec<String> = self
9882 .master_data
9883 .employees
9884 .iter()
9885 .map(|e| e.employee_id.clone())
9886 .collect();
9887 let vendor_ids: Vec<String> = self
9888 .master_data
9889 .vendors
9890 .iter()
9891 .map(|v| v.vendor_id.clone())
9892 .collect();
9893
9894 let mut generator =
9895 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9896 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9897
9898 stats.collusion_ring_count = rings.len();
9899 info!(
9900 "Collusion rings generated: {} rings, total members: {}",
9901 rings.len(),
9902 rings
9903 .iter()
9904 .map(datasynth_generators::fraud::CollusionRing::size)
9905 .sum::<usize>()
9906 );
9907 self.check_resources_with_log("post-collusion-rings")?;
9908
9909 Ok(rings)
9910 }
9911
9912 fn phase_temporal_attributes(
9917 &mut self,
9918 stats: &mut EnhancedGenerationStatistics,
9919 ) -> SynthResult<
9920 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9921 > {
9922 if !self.config.temporal_attributes.enabled {
9923 debug!("Phase 27: Skipped (temporal attributes disabled)");
9924 return Ok(Vec::new());
9925 }
9926 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9927
9928 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9929 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9930
9931 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9935 || self.config.temporal_attributes.enabled;
9936 let temporal_config = {
9937 let ta = &self.config.temporal_attributes;
9938 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9939 .enabled(ta.enabled)
9940 .closed_probability(ta.valid_time.closed_probability)
9941 .avg_validity_days(ta.valid_time.avg_validity_days)
9942 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9943 .with_version_chains(if generate_version_chains {
9944 ta.avg_versions_per_entity
9945 } else {
9946 1.0
9947 })
9948 .build()
9949 };
9950 let temporal_config = if self
9952 .config
9953 .temporal_attributes
9954 .transaction_time
9955 .allow_backdating
9956 {
9957 let mut c = temporal_config;
9958 c.transaction_time.allow_backdating = true;
9959 c.transaction_time.backdating_probability = self
9960 .config
9961 .temporal_attributes
9962 .transaction_time
9963 .backdating_probability;
9964 c.transaction_time.max_backdate_days = self
9965 .config
9966 .temporal_attributes
9967 .transaction_time
9968 .max_backdate_days;
9969 c
9970 } else {
9971 temporal_config
9972 };
9973 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9974 temporal_config,
9975 self.seed + 130,
9976 start_date,
9977 );
9978
9979 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9980 self.seed + 130,
9981 datasynth_core::GeneratorType::Vendor,
9982 );
9983
9984 let chains: Vec<_> = self
9985 .master_data
9986 .vendors
9987 .iter()
9988 .map(|vendor| {
9989 let id = uuid_factory.next();
9990 gen.generate_version_chain(vendor.clone(), id)
9991 })
9992 .collect();
9993
9994 stats.temporal_version_chain_count = chains.len();
9995 info!("Temporal version chains generated: {} chains", chains.len());
9996 self.check_resources_with_log("post-temporal-attributes")?;
9997
9998 Ok(chains)
9999 }
10000
10001 fn phase_entity_relationships(
10011 &self,
10012 journal_entries: &[JournalEntry],
10013 document_flows: &DocumentFlowSnapshot,
10014 stats: &mut EnhancedGenerationStatistics,
10015 ) -> SynthResult<(
10016 Option<datasynth_core::models::EntityGraph>,
10017 Vec<datasynth_core::models::CrossProcessLink>,
10018 )> {
10019 use datasynth_generators::relationships::{
10020 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10021 TransactionSummary,
10022 };
10023
10024 let rs_enabled = self.config.relationship_strength.enabled;
10025 let cpl_enabled = self.config.cross_process_links.enabled
10026 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10027
10028 if !rs_enabled && !cpl_enabled {
10029 debug!(
10030 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10031 );
10032 return Ok((None, Vec::new()));
10033 }
10034
10035 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10036
10037 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10038 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10039
10040 let company_code = self
10041 .config
10042 .companies
10043 .first()
10044 .map(|c| c.code.as_str())
10045 .unwrap_or("1000");
10046
10047 let gen_config = EntityGraphConfig {
10049 enabled: rs_enabled,
10050 cross_process: datasynth_generators::relationships::CrossProcessConfig {
10051 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10052 enable_return_flows: false,
10053 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10054 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10055 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10057 1.0
10058 } else {
10059 0.30
10060 },
10061 ..Default::default()
10062 },
10063 strength_config: datasynth_generators::relationships::StrengthConfig {
10064 transaction_volume_weight: self
10065 .config
10066 .relationship_strength
10067 .calculation
10068 .transaction_volume_weight,
10069 transaction_count_weight: self
10070 .config
10071 .relationship_strength
10072 .calculation
10073 .transaction_count_weight,
10074 duration_weight: self
10075 .config
10076 .relationship_strength
10077 .calculation
10078 .relationship_duration_weight,
10079 recency_weight: self.config.relationship_strength.calculation.recency_weight,
10080 mutual_connections_weight: self
10081 .config
10082 .relationship_strength
10083 .calculation
10084 .mutual_connections_weight,
10085 recency_half_life_days: self
10086 .config
10087 .relationship_strength
10088 .calculation
10089 .recency_half_life_days,
10090 },
10091 ..Default::default()
10092 };
10093
10094 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10095
10096 let entity_graph = if rs_enabled {
10098 let vendor_summaries: Vec<EntitySummary> = self
10100 .master_data
10101 .vendors
10102 .iter()
10103 .map(|v| {
10104 EntitySummary::new(
10105 &v.vendor_id,
10106 &v.name,
10107 datasynth_core::models::GraphEntityType::Vendor,
10108 start_date,
10109 )
10110 })
10111 .collect();
10112
10113 let customer_summaries: Vec<EntitySummary> = self
10114 .master_data
10115 .customers
10116 .iter()
10117 .map(|c| {
10118 EntitySummary::new(
10119 &c.customer_id,
10120 &c.name,
10121 datasynth_core::models::GraphEntityType::Customer,
10122 start_date,
10123 )
10124 })
10125 .collect();
10126
10127 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10132 std::collections::HashMap::new();
10133
10134 for je in journal_entries {
10135 let cc = je.header.company_code.clone();
10136 let posting_date = je.header.posting_date;
10137 for line in &je.lines {
10138 if let Some(ref tp) = line.trading_partner {
10139 let amount = if line.debit_amount > line.credit_amount {
10140 line.debit_amount
10141 } else {
10142 line.credit_amount
10143 };
10144 let entry = txn_summaries
10145 .entry((cc.clone(), tp.clone()))
10146 .or_insert_with(|| TransactionSummary {
10147 total_volume: rust_decimal::Decimal::ZERO,
10148 transaction_count: 0,
10149 first_transaction_date: posting_date,
10150 last_transaction_date: posting_date,
10151 related_entities: std::collections::HashSet::new(),
10152 });
10153 entry.total_volume += amount;
10154 entry.transaction_count += 1;
10155 if posting_date < entry.first_transaction_date {
10156 entry.first_transaction_date = posting_date;
10157 }
10158 if posting_date > entry.last_transaction_date {
10159 entry.last_transaction_date = posting_date;
10160 }
10161 entry.related_entities.insert(cc.clone());
10162 }
10163 }
10164 }
10165
10166 for chain in &document_flows.p2p_chains {
10169 let cc = chain.purchase_order.header.company_code.clone();
10170 let vendor_id = chain.purchase_order.vendor_id.clone();
10171 let po_date = chain.purchase_order.header.document_date;
10172 let amount = chain.purchase_order.total_net_amount;
10173
10174 let entry = txn_summaries
10175 .entry((cc.clone(), vendor_id))
10176 .or_insert_with(|| TransactionSummary {
10177 total_volume: rust_decimal::Decimal::ZERO,
10178 transaction_count: 0,
10179 first_transaction_date: po_date,
10180 last_transaction_date: po_date,
10181 related_entities: std::collections::HashSet::new(),
10182 });
10183 entry.total_volume += amount;
10184 entry.transaction_count += 1;
10185 if po_date < entry.first_transaction_date {
10186 entry.first_transaction_date = po_date;
10187 }
10188 if po_date > entry.last_transaction_date {
10189 entry.last_transaction_date = po_date;
10190 }
10191 entry.related_entities.insert(cc);
10192 }
10193
10194 for chain in &document_flows.o2c_chains {
10196 let cc = chain.sales_order.header.company_code.clone();
10197 let customer_id = chain.sales_order.customer_id.clone();
10198 let so_date = chain.sales_order.header.document_date;
10199 let amount = chain.sales_order.total_net_amount;
10200
10201 let entry = txn_summaries
10202 .entry((cc.clone(), customer_id))
10203 .or_insert_with(|| TransactionSummary {
10204 total_volume: rust_decimal::Decimal::ZERO,
10205 transaction_count: 0,
10206 first_transaction_date: so_date,
10207 last_transaction_date: so_date,
10208 related_entities: std::collections::HashSet::new(),
10209 });
10210 entry.total_volume += amount;
10211 entry.transaction_count += 1;
10212 if so_date < entry.first_transaction_date {
10213 entry.first_transaction_date = so_date;
10214 }
10215 if so_date > entry.last_transaction_date {
10216 entry.last_transaction_date = so_date;
10217 }
10218 entry.related_entities.insert(cc);
10219 }
10220
10221 let as_of_date = journal_entries
10222 .last()
10223 .map(|je| je.header.posting_date)
10224 .unwrap_or(start_date);
10225
10226 let graph = gen.generate_entity_graph(
10227 company_code,
10228 as_of_date,
10229 &vendor_summaries,
10230 &customer_summaries,
10231 &txn_summaries,
10232 );
10233
10234 info!(
10235 "Entity relationship graph: {} nodes, {} edges",
10236 graph.nodes.len(),
10237 graph.edges.len()
10238 );
10239 stats.entity_relationship_node_count = graph.nodes.len();
10240 stats.entity_relationship_edge_count = graph.edges.len();
10241 Some(graph)
10242 } else {
10243 None
10244 };
10245
10246 let cross_process_links = if cpl_enabled {
10248 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10250 .p2p_chains
10251 .iter()
10252 .flat_map(|chain| {
10253 let vendor_id = chain.purchase_order.vendor_id.clone();
10254 let cc = chain.purchase_order.header.company_code.clone();
10255 chain.goods_receipts.iter().flat_map(move |gr| {
10256 gr.items.iter().filter_map({
10257 let doc_id = gr.header.document_id.clone();
10258 let v_id = vendor_id.clone();
10259 let company = cc.clone();
10260 let receipt_date = gr.header.document_date;
10261 move |item| {
10262 item.base
10263 .material_id
10264 .as_ref()
10265 .map(|mat_id| GoodsReceiptRef {
10266 document_id: doc_id.clone(),
10267 material_id: mat_id.clone(),
10268 quantity: item.base.quantity,
10269 receipt_date,
10270 vendor_id: v_id.clone(),
10271 company_code: company.clone(),
10272 })
10273 }
10274 })
10275 })
10276 })
10277 .collect();
10278
10279 let del_refs: Vec<DeliveryRef> = document_flows
10281 .o2c_chains
10282 .iter()
10283 .flat_map(|chain| {
10284 let customer_id = chain.sales_order.customer_id.clone();
10285 let cc = chain.sales_order.header.company_code.clone();
10286 chain.deliveries.iter().flat_map(move |del| {
10287 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10288 del.items.iter().filter_map({
10289 let doc_id = del.header.document_id.clone();
10290 let c_id = customer_id.clone();
10291 let company = cc.clone();
10292 move |item| {
10293 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10294 document_id: doc_id.clone(),
10295 material_id: mat_id.clone(),
10296 quantity: item.base.quantity,
10297 delivery_date,
10298 customer_id: c_id.clone(),
10299 company_code: company.clone(),
10300 })
10301 }
10302 })
10303 })
10304 })
10305 .collect();
10306
10307 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10308 info!("Cross-process links generated: {} links", links.len());
10309 stats.cross_process_link_count = links.len();
10310 links
10311 } else {
10312 Vec::new()
10313 };
10314
10315 self.check_resources_with_log("post-entity-relationships")?;
10316 Ok((entity_graph, cross_process_links))
10317 }
10318
10319 fn phase_industry_data(
10321 &self,
10322 stats: &mut EnhancedGenerationStatistics,
10323 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10324 if !self.config.industry_specific.enabled {
10325 return None;
10326 }
10327 info!("Phase 29: Generating industry-specific data");
10328 let output = datasynth_generators::industry::factory::generate_industry_output(
10329 self.config.global.industry,
10330 );
10331 stats.industry_gl_account_count = output.gl_accounts.len();
10332 info!(
10333 "Industry data generated: {} GL accounts for {:?}",
10334 output.gl_accounts.len(),
10335 self.config.global.industry
10336 );
10337 Some(output)
10338 }
10339
10340 fn phase_opening_balances(
10342 &mut self,
10343 coa: &Arc<ChartOfAccounts>,
10344 stats: &mut EnhancedGenerationStatistics,
10345 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10346 if !self.config.balance.generate_opening_balances {
10347 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10348 return Ok(Vec::new());
10349 }
10350 info!("Phase 3b: Generating Opening Balances");
10351
10352 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10354 let fiscal_year = start_date.year();
10355
10356 if let Some(ctx) = &self.shard_context {
10367 if !ctx.opening_balances.is_empty() {
10368 debug!(
10369 "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10370 ctx.opening_balances.len()
10371 );
10372 let mut results = Vec::new();
10373 for company in &self.config.companies {
10374 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10375 .opening_balances
10376 .iter()
10377 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10378 .collect();
10379 let total_assets = ctx
10380 .opening_balances
10381 .iter()
10382 .filter(|ob| {
10383 matches!(
10384 ob.account_type,
10385 AccountType::Asset | AccountType::ContraAsset
10386 )
10387 })
10388 .map(|ob| ob.net_balance())
10389 .sum::<rust_decimal::Decimal>();
10390 let total_liabilities = ctx
10391 .opening_balances
10392 .iter()
10393 .filter(|ob| {
10394 matches!(
10395 ob.account_type,
10396 AccountType::Liability | AccountType::ContraLiability
10397 )
10398 })
10399 .map(|ob| ob.net_balance())
10400 .sum::<rust_decimal::Decimal>();
10401 let total_equity = ctx
10402 .opening_balances
10403 .iter()
10404 .filter(|ob| {
10405 matches!(
10406 ob.account_type,
10407 AccountType::Equity | AccountType::ContraEquity
10408 )
10409 })
10410 .map(|ob| ob.net_balance())
10411 .sum::<rust_decimal::Decimal>();
10412 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10413 < rust_decimal::Decimal::ONE;
10414 results.push(GeneratedOpeningBalance {
10415 company_code: company.code.clone(),
10416 as_of_date: start_date,
10417 balances,
10418 total_assets,
10419 total_liabilities,
10420 total_equity,
10421 is_balanced,
10422 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10423 current_ratio: None,
10424 quick_ratio: None,
10425 debt_to_equity: None,
10426 working_capital: rust_decimal::Decimal::ZERO,
10427 },
10428 });
10429 }
10430 stats.opening_balance_count = results.len();
10431 info!(
10432 "Phase 3b: opening-balance carryover applied ({} companies)",
10433 results.len()
10434 );
10435 self.check_resources_with_log("post-opening-balances")?;
10436 return Ok(results);
10437 }
10438 }
10439
10440 let industry = match self.config.global.industry {
10441 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10442 IndustrySector::Retail => IndustryType::Retail,
10443 IndustrySector::FinancialServices => IndustryType::Financial,
10444 IndustrySector::Healthcare => IndustryType::Healthcare,
10445 IndustrySector::Technology => IndustryType::Technology,
10446 _ => IndustryType::Manufacturing,
10447 };
10448
10449 let config = datasynth_generators::OpeningBalanceConfig {
10450 industry,
10451 ..Default::default()
10452 };
10453 let mut gen =
10454 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10455
10456 let mut results = Vec::new();
10457 for company in &self.config.companies {
10458 let spec = OpeningBalanceSpec::new(
10459 company.code.clone(),
10460 start_date,
10461 fiscal_year,
10462 company.currency.clone(),
10463 rust_decimal::Decimal::new(10_000_000, 0),
10464 industry,
10465 );
10466 let ob = gen.generate(&spec, coa, start_date, &company.code);
10467 results.push(ob);
10468 }
10469
10470 stats.opening_balance_count = results.len();
10471 info!("Opening balances generated: {} companies", results.len());
10472 self.check_resources_with_log("post-opening-balances")?;
10473
10474 Ok(results)
10475 }
10476
10477 fn phase_subledger_reconciliation(
10479 &mut self,
10480 subledger: &SubledgerSnapshot,
10481 entries: &[JournalEntry],
10482 stats: &mut EnhancedGenerationStatistics,
10483 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10484 if !self.config.balance.reconcile_subledgers {
10485 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10486 return Ok(Vec::new());
10487 }
10488 info!("Phase 9b: Reconciling GL to subledger balances");
10489
10490 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10491 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10492 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10493
10494 let tracker_config = BalanceTrackerConfig {
10496 validate_on_each_entry: false,
10497 track_history: false,
10498 fail_on_validation_error: false,
10499 ..Default::default()
10500 };
10501 let recon_currency = self
10502 .config
10503 .companies
10504 .first()
10505 .map(|c| c.currency.clone())
10506 .unwrap_or_else(|| "USD".to_string());
10507 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10508 let validation_errors = tracker.apply_entries(entries);
10509 if !validation_errors.is_empty() {
10510 warn!(
10511 error_count = validation_errors.len(),
10512 "Balance tracker encountered validation errors during subledger reconciliation"
10513 );
10514 for err in &validation_errors {
10515 debug!("Balance validation error: {:?}", err);
10516 }
10517 }
10518
10519 let mut engine = datasynth_generators::ReconciliationEngine::new(
10520 datasynth_generators::ReconciliationConfig::default(),
10521 );
10522
10523 let mut results = Vec::new();
10524 let company_code = self
10525 .config
10526 .companies
10527 .first()
10528 .map(|c| c.code.as_str())
10529 .unwrap_or("1000");
10530
10531 if !subledger.ar_invoices.is_empty() {
10533 let gl_balance = tracker
10534 .get_account_balance(
10535 company_code,
10536 datasynth_core::accounts::control_accounts::AR_CONTROL,
10537 )
10538 .map(|b| b.closing_balance)
10539 .unwrap_or_default();
10540 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10541 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10542 }
10543
10544 if !subledger.ap_invoices.is_empty() {
10546 let gl_balance = tracker
10547 .get_account_balance(
10548 company_code,
10549 datasynth_core::accounts::control_accounts::AP_CONTROL,
10550 )
10551 .map(|b| b.closing_balance)
10552 .unwrap_or_default();
10553 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10554 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10555 }
10556
10557 if !subledger.fa_records.is_empty() {
10559 let gl_asset_balance = tracker
10560 .get_account_balance(
10561 company_code,
10562 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10563 )
10564 .map(|b| b.closing_balance)
10565 .unwrap_or_default();
10566 let gl_accum_depr_balance = tracker
10567 .get_account_balance(
10568 company_code,
10569 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10570 )
10571 .map(|b| b.closing_balance)
10572 .unwrap_or_default();
10573 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10574 subledger.fa_records.iter().collect();
10575 let (asset_recon, depr_recon) = engine.reconcile_fa(
10576 company_code,
10577 end_date,
10578 gl_asset_balance,
10579 gl_accum_depr_balance,
10580 &fa_refs,
10581 );
10582 results.push(asset_recon);
10583 results.push(depr_recon);
10584 }
10585
10586 if !subledger.inventory_positions.is_empty() {
10588 let gl_balance = tracker
10589 .get_account_balance(
10590 company_code,
10591 datasynth_core::accounts::control_accounts::INVENTORY,
10592 )
10593 .map(|b| b.closing_balance)
10594 .unwrap_or_default();
10595 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10596 subledger.inventory_positions.iter().collect();
10597 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10598 }
10599
10600 stats.subledger_reconciliation_count = results.len();
10601 let passed = results.iter().filter(|r| r.is_balanced()).count();
10602 let failed = results.len() - passed;
10603 info!(
10604 "Subledger reconciliation: {} checks, {} passed, {} failed",
10605 results.len(),
10606 passed,
10607 failed
10608 );
10609 self.check_resources_with_log("post-subledger-reconciliation")?;
10610
10611 Ok(results)
10612 }
10613
10614 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10616 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10617
10618 let coa_framework = self.resolve_coa_framework();
10619
10620 let mut gen = ChartOfAccountsGenerator::new(
10621 self.config.chart_of_accounts.complexity,
10622 self.config.global.industry,
10623 self.seed,
10624 )
10625 .with_coa_framework(coa_framework)
10626 .with_expand_industry_subaccounts(
10628 self.config.chart_of_accounts.expand_industry_subaccounts,
10629 );
10630
10631 let mut built = gen.generate();
10632 if self.config.accounting_standards.enabled {
10636 use datasynth_config::schema::AccountingFrameworkConfig;
10637 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10638 match f {
10639 AccountingFrameworkConfig::UsGaap => "us_gaap",
10640 AccountingFrameworkConfig::Ifrs => "ifrs",
10641 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10642 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10643 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10644 }
10645 .to_string()
10646 });
10647 }
10648 if let Some(ref cached) = self.cached_priors {
10652 if let Some(ref coa_prior) = cached.coa_semantic {
10653 use datasynth_generators::coa_generator::{
10654 remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10655 };
10656 let mut rng =
10659 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10660 let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10661 tracing::info!(
10662 target: "datasynth_runtime::coa",
10663 remapped,
10664 total = built.accounts.len(),
10665 "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10666 );
10667 let applied =
10670 ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10671 tracing::info!(
10672 target: "datasynth_runtime::coa",
10673 applied,
10674 total = built.accounts.len(),
10675 "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10676 );
10677 }
10678 if let Some(tx) = cached.text_taxonomy.as_ref() {
10684 use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
10685 use datasynth_generators::coa_generator::overlay_coa_taxonomy;
10686 let mut resolver = SyntheticExampleResolver;
10687 let mut rng =
10688 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
10689 overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
10690 tracing::info!(
10691 target: "datasynth_runtime::coa",
10692 total = built.accounts.len(),
10693 "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
10694 );
10695 }
10696 }
10697
10698 let coa = Arc::new(built);
10699 self.coa = Some(Arc::clone(&coa));
10700
10701 if let Some(pb) = pb {
10702 pb.finish_with_message("Chart of Accounts complete");
10703 }
10704
10705 Ok(coa)
10706 }
10707
10708 fn generate_master_data(&mut self) -> SynthResult<()> {
10710 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10711 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10712 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10713
10714 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10716
10717 let pack = self.primary_pack().clone();
10719
10720 let vendors_per_company = self.phase_config.vendors_per_company;
10722 let customers_per_company = self.phase_config.customers_per_company;
10723 let materials_per_company = self.phase_config.materials_per_company;
10724 let assets_per_company = self.phase_config.assets_per_company;
10725 let coa_framework = self.resolve_coa_framework();
10726
10727 let per_company_results: Vec<_> = self
10730 .config
10731 .companies
10732 .par_iter()
10733 .enumerate()
10734 .map(|(i, company)| {
10735 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10736 let pack = pack.clone();
10737
10738 let mut vendor_gen = VendorGenerator::new(company_seed);
10740 vendor_gen.set_country_pack(pack.clone());
10741 vendor_gen.set_coa_framework(coa_framework);
10742 vendor_gen.set_counter_offset(i * vendors_per_company);
10743 vendor_gen.set_template_provider(self.template_provider.clone());
10746 if self.config.vendor_network.enabled {
10748 let vn = &self.config.vendor_network;
10749 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10750 enabled: true,
10751 depth: vn.depth,
10752 tier1_count: datasynth_generators::TierCountConfig::new(
10753 vn.tier1.min,
10754 vn.tier1.max,
10755 ),
10756 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10757 vn.tier2_per_parent.min,
10758 vn.tier2_per_parent.max,
10759 ),
10760 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10761 vn.tier3_per_parent.min,
10762 vn.tier3_per_parent.max,
10763 ),
10764 cluster_distribution: datasynth_generators::ClusterDistribution {
10765 reliable_strategic: vn.clusters.reliable_strategic,
10766 standard_operational: vn.clusters.standard_operational,
10767 transactional: vn.clusters.transactional,
10768 problematic: vn.clusters.problematic,
10769 },
10770 concentration_limits: datasynth_generators::ConcentrationLimits {
10771 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10772 max_top5: vn.dependencies.top_5_concentration,
10773 },
10774 ..datasynth_generators::VendorNetworkConfig::default()
10775 });
10776 }
10777 let vendor_pool =
10778 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10779
10780 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10782 customer_gen.set_country_pack(pack.clone());
10783 customer_gen.set_coa_framework(coa_framework);
10784 customer_gen.set_counter_offset(i * customers_per_company);
10785 customer_gen.set_template_provider(self.template_provider.clone());
10787 if self.config.customer_segmentation.enabled {
10789 let cs = &self.config.customer_segmentation;
10790 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10791 enabled: true,
10792 segment_distribution: datasynth_generators::SegmentDistribution {
10793 enterprise: cs.value_segments.enterprise.customer_share,
10794 mid_market: cs.value_segments.mid_market.customer_share,
10795 smb: cs.value_segments.smb.customer_share,
10796 consumer: cs.value_segments.consumer.customer_share,
10797 },
10798 referral_config: datasynth_generators::ReferralConfig {
10799 enabled: cs.networks.referrals.enabled,
10800 referral_rate: cs.networks.referrals.referral_rate,
10801 ..Default::default()
10802 },
10803 hierarchy_config: datasynth_generators::HierarchyConfig {
10804 enabled: cs.networks.corporate_hierarchies.enabled,
10805 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10806 ..Default::default()
10807 },
10808 ..Default::default()
10809 };
10810 customer_gen.set_segmentation_config(seg_cfg);
10811 }
10812 let customer_pool = customer_gen.generate_customer_pool(
10813 customers_per_company,
10814 &company.code,
10815 start_date,
10816 );
10817
10818 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10820 material_gen.set_country_pack(pack.clone());
10821 material_gen.set_counter_offset(i * materials_per_company);
10822 material_gen.set_template_provider(self.template_provider.clone());
10824 let material_pool = material_gen.generate_material_pool(
10825 materials_per_company,
10826 &company.code,
10827 start_date,
10828 );
10829
10830 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10832 asset_gen.set_template_provider(self.template_provider.clone());
10834 let asset_pool = asset_gen.generate_asset_pool(
10835 assets_per_company,
10836 &company.code,
10837 (start_date, end_date),
10838 );
10839
10840 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10842 employee_gen.set_country_pack(pack);
10843 employee_gen.set_template_provider(self.template_provider.clone());
10845 let employee_pool =
10846 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10847
10848 let employee_change_history =
10850 employee_gen.generate_all_change_history(&employee_pool, end_date);
10851
10852 let employee_ids: Vec<String> = employee_pool
10854 .employees
10855 .iter()
10856 .map(|e| e.employee_id.clone())
10857 .collect();
10858 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10859 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10860
10861 let mut pc_gen =
10864 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10865 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10866
10867 (
10868 vendor_pool.vendors,
10869 customer_pool.customers,
10870 material_pool.materials,
10871 asset_pool.assets,
10872 employee_pool.employees,
10873 employee_change_history,
10874 cost_centers,
10875 profit_centers,
10876 )
10877 })
10878 .collect();
10879
10880 for (
10882 vendors,
10883 customers,
10884 materials,
10885 assets,
10886 employees,
10887 change_history,
10888 cost_centers,
10889 profit_centers,
10890 ) in per_company_results
10891 {
10892 self.master_data.vendors.extend(vendors);
10893 self.master_data.customers.extend(customers);
10894 self.master_data.materials.extend(materials);
10895 self.master_data.assets.extend(assets);
10896 self.master_data.employees.extend(employees);
10897 self.master_data.cost_centers.extend(cost_centers);
10898 self.master_data.profit_centers.extend(profit_centers);
10899 self.master_data
10900 .employee_change_history
10901 .extend(change_history);
10902 }
10903
10904 {
10908 use datasynth_core::models::IndustrySector;
10909 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10910 let industry = match self.config.global.industry {
10911 IndustrySector::Manufacturing => "manufacturing",
10912 IndustrySector::Retail => "retail",
10913 IndustrySector::FinancialServices => "financial_services",
10914 IndustrySector::Technology => "technology",
10915 IndustrySector::Healthcare => "healthcare",
10916 _ => "other",
10917 };
10918 for (i, company) in self.config.companies.iter().enumerate() {
10919 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10920 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10921 let profile = profile_gen.generate(&company.code, industry);
10922 self.master_data.organizational_profiles.push(profile);
10923 }
10924 }
10925
10926 if let Some(pb) = &pb {
10927 pb.inc(total);
10928 }
10929 if let Some(pb) = pb {
10930 pb.finish_with_message("Master data generation complete");
10931 }
10932
10933 Ok(())
10934 }
10935
10936 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10938 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10939 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10940
10941 let months = (self.config.global.period_months as usize).max(1);
10944 let p2p_count = self
10945 .phase_config
10946 .p2p_chains
10947 .min(self.master_data.vendors.len() * 2 * months);
10948 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10949
10950 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10952 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10953 p2p_gen.set_country_pack(self.primary_pack().clone());
10954 if let Some(ctx) = &self.temporal_context {
10958 p2p_gen.set_temporal_context(Arc::clone(ctx));
10959 }
10960
10961 for i in 0..p2p_count {
10962 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10963 let materials: Vec<&Material> = self
10964 .master_data
10965 .materials
10966 .iter()
10967 .skip(i % self.master_data.materials.len().max(1))
10968 .take(2.min(self.master_data.materials.len()))
10969 .collect();
10970
10971 if materials.is_empty() {
10972 continue;
10973 }
10974
10975 let company = &self.config.companies[i % self.config.companies.len()];
10976 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10977 let fiscal_period = po_date.month() as u8;
10978 let created_by = if self.master_data.employees.is_empty() {
10979 "SYSTEM"
10980 } else {
10981 self.master_data.employees[i % self.master_data.employees.len()]
10982 .user_id
10983 .as_str()
10984 };
10985
10986 let chain = p2p_gen.generate_chain(
10987 &company.code,
10988 vendor,
10989 &materials,
10990 po_date,
10991 start_date.year() as u16,
10992 fiscal_period,
10993 created_by,
10994 );
10995
10996 flows.purchase_orders.push(chain.purchase_order.clone());
10998 flows.goods_receipts.extend(chain.goods_receipts.clone());
10999 if let Some(vi) = &chain.vendor_invoice {
11000 flows.vendor_invoices.push(vi.clone());
11001 }
11002 if let Some(payment) = &chain.payment {
11003 flows.payments.push(payment.clone());
11004 }
11005 for remainder in &chain.remainder_payments {
11006 flows.payments.push(remainder.clone());
11007 }
11008 flows.p2p_chains.push(chain);
11009
11010 if let Some(pb) = &pb {
11011 pb.inc(1);
11012 }
11013 }
11014
11015 if let Some(pb) = pb {
11016 pb.finish_with_message("P2P document flows complete");
11017 }
11018
11019 let o2c_count = self
11022 .phase_config
11023 .o2c_chains
11024 .min(self.master_data.customers.len() * 2 * months);
11025 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11026
11027 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11029 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11030 o2c_gen.set_country_pack(self.primary_pack().clone());
11031 if let Some(ctx) = &self.temporal_context {
11033 o2c_gen.set_temporal_context(Arc::clone(ctx));
11034 }
11035
11036 for i in 0..o2c_count {
11037 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11038 let materials: Vec<&Material> = self
11039 .master_data
11040 .materials
11041 .iter()
11042 .skip(i % self.master_data.materials.len().max(1))
11043 .take(2.min(self.master_data.materials.len()))
11044 .collect();
11045
11046 if materials.is_empty() {
11047 continue;
11048 }
11049
11050 let company = &self.config.companies[i % self.config.companies.len()];
11051 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11052 let fiscal_period = so_date.month() as u8;
11053 let created_by = if self.master_data.employees.is_empty() {
11054 "SYSTEM"
11055 } else {
11056 self.master_data.employees[i % self.master_data.employees.len()]
11057 .user_id
11058 .as_str()
11059 };
11060
11061 let chain = o2c_gen.generate_chain(
11062 &company.code,
11063 customer,
11064 &materials,
11065 so_date,
11066 start_date.year() as u16,
11067 fiscal_period,
11068 created_by,
11069 );
11070
11071 flows.sales_orders.push(chain.sales_order.clone());
11073 flows.deliveries.extend(chain.deliveries.clone());
11074 if let Some(ci) = &chain.customer_invoice {
11075 flows.customer_invoices.push(ci.clone());
11076 }
11077 if let Some(receipt) = &chain.customer_receipt {
11078 flows.payments.push(receipt.clone());
11079 }
11080 for receipt in &chain.remainder_receipts {
11082 flows.payments.push(receipt.clone());
11083 }
11084 flows.o2c_chains.push(chain);
11085
11086 if let Some(pb) = &pb {
11087 pb.inc(1);
11088 }
11089 }
11090
11091 if let Some(pb) = pb {
11092 pb.finish_with_message("O2C document flows complete");
11093 }
11094
11095 {
11099 let mut refs = Vec::new();
11100 for doc in &flows.purchase_orders {
11101 refs.extend(doc.header.document_references.iter().cloned());
11102 }
11103 for doc in &flows.goods_receipts {
11104 refs.extend(doc.header.document_references.iter().cloned());
11105 }
11106 for doc in &flows.vendor_invoices {
11107 refs.extend(doc.header.document_references.iter().cloned());
11108 }
11109 for doc in &flows.sales_orders {
11110 refs.extend(doc.header.document_references.iter().cloned());
11111 }
11112 for doc in &flows.deliveries {
11113 refs.extend(doc.header.document_references.iter().cloned());
11114 }
11115 for doc in &flows.customer_invoices {
11116 refs.extend(doc.header.document_references.iter().cloned());
11117 }
11118 for doc in &flows.payments {
11119 refs.extend(doc.header.document_references.iter().cloned());
11120 }
11121 debug!(
11122 "Collected {} document cross-references from document headers",
11123 refs.len()
11124 );
11125 flows.document_references = refs;
11126 }
11127
11128 Ok(())
11129 }
11130
11131 fn generate_journal_entries(
11133 &mut self,
11134 coa: &Arc<ChartOfAccounts>,
11135 ) -> SynthResult<Vec<JournalEntry>> {
11136 use datasynth_core::traits::ParallelGenerator;
11137
11138 let total = self.calculate_total_transactions();
11139 let pb = self.create_progress_bar(total, "Generating Journal Entries");
11140
11141 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11142 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11143 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11144
11145 let company_codes: Vec<String> = self
11146 .config
11147 .companies
11148 .iter()
11149 .map(|c| c.code.clone())
11150 .collect();
11151
11152 let mut generator = JournalEntryGenerator::new_with_params(
11153 self.config.transactions.clone(),
11154 Arc::clone(coa),
11155 company_codes,
11156 start_date,
11157 end_date,
11158 self.seed,
11159 );
11160 let bp = &self.config.business_processes;
11163 generator.set_business_process_weights(
11164 bp.o2c_weight,
11165 bp.p2p_weight,
11166 bp.r2r_weight,
11167 bp.h2r_weight,
11168 bp.a2r_weight,
11169 );
11170 generator
11175 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11176 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11177
11178 if let Some(profile) = &self.config.distributions.industry_profile {
11183 if let Some(priors_cfg) = profile.priors() {
11184 if priors_cfg.enabled {
11185 use datasynth_config::schema::PriorsSource;
11186 use datasynth_generators::priors_loader::LoadedPriors;
11187
11188 let mut priors_rng =
11189 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11190 let period_days = i64::from(self.config.global.period_months) * 30;
11191 let industry_slug = profile.profile_type().slug();
11192
11193 let loaded = match priors_cfg.source {
11194 PriorsSource::Bundled => {
11195 LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11196 .map_err(|e| {
11197 SynthError::config(format!(
11198 "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11199 ))
11200 })?
11201 }
11202 PriorsSource::File => {
11203 let path = priors_cfg.path.as_ref().ok_or_else(|| {
11204 SynthError::config(
11205 "SP3: industry_profile.priors.path required when source = file"
11206 .to_string(),
11207 )
11208 })?;
11209 LoadedPriors::load_from_path(
11210 path,
11211 &mut priors_rng,
11212 period_days,
11213 Some(industry_slug),
11214 )
11215 .map_err(|e| {
11216 SynthError::config(format!(
11217 "SP3: failed to load priors from '{}': {e}",
11218 path.display()
11219 ))
11220 })?
11221 }
11222 };
11223
11224 let loaded = std::sync::Arc::new(loaded);
11227 self.cached_priors = Some(loaded.clone());
11228 generator.loaded_priors = Some((*loaded).clone());
11229
11230 if priors_cfg.velocity_calibration {
11235 use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11236 let mut targets = std::collections::HashMap::new();
11237 targets.insert("R7".to_string(), 0.10);
11238 targets.insert("R9".to_string(), 0.10);
11239 let calibrator = VelocityCalibrator::new(targets, 10_000);
11240 generator.velocity_calibrator = Some(calibrator);
11241 }
11242 }
11243 }
11244 }
11245
11246 let generator = generator;
11247
11248 let je_pack = self.primary_pack();
11252
11253 let cc_pool: Vec<String> = self
11260 .master_data
11261 .cost_centers
11262 .iter()
11263 .map(|c| c.id.clone())
11264 .collect();
11265 let pc_pool: Vec<String> = self
11266 .master_data
11267 .profit_centers
11268 .iter()
11269 .map(|p| p.id.clone())
11270 .collect();
11271
11272 let user_pool_from_employees =
11278 datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11279
11280 let mut generator = generator
11281 .with_master_data(
11282 &self.master_data.vendors,
11283 &self.master_data.customers,
11284 &self.master_data.materials,
11285 )
11286 .with_cost_center_pool(cc_pool)
11287 .with_profit_center_pool(pc_pool)
11288 .with_country_pack_names(je_pack)
11289 .with_user_pool(user_pool_from_employees)
11290 .with_country_pack_temporal(
11291 self.config.temporal_patterns.clone(),
11292 self.seed + 200,
11293 je_pack,
11294 )
11295 .with_persona_errors(true)
11296 .with_fraud_config(self.config.fraud.clone());
11297
11298 let temporal_enabled = self.config.temporal.enabled;
11303 let regimes_enabled = self.config.distributions.regime_changes.enabled;
11304 if temporal_enabled || regimes_enabled {
11305 let mut drift_config = if temporal_enabled {
11306 self.config.temporal.to_core_config()
11307 } else {
11308 datasynth_core::distributions::DriftConfig::default()
11311 };
11312 if regimes_enabled {
11313 self.config
11314 .distributions
11315 .regime_changes
11316 .apply_to(&mut drift_config, start_date);
11317 }
11318 generator = generator.with_drift_config(drift_config, self.seed + 100);
11319 }
11320
11321 self.check_memory_limit()?;
11323
11324 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11326
11327 let entries = if total >= 10_000 && num_threads > 1 {
11331 let sub_generators = generator.split(num_threads);
11334 let entries_per_thread = total as usize / num_threads;
11335 let remainder = total as usize % num_threads;
11336
11337 let batches: Vec<Vec<JournalEntry>> = sub_generators
11338 .into_par_iter()
11339 .enumerate()
11340 .map(|(i, mut gen)| {
11341 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11342 gen.generate_batch(count)
11343 })
11344 .collect();
11345
11346 let entries = JournalEntryGenerator::merge_results(batches);
11348
11349 if let Some(pb) = &pb {
11350 pb.inc(total);
11351 }
11352 entries
11353 } else {
11354 let mut entries = Vec::with_capacity(total as usize);
11356 for _ in 0..total {
11357 let entry = generator.generate();
11358 entries.push(entry);
11359 if let Some(pb) = &pb {
11360 pb.inc(1);
11361 }
11362 }
11363 entries
11364 };
11365
11366 if let Some(pb) = pb {
11367 pb.finish_with_message("Journal entries complete");
11368 }
11369
11370 Ok(entries)
11371 }
11372
11373 fn generate_jes_from_document_flows(
11378 &mut self,
11379 flows: &DocumentFlowSnapshot,
11380 ) -> SynthResult<Vec<JournalEntry>> {
11381 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11382 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11383
11384 let je_config = match self.resolve_coa_framework() {
11385 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11386 CoAFramework::GermanSkr04 => {
11387 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11388 DocumentFlowJeConfig::from(&fa)
11389 }
11390 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11391 };
11392
11393 let populate_fec = je_config.populate_fec_fields;
11394 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11395
11396 if let Some(ref priors) = self.cached_priors {
11399 generator.set_loaded_priors(priors.clone());
11400 }
11401
11402 let cc_pool: Vec<String> = self
11408 .master_data
11409 .cost_centers
11410 .iter()
11411 .map(|c| c.id.clone())
11412 .collect();
11413 let pc_pool: Vec<String> = self
11414 .master_data
11415 .profit_centers
11416 .iter()
11417 .map(|p| p.id.clone())
11418 .collect();
11419 if !cc_pool.is_empty() {
11420 generator.set_cost_center_pool(cc_pool);
11421 }
11422 if !pc_pool.is_empty() {
11423 generator.set_profit_center_pool(pc_pool);
11424 }
11425
11426 if populate_fec {
11430 let mut aux_lookup = std::collections::HashMap::new();
11431 for vendor in &self.master_data.vendors {
11432 if let Some(ref aux) = vendor.auxiliary_gl_account {
11433 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11434 }
11435 }
11436 for customer in &self.master_data.customers {
11437 if let Some(ref aux) = customer.auxiliary_gl_account {
11438 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11439 }
11440 }
11441 if !aux_lookup.is_empty() {
11442 generator.set_auxiliary_account_lookup(aux_lookup);
11443 }
11444 }
11445
11446 let mut entries = Vec::new();
11447
11448 for chain in &flows.p2p_chains {
11450 let chain_entries = generator.generate_from_p2p_chain(chain);
11451 entries.extend(chain_entries);
11452 if let Some(pb) = &pb {
11453 pb.inc(1);
11454 }
11455 }
11456
11457 for chain in &flows.o2c_chains {
11459 let chain_entries = generator.generate_from_o2c_chain(chain);
11460 entries.extend(chain_entries);
11461 if let Some(pb) = &pb {
11462 pb.inc(1);
11463 }
11464 }
11465
11466 if let Some(pb) = pb {
11467 pb.finish_with_message(format!(
11468 "Generated {} JEs from document flows",
11469 entries.len()
11470 ));
11471 }
11472
11473 Ok(entries)
11474 }
11475
11476 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11482 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11483
11484 let mut jes = Vec::with_capacity(payroll_runs.len());
11485
11486 for run in payroll_runs {
11487 let mut je = JournalEntry::new_simple(
11488 format!("JE-PAYROLL-{}", run.payroll_id),
11489 run.company_code.clone(),
11490 run.run_date,
11491 format!("Payroll {}", run.payroll_id),
11492 );
11493
11494 je.add_line(JournalEntryLine {
11496 line_number: 1,
11497 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11498 debit_amount: run.total_gross,
11499 reference: Some(run.payroll_id.clone()),
11500 text: Some(format!(
11501 "Payroll {} ({} employees)",
11502 run.payroll_id, run.employee_count
11503 )),
11504 ..Default::default()
11505 });
11506
11507 je.add_line(JournalEntryLine {
11509 line_number: 2,
11510 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11511 credit_amount: run.total_gross,
11512 reference: Some(run.payroll_id.clone()),
11513 ..Default::default()
11514 });
11515
11516 jes.push(je);
11517 }
11518
11519 jes
11520 }
11521
11522 fn link_document_flows_to_subledgers(
11527 &mut self,
11528 flows: &DocumentFlowSnapshot,
11529 ) -> SynthResult<SubledgerSnapshot> {
11530 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11531 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11532
11533 let vendor_names: std::collections::HashMap<String, String> = self
11535 .master_data
11536 .vendors
11537 .iter()
11538 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11539 .collect();
11540 let customer_names: std::collections::HashMap<String, String> = self
11541 .master_data
11542 .customers
11543 .iter()
11544 .map(|c| (c.customer_id.clone(), c.name.clone()))
11545 .collect();
11546
11547 let mut linker = DocumentFlowLinker::new()
11548 .with_vendor_names(vendor_names)
11549 .with_customer_names(customer_names);
11550
11551 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11553 if let Some(pb) = &pb {
11554 pb.inc(flows.vendor_invoices.len() as u64);
11555 }
11556
11557 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11559 if let Some(pb) = &pb {
11560 pb.inc(flows.customer_invoices.len() as u64);
11561 }
11562
11563 if let Some(pb) = pb {
11564 pb.finish_with_message(format!(
11565 "Linked {} AP and {} AR invoices",
11566 ap_invoices.len(),
11567 ar_invoices.len()
11568 ));
11569 }
11570
11571 Ok(SubledgerSnapshot {
11572 ap_invoices,
11573 ar_invoices,
11574 fa_records: Vec::new(),
11575 inventory_positions: Vec::new(),
11576 inventory_movements: Vec::new(),
11577 ar_aging_reports: Vec::new(),
11579 ap_aging_reports: Vec::new(),
11580 depreciation_runs: Vec::new(),
11582 inventory_valuations: Vec::new(),
11583 dunning_runs: Vec::new(),
11585 dunning_letters: Vec::new(),
11586 })
11587 }
11588
11589 #[allow(clippy::too_many_arguments)]
11594 fn generate_ocpm_events(
11595 &mut self,
11596 flows: &DocumentFlowSnapshot,
11597 sourcing: &SourcingSnapshot,
11598 hr: &HrSnapshot,
11599 manufacturing: &ManufacturingSnapshot,
11600 banking: &BankingSnapshot,
11601 audit: &AuditSnapshot,
11602 financial_reporting: &FinancialReportingSnapshot,
11603 ) -> SynthResult<OcpmSnapshot> {
11604 let total_chains = flows.p2p_chains.len()
11605 + flows.o2c_chains.len()
11606 + sourcing.sourcing_projects.len()
11607 + hr.payroll_runs.len()
11608 + manufacturing.production_orders.len()
11609 + banking.customers.len()
11610 + audit.engagements.len()
11611 + financial_reporting.bank_reconciliations.len();
11612 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11613
11614 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11616 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11617
11618 let ocpm_config = OcpmGeneratorConfig {
11620 generate_p2p: true,
11621 generate_o2c: true,
11622 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11623 generate_h2r: !hr.payroll_runs.is_empty(),
11624 generate_mfg: !manufacturing.production_orders.is_empty(),
11625 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11626 generate_bank: !banking.customers.is_empty(),
11627 generate_audit: !audit.engagements.is_empty(),
11628 happy_path_rate: 0.75,
11629 exception_path_rate: 0.20,
11630 error_path_rate: 0.05,
11631 add_duration_variability: true,
11632 duration_std_dev_factor: 0.3,
11633 };
11634 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11635 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11636
11637 let available_users: Vec<String> = self
11639 .master_data
11640 .employees
11641 .iter()
11642 .take(20)
11643 .map(|e| e.user_id.clone())
11644 .collect();
11645
11646 let fallback_date =
11648 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11649 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11650 .unwrap_or(fallback_date);
11651 let base_midnight = base_date
11652 .and_hms_opt(0, 0, 0)
11653 .expect("midnight is always valid");
11654 let base_datetime =
11655 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11656
11657 let add_result = |event_log: &mut OcpmEventLog,
11659 result: datasynth_ocpm::CaseGenerationResult| {
11660 for event in result.events {
11661 event_log.add_event(event);
11662 }
11663 for object in result.objects {
11664 event_log.add_object(object);
11665 }
11666 for relationship in result.relationships {
11667 event_log.add_relationship(relationship);
11668 }
11669 for corr in result.correlation_events {
11670 event_log.add_correlation_event(corr);
11671 }
11672 event_log.add_case(result.case_trace);
11673 };
11674
11675 for chain in &flows.p2p_chains {
11677 let po = &chain.purchase_order;
11678 let documents = P2pDocuments::new(
11679 &po.header.document_id,
11680 &po.vendor_id,
11681 &po.header.company_code,
11682 po.total_net_amount,
11683 &po.header.currency,
11684 &ocpm_uuid_factory,
11685 )
11686 .with_goods_receipt(
11687 chain
11688 .goods_receipts
11689 .first()
11690 .map(|gr| gr.header.document_id.as_str())
11691 .unwrap_or(""),
11692 &ocpm_uuid_factory,
11693 )
11694 .with_invoice(
11695 chain
11696 .vendor_invoice
11697 .as_ref()
11698 .map(|vi| vi.header.document_id.as_str())
11699 .unwrap_or(""),
11700 &ocpm_uuid_factory,
11701 )
11702 .with_payment(
11703 chain
11704 .payment
11705 .as_ref()
11706 .map(|p| p.header.document_id.as_str())
11707 .unwrap_or(""),
11708 &ocpm_uuid_factory,
11709 );
11710
11711 let start_time =
11712 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11713 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11714 add_result(&mut event_log, result);
11715
11716 if let Some(pb) = &pb {
11717 pb.inc(1);
11718 }
11719 }
11720
11721 for chain in &flows.o2c_chains {
11723 let so = &chain.sales_order;
11724 let documents = O2cDocuments::new(
11725 &so.header.document_id,
11726 &so.customer_id,
11727 &so.header.company_code,
11728 so.total_net_amount,
11729 &so.header.currency,
11730 &ocpm_uuid_factory,
11731 )
11732 .with_delivery(
11733 chain
11734 .deliveries
11735 .first()
11736 .map(|d| d.header.document_id.as_str())
11737 .unwrap_or(""),
11738 &ocpm_uuid_factory,
11739 )
11740 .with_invoice(
11741 chain
11742 .customer_invoice
11743 .as_ref()
11744 .map(|ci| ci.header.document_id.as_str())
11745 .unwrap_or(""),
11746 &ocpm_uuid_factory,
11747 )
11748 .with_receipt(
11749 chain
11750 .customer_receipt
11751 .as_ref()
11752 .map(|r| r.header.document_id.as_str())
11753 .unwrap_or(""),
11754 &ocpm_uuid_factory,
11755 );
11756
11757 let start_time =
11758 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11759 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11760 add_result(&mut event_log, result);
11761
11762 if let Some(pb) = &pb {
11763 pb.inc(1);
11764 }
11765 }
11766
11767 for project in &sourcing.sourcing_projects {
11769 let vendor_id = sourcing
11771 .contracts
11772 .iter()
11773 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11774 .map(|c| c.vendor_id.clone())
11775 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11776 .or_else(|| {
11777 self.master_data
11778 .vendors
11779 .first()
11780 .map(|v| v.vendor_id.clone())
11781 })
11782 .unwrap_or_else(|| "V000".to_string());
11783 let mut docs = S2cDocuments::new(
11784 &project.project_id,
11785 &vendor_id,
11786 &project.company_code,
11787 project.estimated_annual_spend,
11788 &ocpm_uuid_factory,
11789 );
11790 if let Some(rfx) = sourcing
11792 .rfx_events
11793 .iter()
11794 .find(|r| r.sourcing_project_id == project.project_id)
11795 {
11796 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11797 if let Some(bid) = sourcing.bids.iter().find(|b| {
11799 b.rfx_id == rfx.rfx_id
11800 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11801 }) {
11802 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11803 }
11804 }
11805 if let Some(contract) = sourcing
11807 .contracts
11808 .iter()
11809 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11810 {
11811 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11812 }
11813 let start_time = base_datetime - chrono::Duration::days(90);
11814 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11815 add_result(&mut event_log, result);
11816
11817 if let Some(pb) = &pb {
11818 pb.inc(1);
11819 }
11820 }
11821
11822 for run in &hr.payroll_runs {
11824 let employee_id = hr
11826 .payroll_line_items
11827 .iter()
11828 .find(|li| li.payroll_id == run.payroll_id)
11829 .map(|li| li.employee_id.as_str())
11830 .unwrap_or("EMP000");
11831 let docs = H2rDocuments::new(
11832 &run.payroll_id,
11833 employee_id,
11834 &run.company_code,
11835 run.total_gross,
11836 &ocpm_uuid_factory,
11837 )
11838 .with_time_entries(
11839 hr.time_entries
11840 .iter()
11841 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11842 .take(5)
11843 .map(|t| t.entry_id.as_str())
11844 .collect(),
11845 );
11846 let start_time = base_datetime - chrono::Duration::days(30);
11847 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11848 add_result(&mut event_log, result);
11849
11850 if let Some(pb) = &pb {
11851 pb.inc(1);
11852 }
11853 }
11854
11855 for order in &manufacturing.production_orders {
11857 let mut docs = MfgDocuments::new(
11858 &order.order_id,
11859 &order.material_id,
11860 &order.company_code,
11861 order.planned_quantity,
11862 &ocpm_uuid_factory,
11863 )
11864 .with_operations(
11865 order
11866 .operations
11867 .iter()
11868 .map(|o| format!("OP-{:04}", o.operation_number))
11869 .collect::<Vec<_>>()
11870 .iter()
11871 .map(std::string::String::as_str)
11872 .collect(),
11873 );
11874 if let Some(insp) = manufacturing
11876 .quality_inspections
11877 .iter()
11878 .find(|i| i.reference_id == order.order_id)
11879 {
11880 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11881 }
11882 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11884 cc.items
11885 .iter()
11886 .any(|item| item.material_id == order.material_id)
11887 }) {
11888 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11889 }
11890 let start_time = base_datetime - chrono::Duration::days(60);
11891 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11892 add_result(&mut event_log, result);
11893
11894 if let Some(pb) = &pb {
11895 pb.inc(1);
11896 }
11897 }
11898
11899 for customer in &banking.customers {
11901 let customer_id_str = customer.customer_id.to_string();
11902 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11903 if let Some(account) = banking
11905 .accounts
11906 .iter()
11907 .find(|a| a.primary_owner_id == customer.customer_id)
11908 {
11909 let account_id_str = account.account_id.to_string();
11910 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11911 let txn_strs: Vec<String> = banking
11913 .transactions
11914 .iter()
11915 .filter(|t| t.account_id == account.account_id)
11916 .take(10)
11917 .map(|t| t.transaction_id.to_string())
11918 .collect();
11919 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11920 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11921 .transactions
11922 .iter()
11923 .filter(|t| t.account_id == account.account_id)
11924 .take(10)
11925 .map(|t| t.amount)
11926 .collect();
11927 if !txn_ids.is_empty() {
11928 docs = docs.with_transactions(txn_ids, txn_amounts);
11929 }
11930 }
11931 let start_time = base_datetime - chrono::Duration::days(180);
11932 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11933 add_result(&mut event_log, result);
11934
11935 if let Some(pb) = &pb {
11936 pb.inc(1);
11937 }
11938 }
11939
11940 for engagement in &audit.engagements {
11942 let engagement_id_str = engagement.engagement_id.to_string();
11943 let docs = AuditDocuments::new(
11944 &engagement_id_str,
11945 &engagement.client_entity_id,
11946 &ocpm_uuid_factory,
11947 )
11948 .with_workpapers(
11949 audit
11950 .workpapers
11951 .iter()
11952 .filter(|w| w.engagement_id == engagement.engagement_id)
11953 .take(10)
11954 .map(|w| w.workpaper_id.to_string())
11955 .collect::<Vec<_>>()
11956 .iter()
11957 .map(std::string::String::as_str)
11958 .collect(),
11959 )
11960 .with_evidence(
11961 audit
11962 .evidence
11963 .iter()
11964 .filter(|e| e.engagement_id == engagement.engagement_id)
11965 .take(10)
11966 .map(|e| e.evidence_id.to_string())
11967 .collect::<Vec<_>>()
11968 .iter()
11969 .map(std::string::String::as_str)
11970 .collect(),
11971 )
11972 .with_risks(
11973 audit
11974 .risk_assessments
11975 .iter()
11976 .filter(|r| r.engagement_id == engagement.engagement_id)
11977 .take(5)
11978 .map(|r| r.risk_id.to_string())
11979 .collect::<Vec<_>>()
11980 .iter()
11981 .map(std::string::String::as_str)
11982 .collect(),
11983 )
11984 .with_findings(
11985 audit
11986 .findings
11987 .iter()
11988 .filter(|f| f.engagement_id == engagement.engagement_id)
11989 .take(5)
11990 .map(|f| f.finding_id.to_string())
11991 .collect::<Vec<_>>()
11992 .iter()
11993 .map(std::string::String::as_str)
11994 .collect(),
11995 )
11996 .with_judgments(
11997 audit
11998 .judgments
11999 .iter()
12000 .filter(|j| j.engagement_id == engagement.engagement_id)
12001 .take(5)
12002 .map(|j| j.judgment_id.to_string())
12003 .collect::<Vec<_>>()
12004 .iter()
12005 .map(std::string::String::as_str)
12006 .collect(),
12007 );
12008 let start_time = base_datetime - chrono::Duration::days(120);
12009 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12010 add_result(&mut event_log, result);
12011
12012 if let Some(pb) = &pb {
12013 pb.inc(1);
12014 }
12015 }
12016
12017 for recon in &financial_reporting.bank_reconciliations {
12019 let docs = BankReconDocuments::new(
12020 &recon.reconciliation_id,
12021 &recon.bank_account_id,
12022 &recon.company_code,
12023 recon.bank_ending_balance,
12024 &ocpm_uuid_factory,
12025 )
12026 .with_statement_lines(
12027 recon
12028 .statement_lines
12029 .iter()
12030 .take(20)
12031 .map(|l| l.line_id.as_str())
12032 .collect(),
12033 )
12034 .with_reconciling_items(
12035 recon
12036 .reconciling_items
12037 .iter()
12038 .take(10)
12039 .map(|i| i.item_id.as_str())
12040 .collect(),
12041 );
12042 let start_time = base_datetime - chrono::Duration::days(30);
12043 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12044 add_result(&mut event_log, result);
12045
12046 if let Some(pb) = &pb {
12047 pb.inc(1);
12048 }
12049 }
12050
12051 event_log.compute_variants();
12053
12054 let summary = event_log.summary();
12055
12056 if let Some(pb) = pb {
12057 pb.finish_with_message(format!(
12058 "Generated {} OCPM events, {} objects",
12059 summary.event_count, summary.object_count
12060 ));
12061 }
12062
12063 Ok(OcpmSnapshot {
12064 event_count: summary.event_count,
12065 object_count: summary.object_count,
12066 case_count: summary.case_count,
12067 event_log: Some(event_log),
12068 })
12069 }
12070
12071 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12073 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12074
12075 let total_rate = if self.config.anomaly_injection.enabled {
12078 self.config.anomaly_injection.rates.total_rate
12079 } else if self.config.fraud.enabled {
12080 self.config.fraud.fraud_rate
12081 } else {
12082 0.02
12083 };
12084
12085 let fraud_rate = if self.config.anomaly_injection.enabled {
12086 self.config.anomaly_injection.rates.fraud_rate
12087 } else {
12088 AnomalyRateConfig::default().fraud_rate
12089 };
12090
12091 let error_rate = if self.config.anomaly_injection.enabled {
12092 self.config.anomaly_injection.rates.error_rate
12093 } else {
12094 AnomalyRateConfig::default().error_rate
12095 };
12096
12097 let process_issue_rate = if self.config.anomaly_injection.enabled {
12098 self.config.anomaly_injection.rates.process_rate
12099 } else {
12100 AnomalyRateConfig::default().process_issue_rate
12101 };
12102
12103 let anomaly_config = AnomalyInjectorConfig {
12104 rates: AnomalyRateConfig {
12105 total_rate,
12106 fraud_rate,
12107 error_rate,
12108 process_issue_rate,
12109 ..Default::default()
12110 },
12111 seed: self.seed + 5000,
12112 ..Default::default()
12113 };
12114
12115 let mut injector = AnomalyInjector::new(anomaly_config);
12116 let result = injector.process_entries(entries);
12117
12118 let sota12_tagged: usize = {
12128 use datasynth_config::schema::{
12129 ConcentrationConfig, SourceConditionalRarityPassConfig,
12130 };
12131 use datasynth_generators::concentration::ConcentrationPipeline;
12132
12133 let mut effective: ConcentrationConfig = self.config.concentration.clone();
12136 if effective.source_conditional_rarity.is_none() {
12137 if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12138 effective.enabled = true;
12139 effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12140 rate,
12141 min_surprise: None,
12142 min_per_source_lines: None,
12143 });
12144 }
12145 }
12146
12147 if !effective.enabled {
12148 0
12149 } else {
12150 let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12151 SynthError::generation(format!(
12152 "ConcentrationPipeline construction failed: {e}"
12153 ))
12154 })?;
12155 if !pipeline.is_active() {
12156 0
12157 } else {
12158 const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12160 let stats =
12161 pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12162 stats
12163 .iter()
12164 .filter(|s| s.pass == "source_conditional_rarity")
12165 .map(|s| s.entries_modified)
12166 .sum()
12167 }
12168 }
12169 };
12170
12171 if let Some(pb) = &pb {
12172 pb.inc(entries.len() as u64);
12173 pb.finish_with_message("Anomaly injection complete");
12174 }
12175
12176 let mut by_type = HashMap::new();
12177 for label in &result.labels {
12178 *by_type
12179 .entry(format!("{:?}", label.anomaly_type))
12180 .or_insert(0) += 1;
12181 }
12182 if sota12_tagged > 0 {
12183 *by_type
12184 .entry("SourceConditionalRarity".to_string())
12185 .or_insert(0) += sota12_tagged;
12186 }
12187
12188 Ok(AnomalyLabels {
12189 labels: result.labels,
12190 summary: Some(result.summary),
12191 by_type,
12192 })
12193 }
12194
12195 fn validate_journal_entries(
12204 &mut self,
12205 entries: &[JournalEntry],
12206 ) -> SynthResult<BalanceValidationResult> {
12207 let clean_entries: Vec<&JournalEntry> = entries
12209 .iter()
12210 .filter(|e| {
12211 e.header
12212 .header_text
12213 .as_ref()
12214 .map(|t| !t.contains("[HUMAN_ERROR:"))
12215 .unwrap_or(true)
12216 })
12217 .collect();
12218
12219 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12220
12221 let config = BalanceTrackerConfig {
12223 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
12227 };
12228 let validation_currency = self
12229 .config
12230 .companies
12231 .first()
12232 .map(|c| c.currency.clone())
12233 .unwrap_or_else(|| "USD".to_string());
12234
12235 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12236
12237 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12239 let errors = tracker.apply_entries(&clean_refs);
12240
12241 if let Some(pb) = &pb {
12242 pb.inc(entries.len() as u64);
12243 }
12244
12245 let has_unbalanced = tracker
12248 .get_validation_errors()
12249 .iter()
12250 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12251
12252 let mut all_errors = errors;
12255 all_errors.extend(tracker.get_validation_errors().iter().cloned());
12256 let company_codes: Vec<String> = self
12257 .config
12258 .companies
12259 .iter()
12260 .map(|c| c.code.clone())
12261 .collect();
12262
12263 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12264 .map(|d| d + chrono::Months::new(self.config.global.period_months))
12265 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12266
12267 for company_code in &company_codes {
12268 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12269 all_errors.push(e);
12270 }
12271 }
12272
12273 let stats = tracker.get_statistics();
12275
12276 let is_balanced = all_errors.is_empty();
12278
12279 if let Some(pb) = pb {
12280 let msg = if is_balanced {
12281 "Balance validation passed"
12282 } else {
12283 "Balance validation completed with errors"
12284 };
12285 pb.finish_with_message(msg);
12286 }
12287
12288 Ok(BalanceValidationResult {
12289 validated: true,
12290 is_balanced,
12291 entries_processed: stats.entries_processed,
12292 total_debits: stats.total_debits,
12293 total_credits: stats.total_credits,
12294 accounts_tracked: stats.accounts_tracked,
12295 companies_tracked: stats.companies_tracked,
12296 validation_errors: all_errors,
12297 has_unbalanced_entries: has_unbalanced,
12298 })
12299 }
12300
12301 fn inject_data_quality(
12306 &mut self,
12307 entries: &mut [JournalEntry],
12308 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12309 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12310
12311 let config = if self.config.data_quality.enabled {
12314 let dq = &self.config.data_quality;
12315 let field_rates = dq.missing_values.field_rates.clone();
12319 let mut required_fields: std::collections::HashSet<String> =
12320 dq.missing_values.protected_fields.iter().cloned().collect();
12321 for f in [
12324 "document_id",
12325 "company_code",
12326 "posting_date",
12327 "fiscal_year",
12328 "fiscal_period",
12329 "gl_account",
12330 "line_number",
12331 "transaction_id",
12332 ] {
12333 required_fields.insert(f.to_string());
12334 }
12335 DataQualityConfig {
12336 enable_missing_values: dq.missing_values.enabled,
12337 missing_values: datasynth_generators::MissingValueConfig {
12338 global_rate: dq.effective_missing_rate(),
12339 field_rates,
12340 required_fields,
12341 ..Default::default()
12342 },
12343 enable_format_variations: dq.format_variations.enabled,
12344 format_variations: datasynth_generators::FormatVariationConfig {
12345 date_variation_rate: dq.format_variations.dates.rate,
12346 amount_variation_rate: dq.format_variations.amounts.rate,
12347 identifier_variation_rate: dq.format_variations.identifiers.rate,
12348 ..Default::default()
12349 },
12350 enable_duplicates: dq.duplicates.enabled,
12351 duplicates: datasynth_generators::DuplicateConfig {
12352 duplicate_rate: dq.effective_duplicate_rate(),
12353 ..Default::default()
12354 },
12355 enable_typos: dq.typos.enabled,
12356 typos: datasynth_generators::TypoConfig {
12357 char_error_rate: dq.effective_typo_rate(),
12358 ..Default::default()
12359 },
12360 enable_encoding_issues: dq.encoding_issues.enabled,
12361 encoding_issue_rate: dq.encoding_issues.rate,
12362 seed: self.seed.wrapping_add(77), track_statistics: true,
12364 }
12365 } else {
12366 DataQualityConfig::minimal()
12367 };
12368 let mut injector = DataQualityInjector::new(config);
12369
12370 injector.set_country_pack(self.primary_pack().clone());
12372
12373 let context = HashMap::new();
12375
12376 for entry in entries.iter_mut() {
12377 if let Some(text) = &entry.header.header_text {
12379 let processed = injector.process_text_field(
12380 "header_text",
12381 text,
12382 &entry.header.document_id.to_string(),
12383 &context,
12384 );
12385 match processed {
12386 Some(new_text) if new_text != *text => {
12387 entry.header.header_text = Some(new_text);
12388 }
12389 None => {
12390 entry.header.header_text = None; }
12392 _ => {}
12393 }
12394 }
12395
12396 if let Some(ref_text) = &entry.header.reference {
12398 let processed = injector.process_text_field(
12399 "reference",
12400 ref_text,
12401 &entry.header.document_id.to_string(),
12402 &context,
12403 );
12404 match processed {
12405 Some(new_text) if new_text != *ref_text => {
12406 entry.header.reference = Some(new_text);
12407 }
12408 None => {
12409 entry.header.reference = None;
12410 }
12411 _ => {}
12412 }
12413 }
12414
12415 let user_persona = entry.header.user_persona.clone();
12417 if let Some(processed) = injector.process_text_field(
12418 "user_persona",
12419 &user_persona,
12420 &entry.header.document_id.to_string(),
12421 &context,
12422 ) {
12423 if processed != user_persona {
12424 entry.header.user_persona = processed;
12425 }
12426 }
12427
12428 for line in &mut entry.lines {
12430 if let Some(ref text) = line.line_text {
12432 let processed = injector.process_text_field(
12433 "line_text",
12434 text,
12435 &entry.header.document_id.to_string(),
12436 &context,
12437 );
12438 match processed {
12439 Some(new_text) if new_text != *text => {
12440 line.line_text = Some(new_text);
12441 }
12442 None => {
12443 line.line_text = None;
12444 }
12445 _ => {}
12446 }
12447 }
12448
12449 if let Some(cc) = &line.cost_center {
12451 let processed = injector.process_text_field(
12452 "cost_center",
12453 cc,
12454 &entry.header.document_id.to_string(),
12455 &context,
12456 );
12457 match processed {
12458 Some(new_cc) if new_cc != *cc => {
12459 line.cost_center = Some(new_cc);
12460 }
12461 None => {
12462 line.cost_center = None;
12463 }
12464 _ => {}
12465 }
12466 }
12467
12468 macro_rules! process_opt_field {
12476 ($field_name:expr, $opt:expr) => {
12477 if let Some(val) = $opt.as_ref() {
12478 match injector.process_text_field(
12479 $field_name,
12480 val,
12481 &entry.header.document_id.to_string(),
12482 &context,
12483 ) {
12484 Some(new_val) if new_val != *val => {
12485 *$opt = Some(new_val);
12486 }
12487 None => {
12488 *$opt = None;
12489 }
12490 _ => {}
12491 }
12492 }
12493 };
12494 }
12495
12496 process_opt_field!("profit_center", &mut line.profit_center);
12497 process_opt_field!("assignment", &mut line.assignment);
12498 process_opt_field!("tax_code", &mut line.tax_code);
12499 process_opt_field!("account_description", &mut line.account_description);
12500 process_opt_field!(
12501 "auxiliary_account_number",
12502 &mut line.auxiliary_account_number
12503 );
12504 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12505 process_opt_field!("lettrage", &mut line.lettrage);
12506 }
12507
12508 if let Some(pb) = &pb {
12509 pb.inc(1);
12510 }
12511 }
12512
12513 if let Some(pb) = pb {
12514 pb.finish_with_message("Data quality injection complete");
12515 }
12516
12517 let quality_issues = injector.issues().to_vec();
12518 Ok((injector.stats().clone(), quality_issues))
12519 }
12520
12521 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12532 let use_fsm = self
12534 .config
12535 .audit
12536 .fsm
12537 .as_ref()
12538 .map(|f| f.enabled)
12539 .unwrap_or(false);
12540
12541 if use_fsm {
12542 return self.generate_audit_data_with_fsm(entries);
12543 }
12544
12545 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12547 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12548 let fiscal_year = start_date.year() as u16;
12549 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12550
12551 let total_revenue: rust_decimal::Decimal = entries
12553 .iter()
12554 .flat_map(|e| e.lines.iter())
12555 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12556 .map(|l| l.credit_amount)
12557 .sum();
12558
12559 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12561
12562 let mut snapshot = AuditSnapshot::default();
12563
12564 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12566 engagement_gen.set_team_config(&self.config.audit.team);
12569
12570 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12571 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12575 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12576 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12577 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12578 finding_gen.set_template_provider(self.template_provider.clone());
12580 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12581 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12582 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12583 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12584 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12585 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12586 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12587
12588 let accounts: Vec<String> = self
12590 .coa
12591 .as_ref()
12592 .map(|coa| {
12593 coa.get_postable_accounts()
12594 .iter()
12595 .map(|acc| acc.account_code().to_string())
12596 .collect()
12597 })
12598 .unwrap_or_default();
12599
12600 for (i, company) in self.config.companies.iter().enumerate() {
12602 let company_revenue = total_revenue
12604 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12605
12606 let engagements_for_company =
12608 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12609 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12610 1
12611 } else {
12612 0
12613 };
12614
12615 for _eng_idx in 0..(engagements_for_company + extra) {
12616 let eng_type =
12621 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12622
12623 let mut engagement = engagement_gen.generate_engagement(
12625 &company.code,
12626 &company.name,
12627 fiscal_year,
12628 period_end,
12629 company_revenue,
12630 Some(eng_type),
12631 );
12632
12633 if !self.master_data.employees.is_empty() {
12635 let emp_count = self.master_data.employees.len();
12636 let base = (i * 10 + _eng_idx) % emp_count;
12638 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12639 .employee_id
12640 .clone();
12641 engagement.engagement_manager_id = self.master_data.employees
12642 [(base + 1) % emp_count]
12643 .employee_id
12644 .clone();
12645 let real_team: Vec<String> = engagement
12646 .team_member_ids
12647 .iter()
12648 .enumerate()
12649 .map(|(j, _)| {
12650 self.master_data.employees[(base + 2 + j) % emp_count]
12651 .employee_id
12652 .clone()
12653 })
12654 .collect();
12655 engagement.team_member_ids = real_team;
12656 }
12657
12658 if let Some(pb) = &pb {
12659 pb.inc(1);
12660 }
12661
12662 let team_members: Vec<String> = engagement.team_member_ids.clone();
12664
12665 let workpapers = if self.config.audit.generate_workpapers {
12671 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12672 } else {
12673 Vec::new()
12674 };
12675
12676 for wp in &workpapers {
12677 if let Some(pb) = &pb {
12678 pb.inc(1);
12679 }
12680
12681 let evidence = evidence_gen.generate_evidence_for_workpaper(
12683 wp,
12684 &team_members,
12685 wp.preparer_date,
12686 );
12687
12688 for _ in &evidence {
12689 if let Some(pb) = &pb {
12690 pb.inc(1);
12691 }
12692 }
12693
12694 snapshot.evidence.extend(evidence);
12695 }
12696
12697 let risks =
12699 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12700
12701 for _ in &risks {
12702 if let Some(pb) = &pb {
12703 pb.inc(1);
12704 }
12705 }
12706 snapshot.risk_assessments.extend(risks);
12707
12708 let findings = finding_gen.generate_findings_for_engagement(
12710 &engagement,
12711 &workpapers,
12712 &team_members,
12713 );
12714
12715 for _ in &findings {
12716 if let Some(pb) = &pb {
12717 pb.inc(1);
12718 }
12719 }
12720 snapshot.findings.extend(findings);
12721
12722 let judgments =
12724 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12725
12726 for _ in &judgments {
12727 if let Some(pb) = &pb {
12728 pb.inc(1);
12729 }
12730 }
12731 snapshot.judgments.extend(judgments);
12732
12733 let (confs, resps) =
12735 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12736 snapshot.confirmations.extend(confs);
12737 snapshot.confirmation_responses.extend(resps);
12738
12739 let team_pairs: Vec<(String, String)> = team_members
12741 .iter()
12742 .map(|id| {
12743 let name = self
12744 .master_data
12745 .employees
12746 .iter()
12747 .find(|e| e.employee_id == *id)
12748 .map(|e| e.display_name.clone())
12749 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12750 (id.clone(), name)
12751 })
12752 .collect();
12753 for wp in &workpapers {
12754 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12755 snapshot.procedure_steps.extend(steps);
12756 }
12757
12758 for wp in &workpapers {
12760 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12761 snapshot.samples.push(sample);
12762 }
12763 }
12764
12765 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12767 snapshot.analytical_results.extend(analytical);
12768
12769 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12771 snapshot.ia_functions.push(ia_func);
12772 snapshot.ia_reports.extend(ia_reports);
12773
12774 let vendor_names: Vec<String> = self
12776 .master_data
12777 .vendors
12778 .iter()
12779 .map(|v| v.name.clone())
12780 .collect();
12781 let customer_names: Vec<String> = self
12782 .master_data
12783 .customers
12784 .iter()
12785 .map(|c| c.name.clone())
12786 .collect();
12787 let (parties, rp_txns) =
12788 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12789 snapshot.related_parties.extend(parties);
12790 snapshot.related_party_transactions.extend(rp_txns);
12791
12792 snapshot.workpapers.extend(workpapers);
12794
12795 {
12797 let scope_id = format!(
12798 "SCOPE-{}-{}",
12799 engagement.engagement_id.simple(),
12800 &engagement.client_entity_id
12801 );
12802 let scope = datasynth_core::models::audit::AuditScope::new(
12803 scope_id.clone(),
12804 engagement.engagement_id.to_string(),
12805 engagement.client_entity_id.clone(),
12806 engagement.materiality,
12807 );
12808 let mut eng = engagement;
12810 eng.scope_id = Some(scope_id);
12811 snapshot.audit_scopes.push(scope);
12812 snapshot.engagements.push(eng);
12813 }
12814 }
12815 }
12816
12817 if self.config.companies.len() > 1 {
12821 let group_materiality = snapshot
12824 .engagements
12825 .first()
12826 .map(|e| e.materiality)
12827 .unwrap_or_else(|| {
12828 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12829 total_revenue * pct
12830 });
12831
12832 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12833 let group_engagement_id = snapshot
12834 .engagements
12835 .first()
12836 .map(|e| e.engagement_id.to_string())
12837 .unwrap_or_else(|| "GROUP-ENG".to_string());
12838
12839 let component_snapshot = component_gen.generate(
12840 &self.config.companies,
12841 group_materiality,
12842 &group_engagement_id,
12843 period_end,
12844 );
12845
12846 snapshot.component_auditors = component_snapshot.component_auditors;
12847 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12848 snapshot.component_instructions = component_snapshot.component_instructions;
12849 snapshot.component_reports = component_snapshot.component_reports;
12850
12851 info!(
12852 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12853 snapshot.component_auditors.len(),
12854 snapshot.component_instructions.len(),
12855 snapshot.component_reports.len(),
12856 );
12857 }
12858
12859 {
12863 let applicable_framework = self
12864 .config
12865 .accounting_standards
12866 .framework
12867 .as_ref()
12868 .map(|f| format!("{f:?}"))
12869 .unwrap_or_else(|| "IFRS".to_string());
12870
12871 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12872 let entity_count = self.config.companies.len();
12873
12874 for engagement in &snapshot.engagements {
12875 let company = self
12876 .config
12877 .companies
12878 .iter()
12879 .find(|c| c.code == engagement.client_entity_id);
12880 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12881 let letter_date = engagement.planning_start;
12882 let letter = letter_gen.generate(
12883 &engagement.engagement_id.to_string(),
12884 &engagement.client_name,
12885 entity_count,
12886 engagement.period_end_date,
12887 currency,
12888 &applicable_framework,
12889 letter_date,
12890 );
12891 snapshot.engagement_letters.push(letter);
12892 }
12893
12894 info!(
12895 "ISA 210 engagement letters: {} generated",
12896 snapshot.engagement_letters.len()
12897 );
12898 }
12899
12900 if self.phase_config.generate_legal_documents {
12904 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12905 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12906 for engagement in &snapshot.engagements {
12907 let employee_names: Vec<String> = self
12911 .master_data
12912 .employees
12913 .iter()
12914 .filter(|e| e.company_code == engagement.client_entity_id)
12915 .map(|e| e.display_name.clone())
12916 .collect();
12917 let names_to_use = if !employee_names.is_empty() {
12918 employee_names
12919 } else {
12920 self.master_data
12921 .employees
12922 .iter()
12923 .take(10)
12924 .map(|e| e.display_name.clone())
12925 .collect()
12926 };
12927 let docs = legal_gen.generate(
12928 &engagement.client_entity_id,
12929 engagement.fiscal_year as i32,
12930 &names_to_use,
12931 );
12932 snapshot.legal_documents.extend(docs);
12933 }
12934 info!(
12935 "v3.3.0 legal documents: {} emitted across {} engagements",
12936 snapshot.legal_documents.len(),
12937 snapshot.engagements.len()
12938 );
12939 }
12940
12941 if self.phase_config.generate_it_controls {
12951 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12952 use std::collections::HashMap;
12953 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12954
12955 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12958 HashMap::new();
12959 for engagement in &snapshot.engagements {
12960 let entry = by_company
12961 .entry(engagement.client_entity_id.clone())
12962 .or_insert((engagement.planning_start, engagement.period_end_date));
12963 if engagement.planning_start < entry.0 {
12964 entry.0 = engagement.planning_start;
12965 }
12966 if engagement.period_end_date > entry.1 {
12967 entry.1 = engagement.period_end_date;
12968 }
12969 }
12970
12971 let systems: Vec<String> = vec![
12975 "SAP ECC",
12976 "SAP S/4 HANA",
12977 "Oracle EBS",
12978 "Workday",
12979 "NetSuite",
12980 "Active Directory",
12981 "SharePoint",
12982 "Salesforce",
12983 "ServiceNow",
12984 "Jira",
12985 "GitHub Enterprise",
12986 "AWS Console",
12987 "Okta",
12988 ]
12989 .into_iter()
12990 .map(String::from)
12991 .collect();
12992
12993 for (company_code, (start, end)) in by_company {
12994 let emps: Vec<(String, String)> = self
12995 .master_data
12996 .employees
12997 .iter()
12998 .filter(|e| e.company_code == company_code)
12999 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13000 .collect();
13001 if emps.is_empty() {
13002 continue;
13003 }
13004 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13007 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13008 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13009 snapshot.it_controls_access_logs.extend(access_logs);
13010 snapshot.it_controls_change_records.extend(change_records);
13011 }
13012
13013 info!(
13014 "v3.3.0 IT controls: {} access logs, {} change records",
13015 snapshot.it_controls_access_logs.len(),
13016 snapshot.it_controls_change_records.len()
13017 );
13018 }
13019
13020 {
13024 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13025 let entity_codes: Vec<String> = self
13026 .config
13027 .companies
13028 .iter()
13029 .map(|c| c.code.clone())
13030 .collect();
13031 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13032 info!(
13033 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13034 subsequent.len(),
13035 subsequent
13036 .iter()
13037 .filter(|e| matches!(
13038 e.classification,
13039 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13040 ))
13041 .count(),
13042 subsequent
13043 .iter()
13044 .filter(|e| matches!(
13045 e.classification,
13046 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13047 ))
13048 .count(),
13049 );
13050 snapshot.subsequent_events = subsequent;
13051 }
13052
13053 {
13057 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13058 let entity_codes: Vec<String> = self
13059 .config
13060 .companies
13061 .iter()
13062 .map(|c| c.code.clone())
13063 .collect();
13064 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13065 info!(
13066 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13067 soc_snapshot.service_organizations.len(),
13068 soc_snapshot.soc_reports.len(),
13069 soc_snapshot.user_entity_controls.len(),
13070 );
13071 snapshot.service_organizations = soc_snapshot.service_organizations;
13072 snapshot.soc_reports = soc_snapshot.soc_reports;
13073 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13074 }
13075
13076 {
13080 use datasynth_generators::audit::going_concern_generator::{
13081 GoingConcernGenerator, GoingConcernInput,
13082 };
13083 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13084 let entity_codes: Vec<String> = self
13085 .config
13086 .companies
13087 .iter()
13088 .map(|c| c.code.clone())
13089 .collect();
13090 let assessment_date = period_end + chrono::Duration::days(75);
13092 let period_label = format!("FY{}", period_end.year());
13093
13094 let gc_inputs: Vec<GoingConcernInput> = self
13105 .config
13106 .companies
13107 .iter()
13108 .map(|company| {
13109 let code = &company.code;
13110 let mut revenue = rust_decimal::Decimal::ZERO;
13111 let mut expenses = rust_decimal::Decimal::ZERO;
13112 let mut current_assets = rust_decimal::Decimal::ZERO;
13113 let mut current_liabs = rust_decimal::Decimal::ZERO;
13114 let mut total_debt = rust_decimal::Decimal::ZERO;
13115
13116 for je in entries.iter().filter(|je| &je.header.company_code == code) {
13117 for line in &je.lines {
13118 let acct = line.gl_account.as_str();
13119 let net = line.debit_amount - line.credit_amount;
13120 if acct.starts_with('4') {
13121 revenue -= net;
13123 } else if acct.starts_with('6') {
13124 expenses += net;
13126 }
13127 if acct.starts_with('1') {
13129 if let Ok(n) = acct.parse::<u32>() {
13131 if (1000..=1499).contains(&n) {
13132 current_assets += net;
13133 }
13134 }
13135 } else if acct.starts_with('2') {
13136 if let Ok(n) = acct.parse::<u32>() {
13137 if (2000..=2499).contains(&n) {
13138 current_liabs -= net; } else if (2500..=2999).contains(&n) {
13141 total_debt -= net;
13143 }
13144 }
13145 }
13146 }
13147 }
13148
13149 let net_income = revenue - expenses;
13150 let working_capital = current_assets - current_liabs;
13151 let operating_cash_flow = net_income;
13154
13155 GoingConcernInput {
13156 entity_code: code.clone(),
13157 net_income,
13158 working_capital,
13159 operating_cash_flow,
13160 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13161 assessment_date,
13162 }
13163 })
13164 .collect();
13165
13166 let assessments = if gc_inputs.is_empty() {
13167 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13168 } else {
13169 gc_gen.generate_for_entities_with_inputs(
13170 &entity_codes,
13171 &gc_inputs,
13172 assessment_date,
13173 &period_label,
13174 )
13175 };
13176 info!(
13177 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13178 assessments.len(),
13179 assessments.iter().filter(|a| matches!(
13180 a.auditor_conclusion,
13181 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13182 )).count(),
13183 assessments.iter().filter(|a| matches!(
13184 a.auditor_conclusion,
13185 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13186 )).count(),
13187 assessments.iter().filter(|a| matches!(
13188 a.auditor_conclusion,
13189 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13190 )).count(),
13191 );
13192 snapshot.going_concern_assessments = assessments;
13193 }
13194
13195 {
13199 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13200 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13201 let entity_codes: Vec<String> = self
13202 .config
13203 .companies
13204 .iter()
13205 .map(|c| c.code.clone())
13206 .collect();
13207 let estimates = est_gen.generate_for_entities(&entity_codes);
13208 info!(
13209 "ISA 540 accounting estimates: {} estimates across {} entities \
13210 ({} with retrospective reviews, {} with auditor point estimates)",
13211 estimates.len(),
13212 entity_codes.len(),
13213 estimates
13214 .iter()
13215 .filter(|e| e.retrospective_review.is_some())
13216 .count(),
13217 estimates
13218 .iter()
13219 .filter(|e| e.auditor_point_estimate.is_some())
13220 .count(),
13221 );
13222 snapshot.accounting_estimates = estimates;
13223 }
13224
13225 {
13229 use datasynth_generators::audit::audit_opinion_generator::{
13230 AuditOpinionGenerator, AuditOpinionInput,
13231 };
13232
13233 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13234
13235 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13237 .engagements
13238 .iter()
13239 .map(|eng| {
13240 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13242 .findings
13243 .iter()
13244 .filter(|f| f.engagement_id == eng.engagement_id)
13245 .cloned()
13246 .collect();
13247
13248 let gc = snapshot
13250 .going_concern_assessments
13251 .iter()
13252 .find(|g| g.entity_code == eng.client_entity_id)
13253 .cloned();
13254
13255 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13257 snapshot.component_reports.clone();
13258
13259 let auditor = self
13260 .master_data
13261 .employees
13262 .first()
13263 .map(|e| e.display_name.clone())
13264 .unwrap_or_else(|| "Global Audit LLP".into());
13265
13266 let partner = self
13267 .master_data
13268 .employees
13269 .get(1)
13270 .map(|e| e.display_name.clone())
13271 .unwrap_or_else(|| eng.engagement_partner_id.clone());
13272
13273 AuditOpinionInput {
13274 entity_code: eng.client_entity_id.clone(),
13275 entity_name: eng.client_name.clone(),
13276 engagement_id: eng.engagement_id,
13277 period_end: eng.period_end_date,
13278 findings: eng_findings,
13279 going_concern: gc,
13280 component_reports: comp_reports,
13281 is_us_listed: {
13283 let fw = &self.config.audit_standards.isa_compliance.framework;
13284 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13285 },
13286 auditor_name: auditor,
13287 engagement_partner: partner,
13288 }
13289 })
13290 .collect();
13291
13292 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13293
13294 for go in &generated_opinions {
13295 snapshot
13296 .key_audit_matters
13297 .extend(go.key_audit_matters.clone());
13298 }
13299 snapshot.audit_opinions = generated_opinions
13300 .into_iter()
13301 .map(|go| go.opinion)
13302 .collect();
13303
13304 info!(
13305 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13306 snapshot.audit_opinions.len(),
13307 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13308 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13309 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13310 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13311 );
13312 }
13313
13314 {
13318 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13319
13320 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13321
13322 for (i, company) in self.config.companies.iter().enumerate() {
13323 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13325 .engagements
13326 .iter()
13327 .filter(|e| e.client_entity_id == company.code)
13328 .map(|e| e.engagement_id)
13329 .collect();
13330
13331 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13332 .findings
13333 .iter()
13334 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13335 .cloned()
13336 .collect();
13337
13338 let emp_count = self.master_data.employees.len();
13340 let ceo_name = if emp_count > 0 {
13341 self.master_data.employees[i % emp_count]
13342 .display_name
13343 .clone()
13344 } else {
13345 format!("CEO of {}", company.name)
13346 };
13347 let cfo_name = if emp_count > 1 {
13348 self.master_data.employees[(i + 1) % emp_count]
13349 .display_name
13350 .clone()
13351 } else {
13352 format!("CFO of {}", company.name)
13353 };
13354
13355 let materiality = snapshot
13357 .engagements
13358 .iter()
13359 .find(|e| e.client_entity_id == company.code)
13360 .map(|e| e.materiality)
13361 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13362
13363 let input = SoxGeneratorInput {
13364 company_code: company.code.clone(),
13365 company_name: company.name.clone(),
13366 fiscal_year,
13367 period_end,
13368 findings: company_findings,
13369 ceo_name,
13370 cfo_name,
13371 materiality_threshold: materiality,
13372 revenue_percent: rust_decimal::Decimal::from(100),
13373 assets_percent: rust_decimal::Decimal::from(100),
13374 significant_accounts: vec![
13375 "Revenue".into(),
13376 "Accounts Receivable".into(),
13377 "Inventory".into(),
13378 "Fixed Assets".into(),
13379 "Accounts Payable".into(),
13380 ],
13381 };
13382
13383 let (certs, assessment) = sox_gen.generate(&input);
13384 snapshot.sox_302_certifications.extend(certs);
13385 snapshot.sox_404_assessments.push(assessment);
13386 }
13387
13388 info!(
13389 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13390 snapshot.sox_302_certifications.len(),
13391 snapshot.sox_404_assessments.len(),
13392 snapshot
13393 .sox_404_assessments
13394 .iter()
13395 .filter(|a| a.icfr_effective)
13396 .count(),
13397 snapshot
13398 .sox_404_assessments
13399 .iter()
13400 .filter(|a| !a.icfr_effective)
13401 .count(),
13402 );
13403 }
13404
13405 {
13409 use datasynth_generators::audit::materiality_generator::{
13410 MaterialityGenerator, MaterialityInput,
13411 };
13412
13413 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13414
13415 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13419
13420 for company in &self.config.companies {
13421 let company_code = company.code.clone();
13422
13423 let company_revenue: rust_decimal::Decimal = entries
13425 .iter()
13426 .filter(|e| e.company_code() == company_code)
13427 .flat_map(|e| e.lines.iter())
13428 .filter(|l| l.account_code.starts_with('4'))
13429 .map(|l| l.credit_amount)
13430 .sum();
13431
13432 let total_assets: rust_decimal::Decimal = entries
13434 .iter()
13435 .filter(|e| e.company_code() == company_code)
13436 .flat_map(|e| e.lines.iter())
13437 .filter(|l| l.account_code.starts_with('1'))
13438 .map(|l| l.debit_amount)
13439 .sum();
13440
13441 let total_expenses: rust_decimal::Decimal = entries
13443 .iter()
13444 .filter(|e| e.company_code() == company_code)
13445 .flat_map(|e| e.lines.iter())
13446 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13447 .map(|l| l.debit_amount)
13448 .sum();
13449
13450 let equity: rust_decimal::Decimal = entries
13452 .iter()
13453 .filter(|e| e.company_code() == company_code)
13454 .flat_map(|e| e.lines.iter())
13455 .filter(|l| l.account_code.starts_with('3'))
13456 .map(|l| l.credit_amount)
13457 .sum();
13458
13459 let pretax_income = company_revenue - total_expenses;
13460
13461 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13463 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13464 .unwrap_or(rust_decimal::Decimal::ONE);
13465 (
13466 total_revenue * w,
13467 total_revenue * w * rust_decimal::Decimal::from(3),
13468 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13469 total_revenue * w * rust_decimal::Decimal::from(2),
13470 )
13471 } else {
13472 (company_revenue, total_assets, pretax_income, equity)
13473 };
13474
13475 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13478 entity_code: company_code,
13479 period: format!("FY{}", fiscal_year),
13480 revenue: rev,
13481 pretax_income: pti,
13482 total_assets: assets,
13483 equity: eq,
13484 gross_profit,
13485 });
13486 }
13487
13488 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13489
13490 info!(
13491 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13492 {} total assets, {} equity benchmarks)",
13493 snapshot.materiality_calculations.len(),
13494 snapshot
13495 .materiality_calculations
13496 .iter()
13497 .filter(|m| matches!(
13498 m.benchmark,
13499 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13500 ))
13501 .count(),
13502 snapshot
13503 .materiality_calculations
13504 .iter()
13505 .filter(|m| matches!(
13506 m.benchmark,
13507 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13508 ))
13509 .count(),
13510 snapshot
13511 .materiality_calculations
13512 .iter()
13513 .filter(|m| matches!(
13514 m.benchmark,
13515 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13516 ))
13517 .count(),
13518 snapshot
13519 .materiality_calculations
13520 .iter()
13521 .filter(|m| matches!(
13522 m.benchmark,
13523 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13524 ))
13525 .count(),
13526 );
13527 }
13528
13529 {
13533 use datasynth_generators::audit::cra_generator::CraGenerator;
13534
13535 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13536
13537 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13539 .audit_scopes
13540 .iter()
13541 .map(|s| (s.entity_code.clone(), s.id.clone()))
13542 .collect();
13543
13544 for company in &self.config.companies {
13545 let cras = cra_gen.generate_for_entity(&company.code, None);
13546 let scope_id = entity_scope_map.get(&company.code).cloned();
13547 let cras_with_scope: Vec<_> = cras
13548 .into_iter()
13549 .map(|mut cra| {
13550 cra.scope_id = scope_id.clone();
13551 cra
13552 })
13553 .collect();
13554 snapshot.combined_risk_assessments.extend(cras_with_scope);
13555 }
13556
13557 let significant_count = snapshot
13558 .combined_risk_assessments
13559 .iter()
13560 .filter(|c| c.significant_risk)
13561 .count();
13562 let high_cra_count = snapshot
13563 .combined_risk_assessments
13564 .iter()
13565 .filter(|c| {
13566 matches!(
13567 c.combined_risk,
13568 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13569 )
13570 })
13571 .count();
13572
13573 info!(
13574 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13575 snapshot.combined_risk_assessments.len(),
13576 significant_count,
13577 high_cra_count,
13578 );
13579 }
13580
13581 {
13585 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13586
13587 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13588
13589 for company in &self.config.companies {
13591 let entity_code = company.code.clone();
13592
13593 let tolerable_error = snapshot
13595 .materiality_calculations
13596 .iter()
13597 .find(|m| m.entity_code == entity_code)
13598 .map(|m| m.tolerable_error);
13599
13600 let entity_cras: Vec<_> = snapshot
13602 .combined_risk_assessments
13603 .iter()
13604 .filter(|c| c.entity_code == entity_code)
13605 .cloned()
13606 .collect();
13607
13608 if !entity_cras.is_empty() {
13609 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13610 snapshot.sampling_plans.extend(plans);
13611 snapshot.sampled_items.extend(items);
13612 }
13613 }
13614
13615 let misstatement_count = snapshot
13616 .sampled_items
13617 .iter()
13618 .filter(|i| i.misstatement_found)
13619 .count();
13620
13621 info!(
13622 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13623 snapshot.sampling_plans.len(),
13624 snapshot.sampled_items.len(),
13625 misstatement_count,
13626 );
13627 }
13628
13629 {
13633 use datasynth_generators::audit::scots_generator::{
13634 ScotsGenerator, ScotsGeneratorConfig,
13635 };
13636
13637 let ic_enabled = self.config.intercompany.enabled;
13638
13639 let config = ScotsGeneratorConfig {
13640 intercompany_enabled: ic_enabled,
13641 ..ScotsGeneratorConfig::default()
13642 };
13643 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13644
13645 for company in &self.config.companies {
13646 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13647 snapshot
13648 .significant_transaction_classes
13649 .extend(entity_scots);
13650 }
13651
13652 let estimation_count = snapshot
13653 .significant_transaction_classes
13654 .iter()
13655 .filter(|s| {
13656 matches!(
13657 s.transaction_type,
13658 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13659 )
13660 })
13661 .count();
13662
13663 info!(
13664 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13665 snapshot.significant_transaction_classes.len(),
13666 estimation_count,
13667 );
13668 }
13669
13670 {
13674 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13675
13676 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13677 let entity_codes: Vec<String> = self
13678 .config
13679 .companies
13680 .iter()
13681 .map(|c| c.code.clone())
13682 .collect();
13683 let unusual_flags =
13684 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13685 info!(
13686 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13687 unusual_flags.len(),
13688 unusual_flags
13689 .iter()
13690 .filter(|f| matches!(
13691 f.severity,
13692 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13693 ))
13694 .count(),
13695 unusual_flags
13696 .iter()
13697 .filter(|f| matches!(
13698 f.severity,
13699 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13700 ))
13701 .count(),
13702 unusual_flags
13703 .iter()
13704 .filter(|f| matches!(
13705 f.severity,
13706 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13707 ))
13708 .count(),
13709 );
13710 snapshot.unusual_items = unusual_flags;
13711 }
13712
13713 {
13717 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13718
13719 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13720 let entity_codes: Vec<String> = self
13721 .config
13722 .companies
13723 .iter()
13724 .map(|c| c.code.clone())
13725 .collect();
13726 let current_period_label = format!("FY{fiscal_year}");
13727 let prior_period_label = format!("FY{}", fiscal_year - 1);
13728 let analytical_rels = ar_gen.generate_for_entities(
13729 &entity_codes,
13730 entries,
13731 ¤t_period_label,
13732 &prior_period_label,
13733 );
13734 let out_of_range = analytical_rels
13735 .iter()
13736 .filter(|r| !r.within_expected_range)
13737 .count();
13738 info!(
13739 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13740 analytical_rels.len(),
13741 out_of_range,
13742 );
13743 snapshot.analytical_relationships = analytical_rels;
13744 }
13745
13746 if let Some(pb) = pb {
13747 pb.finish_with_message(format!(
13748 "Audit data: {} engagements, {} workpapers, {} evidence, \
13749 {} confirmations, {} procedure steps, {} samples, \
13750 {} analytical, {} IA funcs, {} related parties, \
13751 {} component auditors, {} letters, {} subsequent events, \
13752 {} service orgs, {} going concern, {} accounting estimates, \
13753 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13754 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13755 {} unusual items, {} analytical relationships",
13756 snapshot.engagements.len(),
13757 snapshot.workpapers.len(),
13758 snapshot.evidence.len(),
13759 snapshot.confirmations.len(),
13760 snapshot.procedure_steps.len(),
13761 snapshot.samples.len(),
13762 snapshot.analytical_results.len(),
13763 snapshot.ia_functions.len(),
13764 snapshot.related_parties.len(),
13765 snapshot.component_auditors.len(),
13766 snapshot.engagement_letters.len(),
13767 snapshot.subsequent_events.len(),
13768 snapshot.service_organizations.len(),
13769 snapshot.going_concern_assessments.len(),
13770 snapshot.accounting_estimates.len(),
13771 snapshot.audit_opinions.len(),
13772 snapshot.key_audit_matters.len(),
13773 snapshot.sox_302_certifications.len(),
13774 snapshot.sox_404_assessments.len(),
13775 snapshot.materiality_calculations.len(),
13776 snapshot.combined_risk_assessments.len(),
13777 snapshot.sampling_plans.len(),
13778 snapshot.significant_transaction_classes.len(),
13779 snapshot.unusual_items.len(),
13780 snapshot.analytical_relationships.len(),
13781 ));
13782 }
13783
13784 {
13791 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13792 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13793 debug!(
13794 "PCAOB-ISA mappings generated: {} mappings",
13795 snapshot.isa_pcaob_mappings.len()
13796 );
13797 }
13798
13799 {
13806 use datasynth_standards::audit::isa_reference::IsaStandard;
13807 snapshot.isa_mappings = IsaStandard::standard_entries();
13808 debug!(
13809 "ISA standard entries generated: {} standards",
13810 snapshot.isa_mappings.len()
13811 );
13812 }
13813
13814 {
13817 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13818 .engagements
13819 .iter()
13820 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13821 .collect();
13822
13823 for rpt in &mut snapshot.related_party_transactions {
13824 if rpt.journal_entry_id.is_some() {
13825 continue; }
13827 let entity = engagement_by_id
13828 .get(&rpt.engagement_id.to_string())
13829 .copied()
13830 .unwrap_or("");
13831
13832 let best_je = entries
13834 .iter()
13835 .filter(|je| je.header.company_code == entity)
13836 .min_by_key(|je| {
13837 (je.header.posting_date - rpt.transaction_date)
13838 .num_days()
13839 .abs()
13840 });
13841
13842 if let Some(je) = best_je {
13843 rpt.journal_entry_id = Some(je.header.document_id.to_string());
13844 }
13845 }
13846
13847 let linked = snapshot
13848 .related_party_transactions
13849 .iter()
13850 .filter(|t| t.journal_entry_id.is_some())
13851 .count();
13852 debug!(
13853 "Linked {}/{} related party transactions to journal entries",
13854 linked,
13855 snapshot.related_party_transactions.len()
13856 );
13857 }
13858
13859 if !snapshot.engagements.is_empty() {
13865 use datasynth_generators::audit_opinion_generator::{
13866 AuditOpinionGenerator, AuditOpinionInput,
13867 };
13868
13869 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13870 let inputs: Vec<AuditOpinionInput> = snapshot
13871 .engagements
13872 .iter()
13873 .map(|eng| {
13874 let findings = snapshot
13875 .findings
13876 .iter()
13877 .filter(|f| f.engagement_id == eng.engagement_id)
13878 .cloned()
13879 .collect();
13880 let going_concern = snapshot
13881 .going_concern_assessments
13882 .iter()
13883 .find(|gc| gc.entity_code == eng.client_entity_id)
13884 .cloned();
13885 let component_reports = snapshot
13888 .component_reports
13889 .iter()
13890 .filter(|r| r.entity_code == eng.client_entity_id)
13891 .cloned()
13892 .collect();
13893
13894 AuditOpinionInput {
13895 entity_code: eng.client_entity_id.clone(),
13896 entity_name: eng.client_name.clone(),
13897 engagement_id: eng.engagement_id,
13898 period_end: eng.period_end_date,
13899 findings,
13900 going_concern,
13901 component_reports,
13902 is_us_listed: matches!(
13903 eng.engagement_type,
13904 datasynth_core::audit::EngagementType::IntegratedAudit
13905 | datasynth_core::audit::EngagementType::Sox404
13906 ),
13907 auditor_name: "DataSynth Audit LLP".to_string(),
13908 engagement_partner: "Engagement Partner".to_string(),
13909 }
13910 })
13911 .collect();
13912
13913 let generated = opinion_gen.generate_batch(&inputs);
13914 for g in generated {
13915 snapshot.key_audit_matters.extend(g.key_audit_matters);
13916 snapshot.audit_opinions.push(g.opinion);
13917 }
13918 debug!(
13919 "Generated {} audit opinions with {} key audit matters",
13920 snapshot.audit_opinions.len(),
13921 snapshot.key_audit_matters.len()
13922 );
13923 }
13924
13925 Ok(snapshot)
13926 }
13927
13928 fn generate_audit_data_with_fsm(
13935 &mut self,
13936 entries: &[JournalEntry],
13937 ) -> SynthResult<AuditSnapshot> {
13938 use datasynth_audit_fsm::{
13939 context::EngagementContext,
13940 engine::AuditFsmEngine,
13941 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13942 };
13943 use rand::SeedableRng;
13944 use rand_chacha::ChaCha8Rng;
13945
13946 info!("Audit FSM: generating audit data via FSM engine");
13947
13948 let fsm_config = self
13949 .config
13950 .audit
13951 .fsm
13952 .as_ref()
13953 .expect("FSM config must be present when FSM is enabled");
13954
13955 let bwp = match fsm_config.blueprint.as_str() {
13957 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13958 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13959 _ => {
13960 warn!(
13961 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13962 fsm_config.blueprint
13963 );
13964 BlueprintWithPreconditions::load_builtin_fsa()
13965 }
13966 }
13967 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13968
13969 let overlay = match fsm_config.overlay.as_str() {
13971 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13972 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13973 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13974 _ => {
13975 warn!(
13976 "Unknown FSM overlay '{}', falling back to builtin:default",
13977 fsm_config.overlay
13978 );
13979 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13980 }
13981 }
13982 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13983
13984 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13986 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13987 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13988
13989 let company = self.config.companies.first();
13991 let company_code = company
13992 .map(|c| c.code.clone())
13993 .unwrap_or_else(|| "UNKNOWN".to_string());
13994 let company_name = company
13995 .map(|c| c.name.clone())
13996 .unwrap_or_else(|| "Unknown Company".to_string());
13997 let currency = company
13998 .map(|c| c.currency.clone())
13999 .unwrap_or_else(|| "USD".to_string());
14000
14001 let entity_entries: Vec<_> = entries
14003 .iter()
14004 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14005 .cloned()
14006 .collect();
14007 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
14011 .iter()
14012 .flat_map(|e| e.lines.iter())
14013 .filter(|l| l.account_code.starts_with('4'))
14014 .map(|l| l.credit_amount - l.debit_amount)
14015 .sum();
14016
14017 let total_assets: rust_decimal::Decimal = entries
14018 .iter()
14019 .flat_map(|e| e.lines.iter())
14020 .filter(|l| l.account_code.starts_with('1'))
14021 .map(|l| l.debit_amount - l.credit_amount)
14022 .sum();
14023
14024 let total_expenses: rust_decimal::Decimal = entries
14025 .iter()
14026 .flat_map(|e| e.lines.iter())
14027 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14028 .map(|l| l.debit_amount)
14029 .sum();
14030
14031 let equity: rust_decimal::Decimal = entries
14032 .iter()
14033 .flat_map(|e| e.lines.iter())
14034 .filter(|l| l.account_code.starts_with('3'))
14035 .map(|l| l.credit_amount - l.debit_amount)
14036 .sum();
14037
14038 let total_debt: rust_decimal::Decimal = entries
14039 .iter()
14040 .flat_map(|e| e.lines.iter())
14041 .filter(|l| l.account_code.starts_with('2'))
14042 .map(|l| l.credit_amount - l.debit_amount)
14043 .sum();
14044
14045 let pretax_income = total_revenue - total_expenses;
14046
14047 let cogs: rust_decimal::Decimal = entries
14048 .iter()
14049 .flat_map(|e| e.lines.iter())
14050 .filter(|l| l.account_code.starts_with('5'))
14051 .map(|l| l.debit_amount)
14052 .sum();
14053 let gross_profit = total_revenue - cogs;
14054
14055 let current_assets: rust_decimal::Decimal = entries
14056 .iter()
14057 .flat_map(|e| e.lines.iter())
14058 .filter(|l| {
14059 l.account_code.starts_with("10")
14060 || l.account_code.starts_with("11")
14061 || l.account_code.starts_with("12")
14062 || l.account_code.starts_with("13")
14063 })
14064 .map(|l| l.debit_amount - l.credit_amount)
14065 .sum();
14066 let current_liabilities: rust_decimal::Decimal = entries
14067 .iter()
14068 .flat_map(|e| e.lines.iter())
14069 .filter(|l| {
14070 l.account_code.starts_with("20")
14071 || l.account_code.starts_with("21")
14072 || l.account_code.starts_with("22")
14073 })
14074 .map(|l| l.credit_amount - l.debit_amount)
14075 .sum();
14076 let working_capital = current_assets - current_liabilities;
14077
14078 let depreciation: rust_decimal::Decimal = entries
14079 .iter()
14080 .flat_map(|e| e.lines.iter())
14081 .filter(|l| l.account_code.starts_with("60"))
14082 .map(|l| l.debit_amount)
14083 .sum();
14084 let operating_cash_flow = pretax_income + depreciation;
14085
14086 let accounts: Vec<String> = self
14088 .coa
14089 .as_ref()
14090 .map(|coa| {
14091 coa.get_postable_accounts()
14092 .iter()
14093 .map(|acc| acc.account_code().to_string())
14094 .collect()
14095 })
14096 .unwrap_or_default();
14097
14098 let team_member_ids: Vec<String> = self
14100 .master_data
14101 .employees
14102 .iter()
14103 .take(8) .map(|e| e.employee_id.clone())
14105 .collect();
14106 let team_member_pairs: Vec<(String, String)> = self
14107 .master_data
14108 .employees
14109 .iter()
14110 .take(8)
14111 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14112 .collect();
14113
14114 let vendor_names: Vec<String> = self
14115 .master_data
14116 .vendors
14117 .iter()
14118 .map(|v| v.name.clone())
14119 .collect();
14120 let customer_names: Vec<String> = self
14121 .master_data
14122 .customers
14123 .iter()
14124 .map(|c| c.name.clone())
14125 .collect();
14126
14127 let entity_codes: Vec<String> = self
14128 .config
14129 .companies
14130 .iter()
14131 .map(|c| c.code.clone())
14132 .collect();
14133
14134 let journal_entry_ids: Vec<String> = entries
14136 .iter()
14137 .take(50)
14138 .map(|e| e.header.document_id.to_string())
14139 .collect();
14140
14141 let mut account_balances = std::collections::HashMap::<String, f64>::new();
14143 for entry in entries {
14144 for line in &entry.lines {
14145 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14146 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14147 *account_balances
14148 .entry(line.account_code.clone())
14149 .or_insert(0.0) += debit_f64 - credit_f64;
14150 }
14151 }
14152
14153 let control_ids: Vec<String> = Vec::new();
14158 let anomaly_refs: Vec<String> = Vec::new();
14159
14160 let mut context = EngagementContext {
14161 company_code,
14162 company_name,
14163 fiscal_year: start_date.year(),
14164 currency,
14165 total_revenue,
14166 total_assets,
14167 engagement_start: start_date,
14168 report_date: period_end,
14169 pretax_income,
14170 equity,
14171 gross_profit,
14172 working_capital,
14173 operating_cash_flow,
14174 total_debt,
14175 team_member_ids,
14176 team_member_pairs,
14177 accounts,
14178 vendor_names,
14179 customer_names,
14180 journal_entry_ids,
14181 account_balances,
14182 control_ids,
14183 anomaly_refs,
14184 journal_entries: entries.to_vec(),
14185 is_us_listed: false,
14186 entity_codes,
14187 auditor_firm_name: "DataSynth Audit LLP".into(),
14188 accounting_framework: self
14189 .config
14190 .accounting_standards
14191 .framework
14192 .map(|f| match f {
14193 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14194 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14195 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14196 "French GAAP"
14197 }
14198 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14199 "German GAAP"
14200 }
14201 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14202 "Dual Reporting"
14203 }
14204 })
14205 .unwrap_or("IFRS")
14206 .into(),
14207 };
14208
14209 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14211 let rng = ChaCha8Rng::seed_from_u64(seed);
14212 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14213
14214 let mut result = engine
14215 .run_engagement(&context)
14216 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14217
14218 info!(
14219 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14220 {} phases completed, duration {:.1}h",
14221 result.event_log.len(),
14222 result.artifacts.total_artifacts(),
14223 result.anomalies.len(),
14224 result.phases_completed.len(),
14225 result.total_duration_hours,
14226 );
14227
14228 let tb_entity = context.company_code.clone();
14230 let tb_fy = context.fiscal_year;
14231 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14232 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14233 entries,
14234 &tb_entity,
14235 tb_fy,
14236 self.coa.as_ref().map(|c| c.as_ref()),
14237 );
14238
14239 let bag = result.artifacts;
14241 let mut snapshot = AuditSnapshot {
14242 engagements: bag.engagements,
14243 engagement_letters: bag.engagement_letters,
14244 materiality_calculations: bag.materiality_calculations,
14245 risk_assessments: bag.risk_assessments,
14246 combined_risk_assessments: bag.combined_risk_assessments,
14247 workpapers: bag.workpapers,
14248 evidence: bag.evidence,
14249 findings: bag.findings,
14250 judgments: bag.judgments,
14251 sampling_plans: bag.sampling_plans,
14252 sampled_items: bag.sampled_items,
14253 analytical_results: bag.analytical_results,
14254 going_concern_assessments: bag.going_concern_assessments,
14255 subsequent_events: bag.subsequent_events,
14256 audit_opinions: bag.audit_opinions,
14257 key_audit_matters: bag.key_audit_matters,
14258 procedure_steps: bag.procedure_steps,
14259 samples: bag.samples,
14260 confirmations: bag.confirmations,
14261 confirmation_responses: bag.confirmation_responses,
14262 fsm_event_trail: Some(result.event_log),
14264 ..Default::default()
14266 };
14267
14268 {
14270 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14271 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14272 }
14273 {
14274 use datasynth_standards::audit::isa_reference::IsaStandard;
14275 snapshot.isa_mappings = IsaStandard::standard_entries();
14276 }
14277
14278 info!(
14279 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14280 {} risk assessments, {} findings, {} materiality calcs",
14281 snapshot.engagements.len(),
14282 snapshot.workpapers.len(),
14283 snapshot.evidence.len(),
14284 snapshot.risk_assessments.len(),
14285 snapshot.findings.len(),
14286 snapshot.materiality_calculations.len(),
14287 );
14288
14289 Ok(snapshot)
14290 }
14291
14292 fn export_graphs(
14299 &mut self,
14300 entries: &[JournalEntry],
14301 _coa: &Arc<ChartOfAccounts>,
14302 stats: &mut EnhancedGenerationStatistics,
14303 ) -> SynthResult<GraphExportSnapshot> {
14304 let pb = self.create_progress_bar(100, "Exporting Graphs");
14305
14306 let mut snapshot = GraphExportSnapshot::default();
14307
14308 let output_dir = self
14310 .output_path
14311 .clone()
14312 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14313 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14314
14315 for graph_type in &self.config.graph_export.graph_types {
14317 if let Some(pb) = &pb {
14318 pb.inc(10);
14319 }
14320
14321 let graph_config = TransactionGraphConfig {
14323 include_vendors: false,
14324 include_customers: false,
14325 create_debit_credit_edges: true,
14326 include_document_nodes: graph_type.include_document_nodes,
14327 min_edge_weight: graph_type.min_edge_weight,
14328 aggregate_parallel_edges: graph_type.aggregate_edges,
14329 framework: None,
14330 };
14331
14332 let mut builder = TransactionGraphBuilder::new(graph_config);
14333 builder.add_journal_entries(entries);
14334 let graph = builder.build();
14335
14336 stats.graph_node_count += graph.node_count();
14338 stats.graph_edge_count += graph.edge_count();
14339
14340 if let Some(pb) = &pb {
14341 pb.inc(40);
14342 }
14343
14344 for format in &self.config.graph_export.formats {
14346 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14347
14348 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14350 warn!("Failed to create graph output directory: {}", e);
14351 continue;
14352 }
14353
14354 match format {
14355 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14356 let pyg_config = PyGExportConfig {
14357 common: datasynth_graph::CommonExportConfig {
14358 export_node_features: true,
14359 export_edge_features: true,
14360 export_node_labels: true,
14361 export_edge_labels: true,
14362 export_masks: true,
14363 train_ratio: self.config.graph_export.train_ratio,
14364 val_ratio: self.config.graph_export.validation_ratio,
14365 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14366 },
14367 one_hot_categoricals: false,
14368 };
14369
14370 let exporter = PyGExporter::new(pyg_config);
14371 match exporter.export(&graph, &format_dir) {
14372 Ok(metadata) => {
14373 snapshot.exports.insert(
14374 format!("{}_{}", graph_type.name, "pytorch_geometric"),
14375 GraphExportInfo {
14376 name: graph_type.name.clone(),
14377 format: "pytorch_geometric".to_string(),
14378 output_path: format_dir.clone(),
14379 node_count: metadata.num_nodes,
14380 edge_count: metadata.num_edges,
14381 },
14382 );
14383 snapshot.graph_count += 1;
14384 }
14385 Err(e) => {
14386 warn!("Failed to export PyTorch Geometric graph: {}", e);
14387 }
14388 }
14389 }
14390 datasynth_config::schema::GraphExportFormat::Neo4j => {
14391 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14392
14393 let neo4j_config = Neo4jExportConfig {
14394 export_node_properties: true,
14395 export_edge_properties: true,
14396 export_features: true,
14397 generate_cypher: true,
14398 generate_admin_import: true,
14399 database_name: "synth".to_string(),
14400 cypher_batch_size: 1000,
14401 };
14402
14403 let exporter = Neo4jExporter::new(neo4j_config);
14404 match exporter.export(&graph, &format_dir) {
14405 Ok(metadata) => {
14406 snapshot.exports.insert(
14407 format!("{}_{}", graph_type.name, "neo4j"),
14408 GraphExportInfo {
14409 name: graph_type.name.clone(),
14410 format: "neo4j".to_string(),
14411 output_path: format_dir.clone(),
14412 node_count: metadata.num_nodes,
14413 edge_count: metadata.num_edges,
14414 },
14415 );
14416 snapshot.graph_count += 1;
14417 }
14418 Err(e) => {
14419 warn!("Failed to export Neo4j graph: {}", e);
14420 }
14421 }
14422 }
14423 datasynth_config::schema::GraphExportFormat::Dgl => {
14424 use datasynth_graph::{DGLExportConfig, DGLExporter};
14425
14426 let dgl_config = DGLExportConfig {
14427 common: datasynth_graph::CommonExportConfig {
14428 export_node_features: true,
14429 export_edge_features: true,
14430 export_node_labels: true,
14431 export_edge_labels: true,
14432 export_masks: true,
14433 train_ratio: self.config.graph_export.train_ratio,
14434 val_ratio: self.config.graph_export.validation_ratio,
14435 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14436 },
14437 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14438 include_pickle_script: true, };
14440
14441 let exporter = DGLExporter::new(dgl_config);
14442 match exporter.export(&graph, &format_dir) {
14443 Ok(metadata) => {
14444 snapshot.exports.insert(
14445 format!("{}_{}", graph_type.name, "dgl"),
14446 GraphExportInfo {
14447 name: graph_type.name.clone(),
14448 format: "dgl".to_string(),
14449 output_path: format_dir.clone(),
14450 node_count: metadata.common.num_nodes,
14451 edge_count: metadata.common.num_edges,
14452 },
14453 );
14454 snapshot.graph_count += 1;
14455 }
14456 Err(e) => {
14457 warn!("Failed to export DGL graph: {}", e);
14458 }
14459 }
14460 }
14461 datasynth_config::schema::GraphExportFormat::RustGraph => {
14462 use datasynth_graph::{
14463 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14464 };
14465
14466 let rustgraph_config = RustGraphExportConfig {
14467 include_features: true,
14468 include_temporal: true,
14469 include_labels: true,
14470 source_name: "datasynth".to_string(),
14471 batch_id: None,
14472 output_format: RustGraphOutputFormat::JsonLines,
14473 export_node_properties: true,
14474 export_edge_properties: true,
14475 pretty_print: false,
14476 };
14477
14478 let exporter = RustGraphExporter::new(rustgraph_config);
14479 match exporter.export(&graph, &format_dir) {
14480 Ok(metadata) => {
14481 snapshot.exports.insert(
14482 format!("{}_{}", graph_type.name, "rustgraph"),
14483 GraphExportInfo {
14484 name: graph_type.name.clone(),
14485 format: "rustgraph".to_string(),
14486 output_path: format_dir.clone(),
14487 node_count: metadata.num_nodes,
14488 edge_count: metadata.num_edges,
14489 },
14490 );
14491 snapshot.graph_count += 1;
14492 }
14493 Err(e) => {
14494 warn!("Failed to export RustGraph: {}", e);
14495 }
14496 }
14497 }
14498 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14499 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14501 }
14502 }
14503 }
14504
14505 if let Some(pb) = &pb {
14506 pb.inc(40);
14507 }
14508 }
14509
14510 stats.graph_export_count = snapshot.graph_count;
14511 snapshot.exported = snapshot.graph_count > 0;
14512
14513 if let Some(pb) = pb {
14514 pb.finish_with_message(format!(
14515 "Graphs exported: {} graphs ({} nodes, {} edges)",
14516 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14517 ));
14518 }
14519
14520 Ok(snapshot)
14521 }
14522
14523 fn build_additional_graphs(
14528 &self,
14529 banking: &BankingSnapshot,
14530 intercompany: &IntercompanySnapshot,
14531 entries: &[JournalEntry],
14532 stats: &mut EnhancedGenerationStatistics,
14533 ) {
14534 let output_dir = self
14535 .output_path
14536 .clone()
14537 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14538 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14539
14540 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14542 info!("Phase 10c: Building banking network graph");
14543 let config = BankingGraphConfig::default();
14544 let mut builder = BankingGraphBuilder::new(config);
14545 builder.add_customers(&banking.customers);
14546 builder.add_accounts(&banking.accounts, &banking.customers);
14547 builder.add_transactions(&banking.transactions);
14548 let graph = builder.build();
14549
14550 let node_count = graph.node_count();
14551 let edge_count = graph.edge_count();
14552 stats.graph_node_count += node_count;
14553 stats.graph_edge_count += edge_count;
14554
14555 for format in &self.config.graph_export.formats {
14557 if matches!(
14558 format,
14559 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14560 ) {
14561 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14562 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14563 warn!("Failed to create banking graph output dir: {}", e);
14564 continue;
14565 }
14566 let pyg_config = PyGExportConfig::default();
14567 let exporter = PyGExporter::new(pyg_config);
14568 if let Err(e) = exporter.export(&graph, &format_dir) {
14569 warn!("Failed to export banking graph as PyG: {}", e);
14570 } else {
14571 info!(
14572 "Banking network graph exported: {} nodes, {} edges",
14573 node_count, edge_count
14574 );
14575 }
14576 }
14577 }
14578 }
14579
14580 let approval_entries: Vec<_> = entries
14582 .iter()
14583 .filter(|je| je.header.approval_workflow.is_some())
14584 .collect();
14585
14586 if !approval_entries.is_empty() {
14587 info!(
14588 "Phase 10c: Building approval network graph ({} entries with approvals)",
14589 approval_entries.len()
14590 );
14591 let config = ApprovalGraphConfig::default();
14592 let mut builder = ApprovalGraphBuilder::new(config);
14593
14594 for je in &approval_entries {
14595 if let Some(ref wf) = je.header.approval_workflow {
14596 for action in &wf.actions {
14597 let record = datasynth_core::models::ApprovalRecord {
14598 approval_id: format!(
14599 "APR-{}-{}",
14600 je.header.document_id, action.approval_level
14601 ),
14602 document_number: je.header.document_id.to_string(),
14603 document_type: "JE".to_string(),
14604 company_code: je.company_code().to_string(),
14605 requester_id: wf.preparer_id.clone(),
14606 requester_name: Some(wf.preparer_name.clone()),
14607 approver_id: action.actor_id.clone(),
14608 approver_name: action.actor_name.clone(),
14609 approval_date: je.posting_date(),
14610 action: format!("{:?}", action.action),
14611 amount: wf.amount,
14612 approval_limit: None,
14613 comments: action.comments.clone(),
14614 delegation_from: None,
14615 is_auto_approved: false,
14616 };
14617 builder.add_approval(&record);
14618 }
14619 }
14620 }
14621
14622 let graph = builder.build();
14623 let node_count = graph.node_count();
14624 let edge_count = graph.edge_count();
14625 stats.graph_node_count += node_count;
14626 stats.graph_edge_count += edge_count;
14627
14628 for format in &self.config.graph_export.formats {
14630 if matches!(
14631 format,
14632 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14633 ) {
14634 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14635 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14636 warn!("Failed to create approval graph output dir: {}", e);
14637 continue;
14638 }
14639 let pyg_config = PyGExportConfig::default();
14640 let exporter = PyGExporter::new(pyg_config);
14641 if let Err(e) = exporter.export(&graph, &format_dir) {
14642 warn!("Failed to export approval graph as PyG: {}", e);
14643 } else {
14644 info!(
14645 "Approval network graph exported: {} nodes, {} edges",
14646 node_count, edge_count
14647 );
14648 }
14649 }
14650 }
14651 }
14652
14653 if self.config.companies.len() >= 2 {
14655 info!(
14656 "Phase 10c: Building entity relationship graph ({} companies)",
14657 self.config.companies.len()
14658 );
14659
14660 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14661 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14662
14663 let parent_code = &self.config.companies[0].code;
14665 let mut companies: Vec<datasynth_core::models::Company> =
14666 Vec::with_capacity(self.config.companies.len());
14667
14668 let first = &self.config.companies[0];
14670 companies.push(datasynth_core::models::Company::parent(
14671 &first.code,
14672 &first.name,
14673 &first.country,
14674 &first.currency,
14675 ));
14676
14677 for cc in self.config.companies.iter().skip(1) {
14679 companies.push(datasynth_core::models::Company::subsidiary(
14680 &cc.code,
14681 &cc.name,
14682 &cc.country,
14683 &cc.currency,
14684 parent_code,
14685 rust_decimal::Decimal::from(100),
14686 ));
14687 }
14688
14689 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14691 self.config
14692 .companies
14693 .iter()
14694 .skip(1)
14695 .enumerate()
14696 .map(|(i, cc)| {
14697 let mut rel =
14698 datasynth_core::models::intercompany::IntercompanyRelationship::new(
14699 format!("REL{:03}", i + 1),
14700 parent_code.clone(),
14701 cc.code.clone(),
14702 rust_decimal::Decimal::from(100),
14703 start_date,
14704 );
14705 rel.functional_currency = cc.currency.clone();
14706 rel
14707 })
14708 .collect();
14709
14710 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14711 builder.add_companies(&companies);
14712 builder.add_ownership_relationships(&relationships);
14713
14714 for pair in &intercompany.matched_pairs {
14716 builder.add_intercompany_edge(
14717 &pair.seller_company,
14718 &pair.buyer_company,
14719 pair.amount,
14720 &format!("{:?}", pair.transaction_type),
14721 );
14722 }
14723
14724 let graph = builder.build();
14725 let node_count = graph.node_count();
14726 let edge_count = graph.edge_count();
14727 stats.graph_node_count += node_count;
14728 stats.graph_edge_count += edge_count;
14729
14730 for format in &self.config.graph_export.formats {
14732 if matches!(
14733 format,
14734 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14735 ) {
14736 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14737 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14738 warn!("Failed to create entity graph output dir: {}", e);
14739 continue;
14740 }
14741 let pyg_config = PyGExportConfig::default();
14742 let exporter = PyGExporter::new(pyg_config);
14743 if let Err(e) = exporter.export(&graph, &format_dir) {
14744 warn!("Failed to export entity graph as PyG: {}", e);
14745 } else {
14746 info!(
14747 "Entity relationship graph exported: {} nodes, {} edges",
14748 node_count, edge_count
14749 );
14750 }
14751 }
14752 }
14753 } else {
14754 debug!(
14755 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14756 self.config.companies.len()
14757 );
14758 }
14759 }
14760
14761 #[allow(clippy::too_many_arguments)]
14768 fn export_hypergraph(
14769 &self,
14770 coa: &Arc<ChartOfAccounts>,
14771 entries: &[JournalEntry],
14772 document_flows: &DocumentFlowSnapshot,
14773 sourcing: &SourcingSnapshot,
14774 hr: &HrSnapshot,
14775 manufacturing: &ManufacturingSnapshot,
14776 banking: &BankingSnapshot,
14777 audit: &AuditSnapshot,
14778 financial_reporting: &FinancialReportingSnapshot,
14779 ocpm: &OcpmSnapshot,
14780 compliance: &ComplianceRegulationsSnapshot,
14781 stats: &mut EnhancedGenerationStatistics,
14782 ) -> SynthResult<HypergraphExportInfo> {
14783 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14784 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14785 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14786 use datasynth_graph::models::hypergraph::AggregationStrategy;
14787
14788 let hg_settings = &self.config.graph_export.hypergraph;
14789
14790 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14792 "truncate" => AggregationStrategy::Truncate,
14793 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14794 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14795 "importance_sample" => AggregationStrategy::ImportanceSample,
14796 _ => AggregationStrategy::PoolByCounterparty,
14797 };
14798
14799 let builder_config = HypergraphConfig {
14800 max_nodes: hg_settings.max_nodes,
14801 aggregation_strategy,
14802 include_coso: hg_settings.governance_layer.include_coso,
14803 include_controls: hg_settings.governance_layer.include_controls,
14804 include_sox: hg_settings.governance_layer.include_sox,
14805 include_vendors: hg_settings.governance_layer.include_vendors,
14806 include_customers: hg_settings.governance_layer.include_customers,
14807 include_employees: hg_settings.governance_layer.include_employees,
14808 include_p2p: hg_settings.process_layer.include_p2p,
14809 include_o2c: hg_settings.process_layer.include_o2c,
14810 include_s2c: hg_settings.process_layer.include_s2c,
14811 include_h2r: hg_settings.process_layer.include_h2r,
14812 include_mfg: hg_settings.process_layer.include_mfg,
14813 include_bank: hg_settings.process_layer.include_bank,
14814 include_audit: hg_settings.process_layer.include_audit,
14815 include_r2r: hg_settings.process_layer.include_r2r,
14816 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14817 docs_per_counterparty_threshold: hg_settings
14818 .process_layer
14819 .docs_per_counterparty_threshold,
14820 include_accounts: hg_settings.accounting_layer.include_accounts,
14821 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14822 include_cross_layer_edges: hg_settings.cross_layer.enabled,
14823 include_compliance: self.config.compliance_regulations.enabled,
14824 include_tax: true,
14825 include_treasury: true,
14826 include_esg: true,
14827 include_project: true,
14828 include_intercompany: true,
14829 include_temporal_events: true,
14830 };
14831
14832 let mut builder = HypergraphBuilder::new(builder_config);
14833
14834 builder.add_coso_framework();
14836
14837 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14840 let controls = InternalControl::standard_controls();
14841 builder.add_controls(&controls);
14842 }
14843
14844 builder.add_vendors(&self.master_data.vendors);
14846 builder.add_customers(&self.master_data.customers);
14847 builder.add_employees(&self.master_data.employees);
14848
14849 builder.add_p2p_documents(
14851 &document_flows.purchase_orders,
14852 &document_flows.goods_receipts,
14853 &document_flows.vendor_invoices,
14854 &document_flows.payments,
14855 );
14856 builder.add_o2c_documents(
14857 &document_flows.sales_orders,
14858 &document_flows.deliveries,
14859 &document_flows.customer_invoices,
14860 );
14861 builder.add_s2c_documents(
14862 &sourcing.sourcing_projects,
14863 &sourcing.qualifications,
14864 &sourcing.rfx_events,
14865 &sourcing.bids,
14866 &sourcing.bid_evaluations,
14867 &sourcing.contracts,
14868 );
14869 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14870 builder.add_mfg_documents(
14871 &manufacturing.production_orders,
14872 &manufacturing.quality_inspections,
14873 &manufacturing.cycle_counts,
14874 );
14875 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14876 builder.add_audit_documents(
14877 &audit.engagements,
14878 &audit.workpapers,
14879 &audit.findings,
14880 &audit.evidence,
14881 &audit.risk_assessments,
14882 &audit.judgments,
14883 &audit.materiality_calculations,
14884 &audit.audit_opinions,
14885 &audit.going_concern_assessments,
14886 );
14887 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14888
14889 if let Some(ref event_log) = ocpm.event_log {
14891 builder.add_ocpm_events(event_log);
14892 }
14893
14894 if self.config.compliance_regulations.enabled
14896 && hg_settings.governance_layer.include_controls
14897 {
14898 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14900 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14901 .standard_records
14902 .iter()
14903 .filter_map(|r| {
14904 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14905 registry.get(&sid).cloned()
14906 })
14907 .collect();
14908
14909 builder.add_compliance_regulations(
14910 &standards,
14911 &compliance.findings,
14912 &compliance.filings,
14913 );
14914 }
14915
14916 builder.add_accounts(coa);
14918 builder.add_journal_entries_as_hyperedges(entries);
14919
14920 let hypergraph = builder.build();
14922
14923 let output_dir = self
14925 .output_path
14926 .clone()
14927 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14928 let hg_dir = output_dir
14929 .join(&self.config.graph_export.output_subdirectory)
14930 .join(&hg_settings.output_subdirectory);
14931
14932 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14934 "unified" => {
14935 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14936 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14937 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14938 })?;
14939 (
14940 metadata.num_nodes,
14941 metadata.num_edges,
14942 metadata.num_hyperedges,
14943 )
14944 }
14945 _ => {
14946 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14948 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14949 SynthError::generation(format!("Hypergraph export failed: {e}"))
14950 })?;
14951 (
14952 metadata.num_nodes,
14953 metadata.num_edges,
14954 metadata.num_hyperedges,
14955 )
14956 }
14957 };
14958
14959 #[cfg(feature = "streaming")]
14961 if let Some(ref target_url) = hg_settings.stream_target {
14962 use crate::stream_client::{StreamClient, StreamConfig};
14963 use std::io::Write as _;
14964
14965 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14966 let stream_config = StreamConfig {
14967 target_url: target_url.clone(),
14968 batch_size: hg_settings.stream_batch_size,
14969 api_key,
14970 ..StreamConfig::default()
14971 };
14972
14973 match StreamClient::new(stream_config) {
14974 Ok(mut client) => {
14975 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14976 match exporter.export_to_writer(&hypergraph, &mut client) {
14977 Ok(_) => {
14978 if let Err(e) = client.flush() {
14979 warn!("Failed to flush stream client: {}", e);
14980 } else {
14981 info!("Streamed {} records to {}", client.total_sent(), target_url);
14982 }
14983 }
14984 Err(e) => {
14985 warn!("Streaming export failed: {}", e);
14986 }
14987 }
14988 }
14989 Err(e) => {
14990 warn!("Failed to create stream client: {}", e);
14991 }
14992 }
14993 }
14994
14995 stats.graph_node_count += num_nodes;
14997 stats.graph_edge_count += num_edges;
14998 stats.graph_export_count += 1;
14999
15000 Ok(HypergraphExportInfo {
15001 node_count: num_nodes,
15002 edge_count: num_edges,
15003 hyperedge_count: num_hyperedges,
15004 output_path: hg_dir,
15005 })
15006 }
15007
15008 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15013 let pb = self.create_progress_bar(100, "Generating Banking Data");
15014
15015 let orchestrator = BankingOrchestratorBuilder::new()
15017 .config(self.config.banking.clone())
15018 .seed(self.seed + 9000)
15019 .country_pack(self.primary_pack().clone())
15020 .build();
15021
15022 if let Some(pb) = &pb {
15023 pb.inc(10);
15024 }
15025
15026 let result = orchestrator.generate();
15028
15029 if let Some(pb) = &pb {
15030 pb.inc(90);
15031 pb.finish_with_message(format!(
15032 "Banking: {} customers, {} transactions",
15033 result.customers.len(),
15034 result.transactions.len()
15035 ));
15036 }
15037
15038 let mut banking_customers = result.customers;
15043 let core_customers = &self.master_data.customers;
15044 if !core_customers.is_empty() {
15045 for (i, bc) in banking_customers.iter_mut().enumerate() {
15046 let core = &core_customers[i % core_customers.len()];
15047 bc.name = CustomerName::business(&core.name);
15048 bc.residence_country = core.country.clone();
15049 bc.enterprise_customer_id = Some(core.customer_id.clone());
15050 }
15051 debug!(
15052 "Cross-referenced {} banking customers with {} core customers",
15053 banking_customers.len(),
15054 core_customers.len()
15055 );
15056 }
15057
15058 Ok(BankingSnapshot {
15059 customers: banking_customers,
15060 accounts: result.accounts,
15061 transactions: result.transactions,
15062 transaction_labels: result.transaction_labels,
15063 customer_labels: result.customer_labels,
15064 account_labels: result.account_labels,
15065 relationship_labels: result.relationship_labels,
15066 narratives: result.narratives,
15067 suspicious_count: result.stats.suspicious_count,
15068 scenario_count: result.scenarios.len(),
15069 })
15070 }
15071
15072 fn calculate_total_transactions(&self) -> u64 {
15074 let months = self.config.global.period_months as f64;
15075 self.config
15076 .companies
15077 .iter()
15078 .map(|c| {
15079 let annual = c.annual_transaction_volume.count() as f64;
15080 let weighted = annual * c.volume_weight;
15081 (weighted * months / 12.0) as u64
15082 })
15083 .sum()
15084 }
15085
15086 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15088 if !self.phase_config.show_progress {
15089 return None;
15090 }
15091
15092 let pb = if let Some(mp) = &self.multi_progress {
15093 mp.add(ProgressBar::new(total))
15094 } else {
15095 ProgressBar::new(total)
15096 };
15097
15098 pb.set_style(
15099 ProgressStyle::default_bar()
15100 .template(&format!(
15101 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15102 ))
15103 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15104 .progress_chars("#>-"),
15105 );
15106
15107 Some(pb)
15108 }
15109
15110 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15112 self.coa.clone()
15113 }
15114
15115 pub fn get_master_data(&self) -> &MasterDataSnapshot {
15117 &self.master_data
15118 }
15119
15120 fn phase_compliance_regulations(
15122 &mut self,
15123 _stats: &mut EnhancedGenerationStatistics,
15124 ) -> SynthResult<ComplianceRegulationsSnapshot> {
15125 if !self.phase_config.generate_compliance_regulations {
15126 return Ok(ComplianceRegulationsSnapshot::default());
15127 }
15128
15129 info!("Phase: Generating Compliance Regulations Data");
15130
15131 let cr_config = &self.config.compliance_regulations;
15132
15133 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15135 self.config
15136 .companies
15137 .iter()
15138 .map(|c| c.country.clone())
15139 .collect::<std::collections::HashSet<_>>()
15140 .into_iter()
15141 .collect()
15142 } else {
15143 cr_config.jurisdictions.clone()
15144 };
15145
15146 let fallback_date =
15148 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15149 let reference_date = cr_config
15150 .reference_date
15151 .as_ref()
15152 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15153 .unwrap_or_else(|| {
15154 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15155 .unwrap_or(fallback_date)
15156 });
15157
15158 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15160 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15161 let cross_reference_records = reg_gen.generate_cross_reference_records();
15162 let jurisdiction_records =
15163 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15164
15165 info!(
15166 " Standards: {} records, {} cross-references, {} jurisdictions",
15167 standard_records.len(),
15168 cross_reference_records.len(),
15169 jurisdiction_records.len()
15170 );
15171
15172 let audit_procedures = if cr_config.audit_procedures.enabled {
15174 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15175 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15176 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15177 confidence_level: cr_config.audit_procedures.confidence_level,
15178 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15179 };
15180 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15181 self.seed + 9000,
15182 proc_config,
15183 );
15184 let registry = reg_gen.registry();
15185 let mut all_procs = Vec::new();
15186 for jurisdiction in &jurisdictions {
15187 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15188 all_procs.extend(procs);
15189 }
15190 info!(" Audit procedures: {}", all_procs.len());
15191 all_procs
15192 } else {
15193 Vec::new()
15194 };
15195
15196 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15198 let finding_config =
15199 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15200 finding_rate: cr_config.findings.finding_rate,
15201 material_weakness_rate: cr_config.findings.material_weakness_rate,
15202 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15203 generate_remediation: cr_config.findings.generate_remediation,
15204 };
15205 let mut finding_gen =
15206 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15207 self.seed + 9100,
15208 finding_config,
15209 );
15210 let mut all_findings = Vec::new();
15211 for company in &self.config.companies {
15212 let company_findings =
15213 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15214 all_findings.extend(company_findings);
15215 }
15216 info!(" Compliance findings: {}", all_findings.len());
15217 all_findings
15218 } else {
15219 Vec::new()
15220 };
15221
15222 let filings = if cr_config.filings.enabled {
15224 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15225 filing_types: cr_config.filings.filing_types.clone(),
15226 generate_status_progression: cr_config.filings.generate_status_progression,
15227 };
15228 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15229 self.seed + 9200,
15230 filing_config,
15231 );
15232 let company_codes: Vec<String> = self
15233 .config
15234 .companies
15235 .iter()
15236 .map(|c| c.code.clone())
15237 .collect();
15238 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15239 .unwrap_or(fallback_date);
15240 let filings = filing_gen.generate_filings(
15241 &company_codes,
15242 &jurisdictions,
15243 start_date,
15244 self.config.global.period_months,
15245 );
15246 info!(" Regulatory filings: {}", filings.len());
15247 filings
15248 } else {
15249 Vec::new()
15250 };
15251
15252 let compliance_graph = if cr_config.graph.enabled {
15254 let graph_config = datasynth_graph::ComplianceGraphConfig {
15255 include_standard_nodes: cr_config.graph.include_compliance_nodes,
15256 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15257 include_cross_references: cr_config.graph.include_cross_references,
15258 include_supersession_edges: cr_config.graph.include_supersession_edges,
15259 include_account_links: cr_config.graph.include_account_links,
15260 include_control_links: cr_config.graph.include_control_links,
15261 include_company_links: cr_config.graph.include_company_links,
15262 };
15263 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15264
15265 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15267 .iter()
15268 .map(|r| datasynth_graph::StandardNodeInput {
15269 standard_id: r.standard_id.clone(),
15270 title: r.title.clone(),
15271 category: r.category.clone(),
15272 domain: r.domain.clone(),
15273 is_active: r.is_active,
15274 features: vec![if r.is_active { 1.0 } else { 0.0 }],
15275 applicable_account_types: r.applicable_account_types.clone(),
15276 applicable_processes: r.applicable_processes.clone(),
15277 })
15278 .collect();
15279 builder.add_standards(&standard_inputs);
15280
15281 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15283 jurisdiction_records
15284 .iter()
15285 .map(|r| datasynth_graph::JurisdictionNodeInput {
15286 country_code: r.country_code.clone(),
15287 country_name: r.country_name.clone(),
15288 framework: r.accounting_framework.clone(),
15289 standard_count: r.standard_count,
15290 tax_rate: r.statutory_tax_rate,
15291 })
15292 .collect();
15293 builder.add_jurisdictions(&jurisdiction_inputs);
15294
15295 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15297 cross_reference_records
15298 .iter()
15299 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15300 from_standard: r.from_standard.clone(),
15301 to_standard: r.to_standard.clone(),
15302 relationship: r.relationship.clone(),
15303 convergence_level: r.convergence_level,
15304 })
15305 .collect();
15306 builder.add_cross_references(&xref_inputs);
15307
15308 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15310 .iter()
15311 .map(|r| datasynth_graph::JurisdictionMappingInput {
15312 country_code: r.jurisdiction.clone(),
15313 standard_id: r.standard_id.clone(),
15314 })
15315 .collect();
15316 builder.add_jurisdiction_mappings(&mapping_inputs);
15317
15318 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15320 .iter()
15321 .map(|p| datasynth_graph::ProcedureNodeInput {
15322 procedure_id: p.procedure_id.clone(),
15323 standard_id: p.standard_id.clone(),
15324 procedure_type: p.procedure_type.clone(),
15325 sample_size: p.sample_size,
15326 confidence_level: p.confidence_level,
15327 })
15328 .collect();
15329 builder.add_procedures(&proc_inputs);
15330
15331 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15333 .iter()
15334 .map(|f| datasynth_graph::FindingNodeInput {
15335 finding_id: f.finding_id.to_string(),
15336 standard_id: f
15337 .related_standards
15338 .first()
15339 .map(|s| s.as_str().to_string())
15340 .unwrap_or_default(),
15341 severity: f.severity.to_string(),
15342 deficiency_level: f.deficiency_level.to_string(),
15343 severity_score: f.deficiency_level.severity_score(),
15344 control_id: f.control_id.clone(),
15345 affected_accounts: f.affected_accounts.clone(),
15346 })
15347 .collect();
15348 builder.add_findings(&finding_inputs);
15349
15350 if cr_config.graph.include_account_links {
15352 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15353 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15354 for std_record in &standard_records {
15355 if let Some(std_obj) =
15356 registry.get(&datasynth_core::models::compliance::StandardId::parse(
15357 &std_record.standard_id,
15358 ))
15359 {
15360 for acct_type in &std_obj.applicable_account_types {
15361 account_links.push(datasynth_graph::AccountLinkInput {
15362 standard_id: std_record.standard_id.clone(),
15363 account_code: acct_type.clone(),
15364 account_name: acct_type.clone(),
15365 });
15366 }
15367 }
15368 }
15369 builder.add_account_links(&account_links);
15370 }
15371
15372 if cr_config.graph.include_control_links {
15374 let mut control_links = Vec::new();
15375 let sox_like_ids: Vec<String> = standard_records
15377 .iter()
15378 .filter(|r| {
15379 r.standard_id.starts_with("SOX")
15380 || r.standard_id.starts_with("PCAOB-AS-2201")
15381 })
15382 .map(|r| r.standard_id.clone())
15383 .collect();
15384 let control_ids = [
15386 ("C001", "Cash Controls"),
15387 ("C002", "Large Transaction Approval"),
15388 ("C010", "PO Approval"),
15389 ("C011", "Three-Way Match"),
15390 ("C020", "Revenue Recognition"),
15391 ("C021", "Credit Check"),
15392 ("C030", "Manual JE Approval"),
15393 ("C031", "Period Close Review"),
15394 ("C032", "Account Reconciliation"),
15395 ("C040", "Payroll Processing"),
15396 ("C050", "Fixed Asset Capitalization"),
15397 ("C060", "Intercompany Elimination"),
15398 ];
15399 for sox_id in &sox_like_ids {
15400 for (ctrl_id, ctrl_name) in &control_ids {
15401 control_links.push(datasynth_graph::ControlLinkInput {
15402 standard_id: sox_id.clone(),
15403 control_id: ctrl_id.to_string(),
15404 control_name: ctrl_name.to_string(),
15405 });
15406 }
15407 }
15408 builder.add_control_links(&control_links);
15409 }
15410
15411 if cr_config.graph.include_company_links {
15413 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15414 .iter()
15415 .enumerate()
15416 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15417 filing_id: format!("F{:04}", i + 1),
15418 filing_type: f.filing_type.to_string(),
15419 company_code: f.company_code.clone(),
15420 jurisdiction: f.jurisdiction.clone(),
15421 status: format!("{:?}", f.status),
15422 })
15423 .collect();
15424 builder.add_filings(&filing_inputs);
15425 }
15426
15427 let graph = builder.build();
15428 info!(
15429 " Compliance graph: {} nodes, {} edges",
15430 graph.nodes.len(),
15431 graph.edges.len()
15432 );
15433 Some(graph)
15434 } else {
15435 None
15436 };
15437
15438 self.check_resources_with_log("post-compliance-regulations")?;
15439
15440 Ok(ComplianceRegulationsSnapshot {
15441 standard_records,
15442 cross_reference_records,
15443 jurisdiction_records,
15444 audit_procedures,
15445 findings,
15446 filings,
15447 compliance_graph,
15448 })
15449 }
15450
15451 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15453 use super::lineage::LineageGraphBuilder;
15454
15455 let mut builder = LineageGraphBuilder::new();
15456
15457 builder.add_config_section("config:global", "Global Config");
15459 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15460 builder.add_config_section("config:transactions", "Transaction Config");
15461
15462 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15464 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15465
15466 builder.configured_by("phase:coa", "config:chart_of_accounts");
15468 builder.configured_by("phase:je", "config:transactions");
15469
15470 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15472 builder.produced_by("output:je", "phase:je");
15473
15474 if self.phase_config.generate_master_data {
15476 builder.add_config_section("config:master_data", "Master Data Config");
15477 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15478 builder.configured_by("phase:master_data", "config:master_data");
15479 builder.input_to("phase:master_data", "phase:je");
15480 }
15481
15482 if self.phase_config.generate_document_flows {
15483 builder.add_config_section("config:document_flows", "Document Flow Config");
15484 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15485 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15486 builder.configured_by("phase:p2p", "config:document_flows");
15487 builder.configured_by("phase:o2c", "config:document_flows");
15488
15489 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15490 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15491 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15492 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15493 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15494
15495 builder.produced_by("output:po", "phase:p2p");
15496 builder.produced_by("output:gr", "phase:p2p");
15497 builder.produced_by("output:vi", "phase:p2p");
15498 builder.produced_by("output:so", "phase:o2c");
15499 builder.produced_by("output:ci", "phase:o2c");
15500 }
15501
15502 if self.phase_config.inject_anomalies {
15503 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15504 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15505 builder.configured_by("phase:anomaly", "config:fraud");
15506 builder.add_output_file(
15507 "output:labels",
15508 "Anomaly Labels",
15509 "labels/anomaly_labels.csv",
15510 );
15511 builder.produced_by("output:labels", "phase:anomaly");
15512 }
15513
15514 if self.phase_config.generate_audit {
15515 builder.add_config_section("config:audit", "Audit Config");
15516 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15517 builder.configured_by("phase:audit", "config:audit");
15518 }
15519
15520 if self.phase_config.generate_banking {
15521 builder.add_config_section("config:banking", "Banking Config");
15522 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15523 builder.configured_by("phase:banking", "config:banking");
15524 }
15525
15526 if self.config.llm.enabled {
15527 builder.add_config_section("config:llm", "LLM Enrichment Config");
15528 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15529 builder.configured_by("phase:llm_enrichment", "config:llm");
15530 }
15531
15532 if self.config.diffusion.enabled {
15533 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15534 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15535 builder.configured_by("phase:diffusion", "config:diffusion");
15536 }
15537
15538 if self.config.causal.enabled {
15539 builder.add_config_section("config:causal", "Causal Generation Config");
15540 builder.add_generator_phase("phase:causal", "Causal Overlay");
15541 builder.configured_by("phase:causal", "config:causal");
15542 }
15543
15544 builder.build()
15545 }
15546
15547 fn compute_company_revenue(
15556 entries: &[JournalEntry],
15557 company_code: &str,
15558 ) -> rust_decimal::Decimal {
15559 use rust_decimal::Decimal;
15560 let mut revenue = Decimal::ZERO;
15561 for je in entries {
15562 if je.header.company_code != company_code {
15563 continue;
15564 }
15565 for line in &je.lines {
15566 if line.gl_account.starts_with('4') {
15567 revenue += line.credit_amount - line.debit_amount;
15569 }
15570 }
15571 }
15572 revenue.max(Decimal::ZERO)
15573 }
15574
15575 fn compute_entity_net_assets(
15579 entries: &[JournalEntry],
15580 entity_code: &str,
15581 ) -> rust_decimal::Decimal {
15582 use rust_decimal::Decimal;
15583 let mut asset_net = Decimal::ZERO;
15584 let mut liability_net = Decimal::ZERO;
15585 for je in entries {
15586 if je.header.company_code != entity_code {
15587 continue;
15588 }
15589 for line in &je.lines {
15590 if line.gl_account.starts_with('1') {
15591 asset_net += line.debit_amount - line.credit_amount;
15592 } else if line.gl_account.starts_with('2') {
15593 liability_net += line.credit_amount - line.debit_amount;
15594 }
15595 }
15596 }
15597 asset_net - liability_net
15598 }
15599
15600 fn phase_statistical_validation(
15611 &self,
15612 entries: &[JournalEntry],
15613 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15614 use datasynth_config::schema::StatisticalTestConfig;
15615 use datasynth_core::distributions::{
15616 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15617 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15618 };
15619 use rust_decimal::prelude::ToPrimitive;
15620
15621 let cfg = &self.config.distributions.validation;
15622 if !cfg.enabled {
15623 return Ok(None);
15624 }
15625
15626 let amounts: Vec<rust_decimal::Decimal> = entries
15629 .iter()
15630 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15631 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15632 .collect();
15633
15634 let paired_amount_linecount: Vec<(f64, f64)> = entries
15638 .iter()
15639 .filter_map(|je| {
15640 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15641 if amt > rust_decimal::Decimal::ZERO {
15642 amt.to_f64().map(|a| (a, je.lines.len() as f64))
15643 } else {
15644 None
15645 }
15646 })
15647 .collect();
15648
15649 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15650 for test_cfg in &cfg.tests {
15651 match test_cfg {
15652 StatisticalTestConfig::BenfordFirstDigit {
15653 threshold_mad,
15654 warning_mad,
15655 } => {
15656 results.push(run_benford_first_digit(
15657 &amounts,
15658 *threshold_mad,
15659 *warning_mad,
15660 ));
15661 }
15662 StatisticalTestConfig::ChiSquared { bins, significance } => {
15663 results.push(run_chi_squared(&amounts, *bins, *significance));
15664 }
15665 StatisticalTestConfig::DistributionFit {
15666 target: _,
15667 ks_significance,
15668 method: _,
15669 } => {
15670 results.push(run_ks_uniform_log(&amounts, *ks_significance));
15673 }
15674 StatisticalTestConfig::AndersonDarling {
15675 target: _,
15676 significance,
15677 } => {
15678 results.push(run_anderson_darling(&amounts, *significance));
15681 }
15682 StatisticalTestConfig::CorrelationCheck {
15683 expected_correlations,
15684 } => {
15685 if expected_correlations.is_empty() {
15689 results.push(StatisticalTestResult {
15690 name: "correlation_check".to_string(),
15691 outcome: TestOutcome::Skipped,
15692 statistic: 0.0,
15693 threshold: 0.0,
15694 message: "no expected correlations declared".to_string(),
15695 });
15696 } else {
15697 for ec in expected_correlations {
15698 let pair_key = format!("{}_{}", ec.field1, ec.field2);
15699 let is_amount_linecount = (ec.field1 == "amount"
15700 && ec.field2 == "line_count")
15701 || (ec.field1 == "line_count" && ec.field2 == "amount");
15702 if is_amount_linecount {
15703 let xs: Vec<f64> =
15704 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15705 let ys: Vec<f64> =
15706 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15707 results.push(run_correlation_check(
15708 &pair_key,
15709 &xs,
15710 &ys,
15711 ec.expected_r,
15712 ec.tolerance,
15713 ));
15714 } else {
15715 results.push(StatisticalTestResult {
15716 name: format!("correlation_check_{pair_key}"),
15717 outcome: TestOutcome::Skipped,
15718 statistic: 0.0,
15719 threshold: ec.tolerance,
15720 message: format!(
15721 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15722 ec.field1, ec.field2
15723 ),
15724 });
15725 }
15726 }
15727 }
15728 }
15729 }
15730 }
15731
15732 let report = StatisticalValidationReport {
15733 sample_count: amounts.len(),
15734 results,
15735 };
15736
15737 if cfg.reporting.fail_on_error && !report.all_passed() {
15738 let failed = report.failed_names().join(", ");
15739 return Err(SynthError::validation(format!(
15740 "statistical validation failed: {failed}"
15741 )));
15742 }
15743
15744 Ok(Some(report))
15745 }
15746
15747 fn phase_analytics_metadata(
15760 &mut self,
15761 entries: &[JournalEntry],
15762 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15763 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15764 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15765 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15766 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15767 use std::collections::BTreeMap;
15768
15769 let mut snap = AnalyticsMetadataSnapshot::default();
15770
15771 if !self.phase_config.generate_analytics_metadata {
15772 return Ok(snap);
15773 }
15774
15775 let cfg = &self.config.analytics_metadata;
15776 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15777 .map(|d| d.year())
15778 .unwrap_or(2025);
15779
15780 if cfg.prior_year {
15782 let mut gen = PriorYearGenerator::new(self.seed + 9100);
15783 for company in &self.config.companies {
15784 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15787 BTreeMap::new();
15788 for je in entries {
15789 if je.header.company_code != company.code {
15790 continue;
15791 }
15792 for line in &je.lines {
15793 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15794 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15795 });
15796 entry.1 += line.debit_amount - line.credit_amount;
15797 }
15798 }
15799 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15800 .into_iter()
15801 .filter(|(_, (_, bal))| !bal.is_zero())
15802 .map(|(code, (name, bal))| (code, name, bal))
15803 .collect();
15804 if !current.is_empty() {
15805 let comparatives =
15806 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
15807 snap.prior_year_comparatives.extend(comparatives);
15808 }
15809 }
15810 info!(
15811 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15812 snap.prior_year_comparatives.len(),
15813 self.config.companies.len()
15814 );
15815 }
15816
15817 if cfg.industry_benchmark {
15819 use datasynth_core::models::IndustrySector;
15820 let industry = match self.config.global.industry {
15821 IndustrySector::Manufacturing => "manufacturing",
15822 IndustrySector::Retail => "retail",
15823 IndustrySector::FinancialServices => "financial_services",
15824 IndustrySector::Technology => "technology",
15825 IndustrySector::Healthcare => "healthcare",
15826 _ => "other",
15827 };
15828 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15829 let benchmarks = gen.generate(industry, fiscal_year);
15830 info!(
15831 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15832 benchmarks.len()
15833 );
15834 snap.industry_benchmarks = benchmarks;
15835 }
15836
15837 if cfg.management_reports {
15839 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15840 let period_months = self.config.global.period_months;
15841 for company in &self.config.companies {
15842 let reports =
15843 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15844 snap.management_reports.extend(reports);
15845 }
15846 info!(
15847 "v3.3.0 analytics: {} management reports across {} companies",
15848 snap.management_reports.len(),
15849 self.config.companies.len()
15850 );
15851 }
15852
15853 if cfg.drift_events {
15855 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15856 .expect("hardcoded NaiveDate 2025-01-01 is valid");
15857 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15858 .unwrap_or(fallback_start);
15859 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15860 let mut gen = DriftEventGenerator::new(self.seed + 9400);
15861 let drifts = gen.generate_standalone_drifts(start_date, end_date);
15862 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15863 snap.drift_events = drifts;
15864 }
15865 let _ = entries;
15867
15868 Ok(snap)
15869 }
15870}
15871
15872fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15874 match format {
15875 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15876 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15877 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15878 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15879 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15880 }
15881}
15882
15883fn compute_trial_balance_entries(
15888 entries: &[JournalEntry],
15889 entity_code: &str,
15890 fiscal_year: i32,
15891 coa: Option<&ChartOfAccounts>,
15892) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15893 use std::collections::BTreeMap;
15894
15895 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15896 BTreeMap::new();
15897
15898 for je in entries {
15899 for line in &je.lines {
15900 let entry = balances.entry(line.account_code.clone()).or_default();
15901 entry.0 += line.debit_amount;
15902 entry.1 += line.credit_amount;
15903 }
15904 }
15905
15906 balances
15907 .into_iter()
15908 .map(
15909 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15910 account_description: coa
15911 .and_then(|c| c.get_account(&account_code))
15912 .map(|a| a.description().to_string())
15913 .unwrap_or_else(|| account_code.clone()),
15914 account_code,
15915 debit_balance: debit,
15916 credit_balance: credit,
15917 net_balance: debit - credit,
15918 entity_code: entity_code.to_string(),
15919 period: format!("FY{}", fiscal_year),
15920 },
15921 )
15922 .collect()
15923}
15924
15925#[cfg(test)]
15926mod tests {
15927 use super::*;
15928 use datasynth_config::schema::*;
15929
15930 fn create_test_config() -> GeneratorConfig {
15931 GeneratorConfig {
15932 global: GlobalConfig {
15933 industry: IndustrySector::Manufacturing,
15934 start_date: "2024-01-01".to_string(),
15935 period_months: 1,
15936 seed: Some(42),
15937 parallel: false,
15938 group_currency: "USD".to_string(),
15939 presentation_currency: None,
15940 worker_threads: 0,
15941 memory_limit_mb: 0,
15942 fiscal_year_months: None,
15943 },
15944 companies: vec![CompanyConfig {
15945 code: "1000".to_string(),
15946 name: "Test Company".to_string(),
15947 currency: "USD".to_string(),
15948 functional_currency: None,
15949 country: "US".to_string(),
15950 annual_transaction_volume: TransactionVolume::TenK,
15951 volume_weight: 1.0,
15952 fiscal_year_variant: "K4".to_string(),
15953 }],
15954 chart_of_accounts: ChartOfAccountsConfig {
15955 complexity: CoAComplexity::Small,
15956 industry_specific: true,
15957 custom_accounts: None,
15958 min_hierarchy_depth: 2,
15959 max_hierarchy_depth: 4,
15960 expand_industry_subaccounts: false,
15961 },
15962 transactions: TransactionConfig::default(),
15963 output: OutputConfig::default(),
15964 fraud: FraudConfig::default(),
15965 internal_controls: InternalControlsConfig::default(),
15966 business_processes: BusinessProcessConfig::default(),
15967 user_personas: UserPersonaConfig::default(),
15968 templates: TemplateConfig::default(),
15969 approval: ApprovalConfig::default(),
15970 departments: DepartmentConfig::default(),
15971 master_data: MasterDataConfig::default(),
15972 document_flows: DocumentFlowConfig::default(),
15973 intercompany: IntercompanyConfig::default(),
15974 balance: BalanceConfig::default(),
15975 ocpm: OcpmConfig::default(),
15976 audit: AuditGenerationConfig::default(),
15977 banking: datasynth_banking::BankingConfig::default(),
15978 data_quality: DataQualitySchemaConfig::default(),
15979 scenario: ScenarioConfig::default(),
15980 temporal: TemporalDriftConfig::default(),
15981 graph_export: GraphExportConfig::default(),
15982 streaming: StreamingSchemaConfig::default(),
15983 rate_limit: RateLimitSchemaConfig::default(),
15984 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15985 relationships: RelationshipSchemaConfig::default(),
15986 accounting_standards: AccountingStandardsConfig::default(),
15987 audit_standards: AuditStandardsConfig::default(),
15988 distributions: Default::default(),
15989 temporal_patterns: Default::default(),
15990 vendor_network: VendorNetworkSchemaConfig::default(),
15991 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15992 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15993 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15994 organizational_events: OrganizationalEventsSchemaConfig::default(),
15995 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15996 market_drift: MarketDriftSchemaConfig::default(),
15997 drift_labeling: DriftLabelingSchemaConfig::default(),
15998 anomaly_injection: Default::default(),
15999 industry_specific: Default::default(),
16000 fingerprint_privacy: Default::default(),
16001 quality_gates: Default::default(),
16002 compliance: Default::default(),
16003 webhooks: Default::default(),
16004 llm: Default::default(),
16005 diffusion: Default::default(),
16006 causal: Default::default(),
16007 source_to_pay: Default::default(),
16008 financial_reporting: Default::default(),
16009 hr: Default::default(),
16010 manufacturing: Default::default(),
16011 sales_quotes: Default::default(),
16012 tax: Default::default(),
16013 treasury: Default::default(),
16014 project_accounting: Default::default(),
16015 esg: Default::default(),
16016 country_packs: None,
16017 scenarios: Default::default(),
16018 session: Default::default(),
16019 compliance_regulations: Default::default(),
16020 analytics_metadata: Default::default(),
16021 concentration: Default::default(),
16022 }
16023 }
16024
16025 #[test]
16026 fn test_enhanced_orchestrator_creation() {
16027 let config = create_test_config();
16028 let orchestrator = EnhancedOrchestrator::with_defaults(config);
16029 assert!(orchestrator.is_ok());
16030 }
16031
16032 #[test]
16033 fn test_minimal_generation() {
16034 let config = create_test_config();
16035 let phase_config = PhaseConfig {
16036 generate_master_data: false,
16037 generate_document_flows: false,
16038 generate_journal_entries: true,
16039 inject_anomalies: false,
16040 show_progress: false,
16041 ..Default::default()
16042 };
16043
16044 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16045 let result = orchestrator.generate();
16046
16047 assert!(result.is_ok());
16048 let result = result.unwrap();
16049 assert!(!result.journal_entries.is_empty());
16050 }
16051
16052 #[test]
16053 fn test_master_data_generation() {
16054 let config = create_test_config();
16055 let phase_config = PhaseConfig {
16056 generate_master_data: true,
16057 generate_document_flows: false,
16058 generate_journal_entries: false,
16059 inject_anomalies: false,
16060 show_progress: false,
16061 vendors_per_company: 5,
16062 customers_per_company: 5,
16063 materials_per_company: 10,
16064 assets_per_company: 5,
16065 employees_per_company: 10,
16066 ..Default::default()
16067 };
16068
16069 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16070 let result = orchestrator.generate().unwrap();
16071
16072 assert!(!result.master_data.vendors.is_empty());
16073 assert!(!result.master_data.customers.is_empty());
16074 assert!(!result.master_data.materials.is_empty());
16075 }
16076
16077 #[test]
16078 fn test_document_flow_generation() {
16079 let config = create_test_config();
16080 let phase_config = PhaseConfig {
16081 generate_master_data: true,
16082 generate_document_flows: true,
16083 generate_journal_entries: false,
16084 inject_anomalies: false,
16085 inject_data_quality: false,
16086 validate_balances: false,
16087 validate_coa_coverage_strict: false,
16088 generate_ocpm_events: false,
16089 show_progress: false,
16090 vendors_per_company: 5,
16091 customers_per_company: 5,
16092 materials_per_company: 10,
16093 assets_per_company: 5,
16094 employees_per_company: 10,
16095 p2p_chains: 5,
16096 o2c_chains: 5,
16097 ..Default::default()
16098 };
16099
16100 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16101 let result = orchestrator.generate().unwrap();
16102
16103 assert!(!result.document_flows.p2p_chains.is_empty());
16105 assert!(!result.document_flows.o2c_chains.is_empty());
16106
16107 assert!(!result.document_flows.purchase_orders.is_empty());
16109 assert!(!result.document_flows.sales_orders.is_empty());
16110 }
16111
16112 #[test]
16113 fn test_anomaly_injection() {
16114 let config = create_test_config();
16115 let phase_config = PhaseConfig {
16116 generate_master_data: false,
16117 generate_document_flows: false,
16118 generate_journal_entries: true,
16119 inject_anomalies: true,
16120 show_progress: false,
16121 ..Default::default()
16122 };
16123
16124 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16125 let result = orchestrator.generate().unwrap();
16126
16127 assert!(!result.journal_entries.is_empty());
16129
16130 assert!(result.anomaly_labels.summary.is_some());
16133 }
16134
16135 #[test]
16136 fn test_full_generation_pipeline() {
16137 let config = create_test_config();
16138 let phase_config = PhaseConfig {
16139 generate_master_data: true,
16140 generate_document_flows: true,
16141 generate_journal_entries: true,
16142 inject_anomalies: false,
16143 inject_data_quality: false,
16144 validate_balances: true,
16145 validate_coa_coverage_strict: false,
16146 generate_ocpm_events: false,
16147 show_progress: false,
16148 vendors_per_company: 3,
16149 customers_per_company: 3,
16150 materials_per_company: 5,
16151 assets_per_company: 3,
16152 employees_per_company: 5,
16153 p2p_chains: 3,
16154 o2c_chains: 3,
16155 ..Default::default()
16156 };
16157
16158 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16159 let result = orchestrator.generate().unwrap();
16160
16161 assert!(!result.master_data.vendors.is_empty());
16163 assert!(!result.master_data.customers.is_empty());
16164 assert!(!result.document_flows.p2p_chains.is_empty());
16165 assert!(!result.document_flows.o2c_chains.is_empty());
16166 assert!(!result.journal_entries.is_empty());
16167 assert!(result.statistics.accounts_count > 0);
16168
16169 assert!(!result.subledger.ap_invoices.is_empty());
16171 assert!(!result.subledger.ar_invoices.is_empty());
16172
16173 assert!(result.balance_validation.validated);
16175 assert!(result.balance_validation.entries_processed > 0);
16176 }
16177
16178 #[test]
16179 fn test_subledger_linking() {
16180 let config = create_test_config();
16181 let phase_config = PhaseConfig {
16182 generate_master_data: true,
16183 generate_document_flows: true,
16184 generate_journal_entries: false,
16185 inject_anomalies: false,
16186 inject_data_quality: false,
16187 validate_balances: false,
16188 validate_coa_coverage_strict: false,
16189 generate_ocpm_events: false,
16190 show_progress: false,
16191 vendors_per_company: 5,
16192 customers_per_company: 5,
16193 materials_per_company: 10,
16194 assets_per_company: 3,
16195 employees_per_company: 5,
16196 p2p_chains: 5,
16197 o2c_chains: 5,
16198 ..Default::default()
16199 };
16200
16201 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16202 let result = orchestrator.generate().unwrap();
16203
16204 assert!(!result.document_flows.vendor_invoices.is_empty());
16206 assert!(!result.document_flows.customer_invoices.is_empty());
16207
16208 assert!(!result.subledger.ap_invoices.is_empty());
16210 assert!(!result.subledger.ar_invoices.is_empty());
16211
16212 assert_eq!(
16214 result.subledger.ap_invoices.len(),
16215 result.document_flows.vendor_invoices.len()
16216 );
16217
16218 assert_eq!(
16220 result.subledger.ar_invoices.len(),
16221 result.document_flows.customer_invoices.len()
16222 );
16223
16224 assert_eq!(
16226 result.statistics.ap_invoice_count,
16227 result.subledger.ap_invoices.len()
16228 );
16229 assert_eq!(
16230 result.statistics.ar_invoice_count,
16231 result.subledger.ar_invoices.len()
16232 );
16233 }
16234
16235 #[test]
16236 fn test_balance_validation() {
16237 let config = create_test_config();
16238 let phase_config = PhaseConfig {
16239 generate_master_data: false,
16240 generate_document_flows: false,
16241 generate_journal_entries: true,
16242 inject_anomalies: false,
16243 validate_balances: true,
16244 validate_coa_coverage_strict: false,
16245 show_progress: false,
16246 ..Default::default()
16247 };
16248
16249 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16250 let result = orchestrator.generate().unwrap();
16251
16252 assert!(result.balance_validation.validated);
16254 assert!(result.balance_validation.entries_processed > 0);
16255
16256 assert!(!result.balance_validation.has_unbalanced_entries);
16258
16259 assert_eq!(
16261 result.balance_validation.total_debits,
16262 result.balance_validation.total_credits
16263 );
16264 }
16265
16266 #[test]
16267 fn test_statistics_accuracy() {
16268 let config = create_test_config();
16269 let phase_config = PhaseConfig {
16270 generate_master_data: true,
16271 generate_document_flows: false,
16272 generate_journal_entries: true,
16273 inject_anomalies: false,
16274 show_progress: false,
16275 vendors_per_company: 10,
16276 customers_per_company: 20,
16277 materials_per_company: 15,
16278 assets_per_company: 5,
16279 employees_per_company: 8,
16280 ..Default::default()
16281 };
16282
16283 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16284 let result = orchestrator.generate().unwrap();
16285
16286 assert_eq!(
16288 result.statistics.vendor_count,
16289 result.master_data.vendors.len()
16290 );
16291 assert_eq!(
16292 result.statistics.customer_count,
16293 result.master_data.customers.len()
16294 );
16295 assert_eq!(
16296 result.statistics.material_count,
16297 result.master_data.materials.len()
16298 );
16299 assert_eq!(
16300 result.statistics.total_entries as usize,
16301 result.journal_entries.len()
16302 );
16303 }
16304
16305 #[test]
16306 fn test_phase_config_defaults() {
16307 let config = PhaseConfig::default();
16308 assert!(config.generate_master_data);
16309 assert!(config.generate_document_flows);
16310 assert!(config.generate_journal_entries);
16311 assert!(!config.inject_anomalies);
16312 assert!(config.validate_balances);
16313 assert!(config.show_progress);
16314 assert!(config.vendors_per_company > 0);
16315 assert!(config.customers_per_company > 0);
16316 }
16317
16318 #[test]
16319 fn test_get_coa_before_generation() {
16320 let config = create_test_config();
16321 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16322
16323 assert!(orchestrator.get_coa().is_none());
16325 }
16326
16327 #[test]
16328 fn test_get_coa_after_generation() {
16329 let config = create_test_config();
16330 let phase_config = PhaseConfig {
16331 generate_master_data: false,
16332 generate_document_flows: false,
16333 generate_journal_entries: true,
16334 inject_anomalies: false,
16335 show_progress: false,
16336 ..Default::default()
16337 };
16338
16339 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16340 let _ = orchestrator.generate().unwrap();
16341
16342 assert!(orchestrator.get_coa().is_some());
16344 }
16345
16346 #[test]
16347 fn test_get_master_data() {
16348 let config = create_test_config();
16349 let phase_config = PhaseConfig {
16350 generate_master_data: true,
16351 generate_document_flows: false,
16352 generate_journal_entries: false,
16353 inject_anomalies: false,
16354 show_progress: false,
16355 vendors_per_company: 5,
16356 customers_per_company: 5,
16357 materials_per_company: 5,
16358 assets_per_company: 5,
16359 employees_per_company: 5,
16360 ..Default::default()
16361 };
16362
16363 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16364 let result = orchestrator.generate().unwrap();
16365
16366 assert!(!result.master_data.vendors.is_empty());
16368 }
16369
16370 #[test]
16371 fn test_with_progress_builder() {
16372 let config = create_test_config();
16373 let orchestrator = EnhancedOrchestrator::with_defaults(config)
16374 .unwrap()
16375 .with_progress(false);
16376
16377 assert!(!orchestrator.phase_config.show_progress);
16379 }
16380
16381 #[test]
16382 fn test_multi_company_generation() {
16383 let mut config = create_test_config();
16384 config.companies.push(CompanyConfig {
16385 code: "2000".to_string(),
16386 name: "Subsidiary".to_string(),
16387 currency: "EUR".to_string(),
16388 functional_currency: None,
16389 country: "DE".to_string(),
16390 annual_transaction_volume: TransactionVolume::TenK,
16391 volume_weight: 0.5,
16392 fiscal_year_variant: "K4".to_string(),
16393 });
16394
16395 let phase_config = PhaseConfig {
16396 generate_master_data: true,
16397 generate_document_flows: false,
16398 generate_journal_entries: true,
16399 inject_anomalies: false,
16400 show_progress: false,
16401 vendors_per_company: 5,
16402 customers_per_company: 5,
16403 materials_per_company: 5,
16404 assets_per_company: 5,
16405 employees_per_company: 5,
16406 ..Default::default()
16407 };
16408
16409 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16410 let result = orchestrator.generate().unwrap();
16411
16412 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
16415 assert!(result.statistics.companies_count == 2);
16416 }
16417
16418 #[test]
16419 fn test_empty_master_data_skips_document_flows() {
16420 let config = create_test_config();
16421 let phase_config = PhaseConfig {
16422 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
16425 inject_anomalies: false,
16426 show_progress: false,
16427 ..Default::default()
16428 };
16429
16430 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16431 let result = orchestrator.generate().unwrap();
16432
16433 assert!(result.document_flows.p2p_chains.is_empty());
16435 assert!(result.document_flows.o2c_chains.is_empty());
16436 }
16437
16438 #[test]
16439 fn test_journal_entry_line_item_count() {
16440 let config = create_test_config();
16441 let phase_config = PhaseConfig {
16442 generate_master_data: false,
16443 generate_document_flows: false,
16444 generate_journal_entries: true,
16445 inject_anomalies: false,
16446 show_progress: false,
16447 ..Default::default()
16448 };
16449
16450 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16451 let result = orchestrator.generate().unwrap();
16452
16453 let calculated_line_items: u64 = result
16455 .journal_entries
16456 .iter()
16457 .map(|e| e.line_count() as u64)
16458 .sum();
16459 assert_eq!(result.statistics.total_line_items, calculated_line_items);
16460 }
16461
16462 #[test]
16463 fn test_audit_generation() {
16464 let config = create_test_config();
16465 let phase_config = PhaseConfig {
16466 generate_master_data: false,
16467 generate_document_flows: false,
16468 generate_journal_entries: true,
16469 inject_anomalies: false,
16470 show_progress: false,
16471 generate_audit: true,
16472 audit_engagements: 2,
16473 workpapers_per_engagement: 5,
16474 evidence_per_workpaper: 2,
16475 risks_per_engagement: 3,
16476 findings_per_engagement: 2,
16477 judgments_per_engagement: 2,
16478 ..Default::default()
16479 };
16480
16481 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16482 let result = orchestrator.generate().unwrap();
16483
16484 assert_eq!(result.audit.engagements.len(), 2);
16486 assert!(!result.audit.workpapers.is_empty());
16487 assert!(!result.audit.evidence.is_empty());
16488 assert!(!result.audit.risk_assessments.is_empty());
16489 assert!(!result.audit.findings.is_empty());
16490 assert!(!result.audit.judgments.is_empty());
16491
16492 assert!(
16494 !result.audit.confirmations.is_empty(),
16495 "ISA 505 confirmations should be generated"
16496 );
16497 assert!(
16498 !result.audit.confirmation_responses.is_empty(),
16499 "ISA 505 confirmation responses should be generated"
16500 );
16501 assert!(
16502 !result.audit.procedure_steps.is_empty(),
16503 "ISA 330 procedure steps should be generated"
16504 );
16505 assert!(
16507 !result.audit.analytical_results.is_empty(),
16508 "ISA 520 analytical procedures should be generated"
16509 );
16510 assert!(
16511 !result.audit.ia_functions.is_empty(),
16512 "ISA 610 IA functions should be generated (one per engagement)"
16513 );
16514 assert!(
16515 !result.audit.related_parties.is_empty(),
16516 "ISA 550 related parties should be generated"
16517 );
16518
16519 assert_eq!(
16521 result.statistics.audit_engagement_count,
16522 result.audit.engagements.len()
16523 );
16524 assert_eq!(
16525 result.statistics.audit_workpaper_count,
16526 result.audit.workpapers.len()
16527 );
16528 assert_eq!(
16529 result.statistics.audit_evidence_count,
16530 result.audit.evidence.len()
16531 );
16532 assert_eq!(
16533 result.statistics.audit_risk_count,
16534 result.audit.risk_assessments.len()
16535 );
16536 assert_eq!(
16537 result.statistics.audit_finding_count,
16538 result.audit.findings.len()
16539 );
16540 assert_eq!(
16541 result.statistics.audit_judgment_count,
16542 result.audit.judgments.len()
16543 );
16544 assert_eq!(
16545 result.statistics.audit_confirmation_count,
16546 result.audit.confirmations.len()
16547 );
16548 assert_eq!(
16549 result.statistics.audit_confirmation_response_count,
16550 result.audit.confirmation_responses.len()
16551 );
16552 assert_eq!(
16553 result.statistics.audit_procedure_step_count,
16554 result.audit.procedure_steps.len()
16555 );
16556 assert_eq!(
16557 result.statistics.audit_sample_count,
16558 result.audit.samples.len()
16559 );
16560 assert_eq!(
16561 result.statistics.audit_analytical_result_count,
16562 result.audit.analytical_results.len()
16563 );
16564 assert_eq!(
16565 result.statistics.audit_ia_function_count,
16566 result.audit.ia_functions.len()
16567 );
16568 assert_eq!(
16569 result.statistics.audit_ia_report_count,
16570 result.audit.ia_reports.len()
16571 );
16572 assert_eq!(
16573 result.statistics.audit_related_party_count,
16574 result.audit.related_parties.len()
16575 );
16576 assert_eq!(
16577 result.statistics.audit_related_party_transaction_count,
16578 result.audit.related_party_transactions.len()
16579 );
16580 }
16581
16582 #[test]
16583 fn test_new_phases_disabled_by_default() {
16584 let config = create_test_config();
16585 assert!(!config.llm.enabled);
16587 assert!(!config.diffusion.enabled);
16588 assert!(!config.causal.enabled);
16589
16590 let phase_config = PhaseConfig {
16591 generate_master_data: false,
16592 generate_document_flows: false,
16593 generate_journal_entries: true,
16594 inject_anomalies: false,
16595 show_progress: false,
16596 ..Default::default()
16597 };
16598
16599 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16600 let result = orchestrator.generate().unwrap();
16601
16602 assert_eq!(result.statistics.llm_enrichment_ms, 0);
16604 assert_eq!(result.statistics.llm_vendors_enriched, 0);
16605 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16606 assert_eq!(result.statistics.diffusion_samples_generated, 0);
16607 assert_eq!(result.statistics.causal_generation_ms, 0);
16608 assert_eq!(result.statistics.causal_samples_generated, 0);
16609 assert!(result.statistics.causal_validation_passed.is_none());
16610 assert_eq!(result.statistics.counterfactual_pair_count, 0);
16611 assert!(result.counterfactual_pairs.is_empty());
16612 }
16613
16614 #[test]
16615 fn test_counterfactual_generation_enabled() {
16616 let config = create_test_config();
16617 let phase_config = PhaseConfig {
16618 generate_master_data: false,
16619 generate_document_flows: false,
16620 generate_journal_entries: true,
16621 inject_anomalies: false,
16622 show_progress: false,
16623 generate_counterfactuals: true,
16624 generate_period_close: false, ..Default::default()
16626 };
16627
16628 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16629 let result = orchestrator.generate().unwrap();
16630
16631 if !result.journal_entries.is_empty() {
16633 assert_eq!(
16634 result.counterfactual_pairs.len(),
16635 result.journal_entries.len()
16636 );
16637 assert_eq!(
16638 result.statistics.counterfactual_pair_count,
16639 result.journal_entries.len()
16640 );
16641 let ids: std::collections::HashSet<_> = result
16643 .counterfactual_pairs
16644 .iter()
16645 .map(|p| p.pair_id.clone())
16646 .collect();
16647 assert_eq!(ids.len(), result.counterfactual_pairs.len());
16648 }
16649 }
16650
16651 #[test]
16652 fn test_llm_enrichment_enabled() {
16653 let mut config = create_test_config();
16654 config.llm.enabled = true;
16655 config.llm.max_vendor_enrichments = 3;
16656
16657 let phase_config = PhaseConfig {
16658 generate_master_data: true,
16659 generate_document_flows: false,
16660 generate_journal_entries: false,
16661 inject_anomalies: false,
16662 show_progress: false,
16663 vendors_per_company: 5,
16664 customers_per_company: 3,
16665 materials_per_company: 3,
16666 assets_per_company: 3,
16667 employees_per_company: 3,
16668 ..Default::default()
16669 };
16670
16671 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16672 let result = orchestrator.generate().unwrap();
16673
16674 assert!(result.statistics.llm_vendors_enriched > 0);
16676 assert!(result.statistics.llm_vendors_enriched <= 3);
16677 }
16678
16679 #[test]
16680 fn test_diffusion_enhancement_enabled() {
16681 let mut config = create_test_config();
16682 config.diffusion.enabled = true;
16683 config.diffusion.n_steps = 50;
16684 config.diffusion.sample_size = 20;
16685
16686 let phase_config = PhaseConfig {
16687 generate_master_data: false,
16688 generate_document_flows: false,
16689 generate_journal_entries: true,
16690 inject_anomalies: false,
16691 show_progress: false,
16692 ..Default::default()
16693 };
16694
16695 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16696 let result = orchestrator.generate().unwrap();
16697
16698 assert_eq!(result.statistics.diffusion_samples_generated, 20);
16700 }
16701
16702 #[test]
16703 fn test_causal_overlay_enabled() {
16704 let mut config = create_test_config();
16705 config.causal.enabled = true;
16706 config.causal.template = "fraud_detection".to_string();
16707 config.causal.sample_size = 100;
16708 config.causal.validate = true;
16709
16710 let phase_config = PhaseConfig {
16711 generate_master_data: false,
16712 generate_document_flows: false,
16713 generate_journal_entries: true,
16714 inject_anomalies: false,
16715 show_progress: false,
16716 ..Default::default()
16717 };
16718
16719 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16720 let result = orchestrator.generate().unwrap();
16721
16722 assert_eq!(result.statistics.causal_samples_generated, 100);
16724 assert!(result.statistics.causal_validation_passed.is_some());
16726 }
16727
16728 #[test]
16729 fn test_causal_overlay_revenue_cycle_template() {
16730 let mut config = create_test_config();
16731 config.causal.enabled = true;
16732 config.causal.template = "revenue_cycle".to_string();
16733 config.causal.sample_size = 50;
16734 config.causal.validate = false;
16735
16736 let phase_config = PhaseConfig {
16737 generate_master_data: false,
16738 generate_document_flows: false,
16739 generate_journal_entries: true,
16740 inject_anomalies: false,
16741 show_progress: false,
16742 ..Default::default()
16743 };
16744
16745 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16746 let result = orchestrator.generate().unwrap();
16747
16748 assert_eq!(result.statistics.causal_samples_generated, 50);
16750 assert!(result.statistics.causal_validation_passed.is_none());
16752 }
16753
16754 #[test]
16755 fn test_all_new_phases_enabled_together() {
16756 let mut config = create_test_config();
16757 config.llm.enabled = true;
16758 config.llm.max_vendor_enrichments = 2;
16759 config.diffusion.enabled = true;
16760 config.diffusion.n_steps = 20;
16761 config.diffusion.sample_size = 10;
16762 config.causal.enabled = true;
16763 config.causal.sample_size = 50;
16764 config.causal.validate = true;
16765
16766 let phase_config = PhaseConfig {
16767 generate_master_data: true,
16768 generate_document_flows: false,
16769 generate_journal_entries: true,
16770 inject_anomalies: false,
16771 show_progress: false,
16772 vendors_per_company: 5,
16773 customers_per_company: 3,
16774 materials_per_company: 3,
16775 assets_per_company: 3,
16776 employees_per_company: 3,
16777 ..Default::default()
16778 };
16779
16780 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16781 let result = orchestrator.generate().unwrap();
16782
16783 assert!(result.statistics.llm_vendors_enriched > 0);
16785 assert_eq!(result.statistics.diffusion_samples_generated, 10);
16786 assert_eq!(result.statistics.causal_samples_generated, 50);
16787 assert!(result.statistics.causal_validation_passed.is_some());
16788 }
16789
16790 #[test]
16791 fn test_statistics_serialization_with_new_fields() {
16792 let stats = EnhancedGenerationStatistics {
16793 total_entries: 100,
16794 total_line_items: 500,
16795 llm_enrichment_ms: 42,
16796 llm_vendors_enriched: 10,
16797 diffusion_enhancement_ms: 100,
16798 diffusion_samples_generated: 50,
16799 causal_generation_ms: 200,
16800 causal_samples_generated: 100,
16801 causal_validation_passed: Some(true),
16802 ..Default::default()
16803 };
16804
16805 let json = serde_json::to_string(&stats).unwrap();
16806 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16807
16808 assert_eq!(deserialized.llm_enrichment_ms, 42);
16809 assert_eq!(deserialized.llm_vendors_enriched, 10);
16810 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16811 assert_eq!(deserialized.diffusion_samples_generated, 50);
16812 assert_eq!(deserialized.causal_generation_ms, 200);
16813 assert_eq!(deserialized.causal_samples_generated, 100);
16814 assert_eq!(deserialized.causal_validation_passed, Some(true));
16815 }
16816
16817 #[test]
16818 fn test_statistics_backward_compat_deserialization() {
16819 let old_json = r#"{
16821 "total_entries": 100,
16822 "total_line_items": 500,
16823 "accounts_count": 50,
16824 "companies_count": 1,
16825 "period_months": 12,
16826 "vendor_count": 10,
16827 "customer_count": 20,
16828 "material_count": 15,
16829 "asset_count": 5,
16830 "employee_count": 8,
16831 "p2p_chain_count": 5,
16832 "o2c_chain_count": 5,
16833 "ap_invoice_count": 5,
16834 "ar_invoice_count": 5,
16835 "ocpm_event_count": 0,
16836 "ocpm_object_count": 0,
16837 "ocpm_case_count": 0,
16838 "audit_engagement_count": 0,
16839 "audit_workpaper_count": 0,
16840 "audit_evidence_count": 0,
16841 "audit_risk_count": 0,
16842 "audit_finding_count": 0,
16843 "audit_judgment_count": 0,
16844 "anomalies_injected": 0,
16845 "data_quality_issues": 0,
16846 "banking_customer_count": 0,
16847 "banking_account_count": 0,
16848 "banking_transaction_count": 0,
16849 "banking_suspicious_count": 0,
16850 "graph_export_count": 0,
16851 "graph_node_count": 0,
16852 "graph_edge_count": 0
16853 }"#;
16854
16855 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16856
16857 assert_eq!(stats.llm_enrichment_ms, 0);
16859 assert_eq!(stats.llm_vendors_enriched, 0);
16860 assert_eq!(stats.diffusion_enhancement_ms, 0);
16861 assert_eq!(stats.diffusion_samples_generated, 0);
16862 assert_eq!(stats.causal_generation_ms, 0);
16863 assert_eq!(stats.causal_samples_generated, 0);
16864 assert!(stats.causal_validation_passed.is_none());
16865 }
16866}