1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195 #[allow(clippy::field_reassign_with_default)]
196 {
197 let mut s = DataQualityStats::default();
198 s.total_records = n_entries;
199 s.missing_values.total_records = n_entries;
200 s.format_variations.total_processed = n_entries;
201 s.duplicates.total_processed = n_entries;
202 s
203 }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207 let payment_behavior = &schema_config.payment_behavior;
208 let late_dist = &payment_behavior.late_payment_days_distribution;
209
210 P2PGeneratorConfig {
211 three_way_match_rate: schema_config.three_way_match_rate,
212 partial_delivery_rate: schema_config.partial_delivery_rate,
213 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214 price_variance_rate: schema_config.price_variance_rate,
215 max_price_variance_percent: schema_config.max_price_variance_percent,
216 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219 payment_method_distribution: vec![
220 (PaymentMethod::BankTransfer, 0.60),
221 (PaymentMethod::Check, 0.25),
222 (PaymentMethod::Wire, 0.10),
223 (PaymentMethod::CreditCard, 0.05),
224 ],
225 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226 payment_behavior: P2PPaymentBehavior {
227 late_payment_rate: payment_behavior.late_payment_rate,
228 late_payment_distribution: LatePaymentDistribution {
229 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230 late_8_to_14: late_dist.late_8_to_14,
231 very_late_15_to_30: late_dist.very_late_15_to_30,
232 severely_late_31_to_60: late_dist.severely_late_31_to_60,
233 extremely_late_over_60: late_dist.extremely_late_over_60,
234 },
235 partial_payment_rate: payment_behavior.partial_payment_rate,
236 payment_correction_rate: payment_behavior.payment_correction_rate,
237 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238 },
239 }
240}
241
242fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244 let payment_behavior = &schema_config.payment_behavior;
245
246 O2CGeneratorConfig {
247 credit_check_failure_rate: schema_config.credit_check_failure_rate,
248 partial_shipment_rate: schema_config.partial_shipment_rate,
249 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253 bad_debt_rate: schema_config.bad_debt_rate,
254 returns_rate: schema_config.return_rate,
255 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256 payment_method_distribution: vec![
257 (PaymentMethod::BankTransfer, 0.50),
258 (PaymentMethod::Check, 0.30),
259 (PaymentMethod::Wire, 0.15),
260 (PaymentMethod::CreditCard, 0.05),
261 ],
262 payment_behavior: O2CPaymentBehavior {
263 partial_payment_rate: payment_behavior.partial_payments.rate,
264 short_payment_rate: payment_behavior.short_payments.rate,
265 max_short_percent: payment_behavior.short_payments.max_short_percent,
266 on_account_rate: payment_behavior.on_account_payments.rate,
267 payment_correction_rate: payment_behavior.payment_corrections.rate,
268 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269 },
270 }
271}
272
/// Switches and volume knobs describing which generation phases are requested.
///
/// The values documented as "default" are those produced by
/// [`PhaseConfig::default`].
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master-data phase toggle (default `true`).
    pub generate_master_data: bool,
    /// Document-flow phase toggle (default `true`).
    pub generate_document_flows: bool,
    /// OCPM event phase toggle (default `false`).
    pub generate_ocpm_events: bool,
    /// Journal-entry phase toggle (default `true`).
    pub generate_journal_entries: bool,
    /// Anomaly-injection toggle (default `false`).
    pub inject_anomalies: bool,
    /// Data-quality-injection toggle (default `false`).
    pub inject_data_quality: bool,
    /// Balance-validation toggle (default `true`).
    pub validate_balances: bool,
    /// Strict chart-of-accounts coverage validation toggle (default `false`).
    pub validate_coa_coverage_strict: bool,
    /// Progress-display toggle (default `true`).
    pub show_progress: bool,
    /// Vendor count per company (default 50).
    pub vendors_per_company: usize,
    /// Customer count per company (default 100).
    pub customers_per_company: usize,
    /// Material count per company (default 200).
    pub materials_per_company: usize,
    /// Asset count per company (default 50).
    pub assets_per_company: usize,
    /// Employee count per company (default 100).
    pub employees_per_company: usize,
    /// Number of P2P chains (default 100).
    pub p2p_chains: usize,
    /// Number of O2C chains (default 100).
    pub o2c_chains: usize,
    /// Audit phase toggle (default `false`).
    pub generate_audit: bool,
    /// Number of audit engagements (default 5).
    pub audit_engagements: usize,
    /// Workpapers per engagement (default 20).
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper (default 5).
    pub evidence_per_workpaper: usize,
    /// Risks per engagement (default 15).
    pub risks_per_engagement: usize,
    /// Findings per engagement (default 8).
    pub findings_per_engagement: usize,
    /// Judgments per engagement (default 10).
    pub judgments_per_engagement: usize,
    /// Banking phase toggle (default `false`).
    pub generate_banking: bool,
    /// Graph-export phase toggle (default `false`).
    pub generate_graph_export: bool,
    /// Sourcing phase toggle (default `false`).
    pub generate_sourcing: bool,
    /// Bank-reconciliation phase toggle (default `false`).
    pub generate_bank_reconciliation: bool,
    /// Financial-statements phase toggle (default `false`).
    pub generate_financial_statements: bool,
    /// Accounting-standards phase toggle (default `false`).
    pub generate_accounting_standards: bool,
    /// Manufacturing phase toggle (default `false`).
    pub generate_manufacturing: bool,
    /// Sales/KPI/budget phase toggle (default `false`).
    pub generate_sales_kpi_budgets: bool,
    /// Tax phase toggle (default `false`).
    pub generate_tax: bool,
    /// ESG phase toggle (default `false`).
    pub generate_esg: bool,
    /// Intercompany phase toggle (default `false`).
    pub generate_intercompany: bool,
    /// Evolution-events phase toggle (default `true`).
    pub generate_evolution_events: bool,
    /// Counterfactual generation toggle (default `false`).
    pub generate_counterfactuals: bool,
    /// Compliance-regulations phase toggle (default `false`).
    pub generate_compliance_regulations: bool,
    /// Period-close phase toggle (default `true`).
    pub generate_period_close: bool,
    /// HR phase toggle (default `false`).
    pub generate_hr: bool,
    /// Treasury phase toggle (default `false`).
    pub generate_treasury: bool,
    /// Project-accounting phase toggle (default `false`).
    pub generate_project_accounting: bool,
    /// Legal-documents toggle (default `false`).
    pub generate_legal_documents: bool,
    /// IT-controls toggle (default `false`).
    pub generate_it_controls: bool,
    /// Analytics-metadata toggle (default `false`).
    pub generate_analytics_metadata: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: the core phases are switched on,
    /// every optional phase is off, and volumes use fixed counts.
    fn default() -> Self {
        Self {
            // Phases that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // Phases and checks that must be opted into.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_coa_coverage_strict: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
            generate_legal_documents: false,
            generate_it_controls: false,
            generate_analytics_metadata: false,

            // Master-data and document-chain volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
425
426impl PhaseConfig {
427 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432 Self {
433 generate_master_data: true,
435 generate_document_flows: true,
436 generate_journal_entries: true,
437 validate_balances: true,
438 validate_coa_coverage_strict: false,
439 generate_period_close: true,
440 generate_evolution_events: true,
441 show_progress: true,
442
443 generate_audit: cfg.audit.enabled,
445 generate_banking: cfg.banking.enabled,
446 generate_graph_export: cfg.graph_export.enabled,
447 generate_sourcing: cfg.source_to_pay.enabled,
448 generate_intercompany: cfg.intercompany.enabled,
449 generate_financial_statements: cfg.financial_reporting.enabled,
450 generate_bank_reconciliation: cfg.financial_reporting.enabled,
451 generate_accounting_standards: cfg.accounting_standards.enabled,
452 generate_manufacturing: cfg.manufacturing.enabled,
453 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454 generate_tax: cfg.tax.enabled,
455 generate_esg: cfg.esg.enabled,
456 generate_ocpm_events: cfg.ocpm.enabled,
457 generate_compliance_regulations: cfg.compliance_regulations.enabled,
458 generate_hr: cfg.hr.enabled,
459 generate_treasury: cfg.treasury.enabled,
460 generate_project_accounting: cfg.project_accounting.enabled,
461
462 generate_legal_documents: cfg.compliance_regulations.enabled
466 && cfg.compliance_regulations.legal_documents.enabled,
467 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470 generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478 inject_data_quality: cfg.data_quality.enabled,
479
480 vendors_per_company: 50,
482 customers_per_company: 100,
483 materials_per_company: 200,
484 assets_per_company: 50,
485 employees_per_company: 100,
486 p2p_chains: 100,
487 o2c_chains: 100,
488 audit_engagements: 5,
489 workpapers_per_engagement: 20,
490 evidence_per_workpaper: 5,
491 risks_per_engagement: 15,
492 findings_per_engagement: 8,
493 judgments_per_engagement: 10,
494 }
495 }
496}
497
498#[derive(Debug, Clone, Default)]
500pub struct MasterDataSnapshot {
501 pub vendors: Vec<Vendor>,
503 pub customers: Vec<Customer>,
505 pub materials: Vec<Material>,
507 pub assets: Vec<FixedAsset>,
509 pub employees: Vec<Employee>,
511 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
513 pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
517 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
519 pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
523}
524
/// Size counters and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
537
538#[derive(Debug, Clone, Default)]
540pub struct DocumentFlowSnapshot {
541 pub p2p_chains: Vec<P2PDocumentChain>,
543 pub o2c_chains: Vec<O2CDocumentChain>,
545 pub purchase_orders: Vec<documents::PurchaseOrder>,
547 pub goods_receipts: Vec<documents::GoodsReceipt>,
549 pub vendor_invoices: Vec<documents::VendorInvoice>,
551 pub sales_orders: Vec<documents::SalesOrder>,
553 pub deliveries: Vec<documents::Delivery>,
555 pub customer_invoices: Vec<documents::CustomerInvoice>,
557 pub payments: Vec<documents::Payment>,
559 pub document_references: Vec<documents::DocumentReference>,
562}
563
564#[derive(Debug, Clone, Default)]
566pub struct SubledgerSnapshot {
567 pub ap_invoices: Vec<APInvoice>,
569 pub ar_invoices: Vec<ARInvoice>,
571 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
573 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
575 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
577 pub ar_aging_reports: Vec<ARAgingReport>,
579 pub ap_aging_reports: Vec<APAgingReport>,
581 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
583 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
585 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
587 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
589}
590
591#[derive(Debug, Clone, Default)]
593pub struct OcpmSnapshot {
594 pub event_log: Option<OcpmEventLog>,
596 pub event_count: usize,
598 pub object_count: usize,
600 pub case_count: usize,
602}
603
604#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607 pub engagements: Vec<AuditEngagement>,
609 pub workpapers: Vec<Workpaper>,
611 pub evidence: Vec<AuditEvidence>,
613 pub risk_assessments: Vec<RiskAssessment>,
615 pub findings: Vec<AuditFinding>,
617 pub judgments: Vec<ProfessionalJudgment>,
619 pub confirmations: Vec<ExternalConfirmation>,
621 pub confirmation_responses: Vec<ConfirmationResponse>,
623 pub procedure_steps: Vec<AuditProcedureStep>,
625 pub samples: Vec<AuditSample>,
627 pub analytical_results: Vec<AnalyticalProcedureResult>,
629 pub ia_functions: Vec<InternalAuditFunction>,
631 pub ia_reports: Vec<InternalAuditReport>,
633 pub related_parties: Vec<RelatedParty>,
635 pub related_party_transactions: Vec<RelatedPartyTransaction>,
637 pub component_auditors: Vec<ComponentAuditor>,
640 pub group_audit_plan: Option<GroupAuditPlan>,
642 pub component_instructions: Vec<ComponentInstruction>,
644 pub component_reports: Vec<ComponentAuditorReport>,
646 pub engagement_letters: Vec<EngagementLetter>,
649 pub subsequent_events: Vec<SubsequentEvent>,
652 pub service_organizations: Vec<ServiceOrganization>,
655 pub soc_reports: Vec<SocReport>,
657 pub user_entity_controls: Vec<UserEntityControl>,
659 pub going_concern_assessments:
662 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663 pub accounting_estimates:
666 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677 pub materiality_calculations:
680 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681 pub combined_risk_assessments:
684 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690 pub significant_transaction_classes:
693 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697 pub analytical_relationships:
700 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715 pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721 pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725 pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733 pub customers: Vec<BankingCustomer>,
735 pub accounts: Vec<BankAccount>,
737 pub transactions: Vec<BankTransaction>,
739 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749 pub suspicious_count: usize,
751 pub scenario_count: usize,
753}
754
755#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758 pub exported: bool,
760 pub graph_count: usize,
762 pub exports: HashMap<String, GraphExportInfo>,
764}
765
766#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769 pub name: String,
771 pub format: String,
773 pub output_path: PathBuf,
775 pub node_count: usize,
777 pub edge_count: usize,
779}
780
781#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784 pub spend_analyses: Vec<SpendAnalysis>,
786 pub sourcing_projects: Vec<SourcingProject>,
788 pub qualifications: Vec<SupplierQualification>,
790 pub rfx_events: Vec<RfxEvent>,
792 pub bids: Vec<SupplierBid>,
794 pub bid_evaluations: Vec<BidEvaluation>,
796 pub contracts: Vec<ProcurementContract>,
798 pub catalog_items: Vec<CatalogItem>,
800 pub scorecards: Vec<SupplierScorecard>,
802}
803
804#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816 pub fiscal_year: u16,
818 pub fiscal_period: u8,
820 pub period_start: NaiveDate,
822 pub period_end: NaiveDate,
824 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826}
827
828impl PeriodTrialBalance {
829 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838 let mut total_debits = Decimal::ZERO;
839 let mut total_credits = Decimal::ZERO;
840 let lines: Vec<TrialBalanceLine> = self
841 .entries
842 .into_iter()
843 .map(|e| {
844 total_debits += e.debit_balance;
845 total_credits += e.credit_balance;
846 let category = AccountCategory::from_account_code(&e.account_code);
847 TrialBalanceLine {
848 account_code: e.account_code,
849 account_description: e.account_name,
850 category,
851 account_type: AccountType::Asset,
852 opening_balance: Decimal::ZERO,
853 period_debits: e.debit_balance,
854 period_credits: e.credit_balance,
855 closing_balance: e.debit_balance - e.credit_balance,
856 debit_balance: e.debit_balance,
857 credit_balance: e.credit_balance,
858 cost_center: None,
859 profit_center: None,
860 }
861 })
862 .collect();
863 let imbalance = total_debits - total_credits;
864 let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865 TrialBalance {
866 trial_balance_id: format!(
867 "{company_code}-{:04}{:02}",
868 self.fiscal_year, self.fiscal_period
869 ),
870 company_code: company_code.to_string(),
871 company_name: None,
872 as_of_date: self.period_end,
873 fiscal_year: self.fiscal_year as i32,
874 fiscal_period: self.fiscal_period as u32,
875 currency: currency.to_string(),
876 balance_type: TrialBalanceType::Adjusted,
877 lines,
878 total_debits,
879 total_credits,
880 is_balanced,
881 out_of_balance: imbalance,
882 is_equation_valid: is_balanced,
883 equation_difference: imbalance,
884 category_summary: std::collections::HashMap::new(),
885 created_at: self
886 .period_start
887 .and_hms_opt(0, 0, 0)
888 .expect("midnight is a valid time"),
889 created_by: "ORCHESTRATOR".to_string(),
890 approved_by: None,
891 approved_at: None,
892 status: TrialBalanceStatus::Final,
893 }
894 }
895}
896
897#[derive(Debug, Clone, Default)]
899pub struct FinancialReportingSnapshot {
900 pub financial_statements: Vec<FinancialStatement>,
903 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
906 pub consolidated_statements: Vec<FinancialStatement>,
908 pub consolidation_schedules: Vec<ConsolidationSchedule>,
910 pub bank_reconciliations: Vec<BankReconciliation>,
912 pub trial_balances: Vec<PeriodTrialBalance>,
914 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
916 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
918 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
920}
921
922#[derive(Debug, Clone, Default)]
924pub struct HrSnapshot {
925 pub payroll_runs: Vec<PayrollRun>,
927 pub payroll_line_items: Vec<PayrollLineItem>,
929 pub time_entries: Vec<TimeEntry>,
931 pub expense_reports: Vec<ExpenseReport>,
933 pub benefit_enrollments: Vec<BenefitEnrollment>,
935 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
937 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
939 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
941 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
943 pub pension_journal_entries: Vec<JournalEntry>,
945 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
947 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
949 pub stock_comp_journal_entries: Vec<JournalEntry>,
951 pub payroll_run_count: usize,
953 pub payroll_line_item_count: usize,
955 pub time_entry_count: usize,
957 pub expense_report_count: usize,
959 pub benefit_enrollment_count: usize,
961 pub pension_plan_count: usize,
963 pub stock_grant_count: usize,
965}
966
967#[derive(Debug, Clone, Default)]
969pub struct AccountingStandardsSnapshot {
970 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
972 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
974 pub business_combinations:
976 Vec<datasynth_core::models::business_combination::BusinessCombination>,
977 pub business_combination_journal_entries: Vec<JournalEntry>,
979 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
981 pub ecl_provision_movements:
983 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
984 pub ecl_journal_entries: Vec<JournalEntry>,
986 pub provisions: Vec<datasynth_core::models::provision::Provision>,
988 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
990 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
992 pub provision_journal_entries: Vec<JournalEntry>,
994 pub currency_translation_results:
996 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
997 pub revenue_contract_count: usize,
999 pub impairment_test_count: usize,
1001 pub business_combination_count: usize,
1003 pub ecl_model_count: usize,
1005 pub provision_count: usize,
1007 pub currency_translation_count: usize,
1009 pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1013 pub fair_value_measurements:
1015 Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1016 pub framework_differences:
1018 Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1019 pub framework_reconciliations:
1021 Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1022 pub lease_count: usize,
1024 pub fair_value_measurement_count: usize,
1025 pub framework_difference_count: usize,
1026}
1027
1028#[derive(Debug, Clone, Default)]
1030pub struct ComplianceRegulationsSnapshot {
1031 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1033 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1035 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1037 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1039 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1041 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1043 pub compliance_graph: Option<datasynth_graph::Graph>,
1045}
1046
1047#[derive(Debug, Clone, Default)]
1049pub struct ManufacturingSnapshot {
1050 pub production_orders: Vec<ProductionOrder>,
1052 pub quality_inspections: Vec<QualityInspection>,
1054 pub cycle_counts: Vec<CycleCount>,
1056 pub bom_components: Vec<BomComponent>,
1058 pub inventory_movements: Vec<InventoryMovement>,
1060 pub production_order_count: usize,
1062 pub quality_inspection_count: usize,
1064 pub cycle_count_count: usize,
1066 pub bom_component_count: usize,
1068 pub inventory_movement_count: usize,
1070}
1071
1072#[derive(Debug, Clone, Default)]
1074pub struct SalesKpiBudgetsSnapshot {
1075 pub sales_quotes: Vec<SalesQuote>,
1077 pub kpis: Vec<ManagementKpi>,
1079 pub budgets: Vec<Budget>,
1081 pub sales_quote_count: usize,
1083 pub kpi_count: usize,
1085 pub budget_line_count: usize,
1087}
1088
1089#[derive(Debug, Clone, Default)]
1091pub struct AnomalyLabels {
1092 pub labels: Vec<LabeledAnomaly>,
1094 pub summary: Option<AnomalySummary>,
1096 pub by_type: HashMap<String, usize>,
1098}
1099
1100#[derive(Debug, Clone, Default)]
1102pub struct BalanceValidationResult {
1103 pub validated: bool,
1105 pub is_balanced: bool,
1107 pub entries_processed: u64,
1109 pub total_debits: rust_decimal::Decimal,
1111 pub total_credits: rust_decimal::Decimal,
1113 pub accounts_tracked: usize,
1115 pub companies_tracked: usize,
1117 pub validation_errors: Vec<ValidationError>,
1119 pub has_unbalanced_entries: bool,
1121}
1122
1123#[derive(Debug, Clone, Default)]
1125pub struct TaxSnapshot {
1126 pub jurisdictions: Vec<TaxJurisdiction>,
1128 pub codes: Vec<TaxCode>,
1130 pub tax_lines: Vec<TaxLine>,
1132 pub tax_returns: Vec<TaxReturn>,
1134 pub tax_provisions: Vec<TaxProvision>,
1136 pub withholding_records: Vec<WithholdingTaxRecord>,
1138 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1140 pub jurisdiction_count: usize,
1142 pub code_count: usize,
1144 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1146 pub tax_posting_journal_entries: Vec<JournalEntry>,
1148}
1149
1150#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1152pub struct IntercompanySnapshot {
1153 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1155 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1157 pub seller_journal_entries: Vec<JournalEntry>,
1159 pub buyer_journal_entries: Vec<JournalEntry>,
1161 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1163 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1165 #[serde(skip)]
1167 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1168 pub matched_pair_count: usize,
1170 pub elimination_entry_count: usize,
1172 pub match_rate: f64,
1174}
1175
1176#[derive(Debug, Clone, Default)]
1178pub struct EsgSnapshot {
1179 pub emissions: Vec<EmissionRecord>,
1181 pub energy: Vec<EnergyConsumption>,
1183 pub water: Vec<WaterUsage>,
1185 pub waste: Vec<WasteRecord>,
1187 pub diversity: Vec<WorkforceDiversityMetric>,
1189 pub pay_equity: Vec<PayEquityMetric>,
1191 pub safety_incidents: Vec<SafetyIncident>,
1193 pub safety_metrics: Vec<SafetyMetric>,
1195 pub governance: Vec<GovernanceMetric>,
1197 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1199 pub materiality: Vec<MaterialityAssessment>,
1201 pub disclosures: Vec<EsgDisclosure>,
1203 pub climate_scenarios: Vec<ClimateScenario>,
1205 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1207 pub emission_count: usize,
1209 pub disclosure_count: usize,
1211}
1212
/// Snapshot of the treasury-related records produced by a generation run.
///
/// The derived `Default` yields empty vectors for every field.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Labels for treasury anomalies.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries carried with the treasury data.
    /// NOTE(review): presumably the JEs derived from the treasury records
    /// above — confirm against the treasury phase.
    pub journal_entries: Vec<JournalEntry>,
}
1240
/// Snapshot of the project-accounting records produced by a generation run.
///
/// The derived `Default` yields empty vectors for every field.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1257
/// Aggregate result returned by `EnhancedOrchestrator::generate`.
///
/// Bundles the per-domain snapshots, the combined journal entries, and the
/// validation/statistics artifacts of one run. The type derives `Debug` and
/// `Default` only; it is not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts used for the run.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries of the run.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Individual data quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Run statistics (counts and timings).
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, when one was produced.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, when gates were evaluated.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Fraud collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains over vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when one was produced.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when one was produced.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics metadata snapshot.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, when one was produced.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity snapshot.
    pub interconnectivity: InterconnectivitySnapshot,
}
1363
/// Snapshot of interconnectivity attributes for vendors and customers.
///
/// Every field is a flat list of plain values. The meaning of each tuple
/// component is not visible in this type, so the notes below are assumptions.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// NOTE(review): presumably `(vendor_id, tier)` pairs — confirm against the producer.
    pub vendor_tiers: Vec<(String, u8)>,
    /// NOTE(review): presumably `(vendor_id, cluster)` pairs — confirm against the producer.
    pub vendor_clusters: Vec<(String, String)>,
    /// NOTE(review): presumably `(customer_id, value_segment)` pairs — confirm against the producer.
    pub customer_value_segments: Vec<(String, String)>,
    /// NOTE(review): presumably `(customer_id, lifecycle_stage)` pairs — confirm against the producer.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata as free-form strings.
    pub industry_metadata: Vec<String>,
}
1389
/// Snapshot of analytics metadata produced by a generation run.
///
/// The derived `Default` yields empty vectors for every field.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparatives.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management reports.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift events.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1402
/// Counters and timings collected during an enhanced generation run.
///
/// The struct is serializable. Fields marked `#[serde(default)]` fall back to
/// their type's `Default` value when absent from the input being
/// deserialized; fields without that attribute are required on
/// deserialization. The three `neural_*` options are additionally omitted
/// from serialized output when `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and chart of accounts ---
    /// Total number of journal entries.
    pub total_entries: u64,
    /// Total number of journal entry line items.
    pub total_line_items: u64,
    /// Number of accounts.
    pub accounts_count: usize,
    /// Number of companies (set from `config.companies.len()` in `generate`).
    pub companies_count: usize,
    /// Period length in months (set from `config.global.period_months` in `generate`).
    pub period_months: u32,

    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flows (P2P / O2C chains) ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,

    // --- AP / AR subledger invoices ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomalies and data quality ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment ---
    /// NOTE(review): the `_ms` suffix suggests a duration in milliseconds — confirm at the write site.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,

    // --- Diffusion enhancement ---
    /// NOTE(review): the `_ms` suffix suggests a duration in milliseconds — confirm at the write site.
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Neural hybrid (optional; skipped in serialized output when `None`) ---
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // --- Causal generation ---
    /// NOTE(review): the `_ms` suffix suggests a duration in milliseconds — confirm at the write site.
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` when no validation outcome was recorded.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales quotes, KPIs, budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Miscellaneous counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1672
/// Orchestrates the enhanced generation workflow.
///
/// Constructed via `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data`, optionally customised with the `with_*` / `set_*`
/// methods, and then driven by `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Phase configuration (e.g. `show_progress`, toggled by `with_progress`).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data held by the orchestrator; starts as `Default`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn randomly when that is unset.
    seed: u64,
    /// Progress-bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output directory, when set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; filled by `from_fingerprint_data`, otherwise empty.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country-pack registry built in `new` from `config.country_packs`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink that receives per-phase items.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider built by `build_template_provider`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Temporal context; `None` unless temporal patterns and business days are both enabled.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Shard context, when set through `set_shard_context`.
    shard_context: Option<crate::shard_context::ShardContext>,
}
1709
1710impl EnhancedOrchestrator {
1711 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1713 datasynth_config::validate_config(&config)?;
1714
1715 let seed = config.global.seed.unwrap_or_else(rand::random);
1716
1717 let resource_guard = Self::build_resource_guard(&config, None);
1719
1720 let country_pack_registry = match &config.country_packs {
1722 Some(cp) => {
1723 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1724 .map_err(|e| SynthError::config(e.to_string()))?
1725 }
1726 None => datasynth_core::CountryPackRegistry::builtin_only()
1727 .map_err(|e| SynthError::config(e.to_string()))?,
1728 };
1729
1730 let template_provider = Self::build_template_provider(&config)?;
1734
1735 let temporal_context = Self::build_temporal_context(&config)?;
1739
1740 Ok(Self {
1741 config,
1742 phase_config,
1743 coa: None,
1744 master_data: MasterDataSnapshot::default(),
1745 seed,
1746 multi_progress: None,
1747 resource_guard,
1748 output_path: None,
1749 copula_generators: Vec::new(),
1750 country_pack_registry,
1751 phase_sink: None,
1752 template_provider,
1753 temporal_context,
1754 shard_context: None,
1755 })
1756 }
1757
1758 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1764 self.shard_context = Some(ctx);
1765 }
1766
1767 fn build_temporal_context(
1773 config: &GeneratorConfig,
1774 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1775 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1776
1777 let tp = &config.temporal_patterns;
1778 if !tp.enabled || !tp.business_days.enabled {
1779 return Ok(None);
1780 }
1781
1782 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1783 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1784 let end_date = start_date + chrono::Months::new(config.global.period_months);
1785
1786 let region_code = tp
1787 .calendars
1788 .regions
1789 .first()
1790 .cloned()
1791 .unwrap_or_else(|| "US".to_string());
1792 let region = parse_region_code(®ion_code);
1793
1794 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1795 }
1796
1797 fn build_template_provider(
1805 config: &GeneratorConfig,
1806 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1807 use datasynth_core::templates::{
1808 loader::{MergeStrategy, TemplateLoader},
1809 DefaultTemplateProvider,
1810 };
1811 use std::sync::Arc;
1812
1813 let provider = match &config.templates.path {
1814 None => DefaultTemplateProvider::new(),
1815 Some(path) => {
1816 let data = if path.is_dir() {
1817 TemplateLoader::load_from_directory(path)
1818 } else {
1819 TemplateLoader::load_from_file(path)
1820 }
1821 .map_err(|e| {
1822 SynthError::config(format!(
1823 "Failed to load templates from {}: {e}",
1824 path.display()
1825 ))
1826 })?;
1827 let strategy = match config.templates.merge_strategy {
1828 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1829 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1830 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1831 MergeStrategy::MergePreferFile
1832 }
1833 };
1834 DefaultTemplateProvider::with_templates(data, strategy)
1835 }
1836 };
1837 Ok(Arc::new(provider))
1838 }
1839
1840 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1842 Self::new(config, PhaseConfig::default())
1843 }
1844
1845 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1847 self.phase_sink = Some(sink);
1848 self
1849 }
1850
1851 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1853 self.phase_sink = Some(sink);
1854 }
1855
1856 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1858 if let Some(ref sink) = self.phase_sink {
1859 for item in items {
1860 if let Ok(value) = serde_json::to_value(item) {
1861 if let Err(e) = sink.emit(phase, type_name, &value) {
1862 warn!(
1863 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1864 );
1865 }
1866 }
1867 }
1868 if let Err(e) = sink.phase_complete(phase) {
1869 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1870 }
1871 }
1872 }
1873
1874 pub fn with_progress(mut self, show: bool) -> Self {
1876 self.phase_config.show_progress = show;
1877 if show {
1878 self.multi_progress = Some(MultiProgress::new());
1879 }
1880 self
1881 }
1882
1883 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1885 let path = path.into();
1886 self.output_path = Some(path.clone());
1887 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1889 self
1890 }
1891
1892 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1894 &self.country_pack_registry
1895 }
1896
1897 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1899 self.country_pack_registry.get_by_str(country)
1900 }
1901
1902 fn primary_country_code(&self) -> &str {
1905 self.config
1906 .companies
1907 .first()
1908 .map(|c| c.country.as_str())
1909 .unwrap_or("US")
1910 }
1911
1912 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1914 self.country_pack_for(self.primary_country_code())
1915 }
1916
1917 fn resolve_coa_framework(&self) -> CoAFramework {
1919 if self.config.accounting_standards.enabled {
1920 match self.config.accounting_standards.framework {
1921 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1922 return CoAFramework::FrenchPcg;
1923 }
1924 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1925 return CoAFramework::GermanSkr04;
1926 }
1927 _ => {}
1928 }
1929 }
1930 let pack = self.primary_pack();
1932 match pack.accounting.framework.as_str() {
1933 "french_gaap" => CoAFramework::FrenchPcg,
1934 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1935 _ => CoAFramework::UsGaap,
1936 }
1937 }
1938
1939 pub fn has_copulas(&self) -> bool {
1944 !self.copula_generators.is_empty()
1945 }
1946
1947 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1953 &self.copula_generators
1954 }
1955
1956 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1960 &mut self.copula_generators
1961 }
1962
1963 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1967 self.copula_generators
1968 .iter_mut()
1969 .find(|c| c.name == copula_name)
1970 .map(|c| c.generator.sample())
1971 }
1972
1973 pub fn from_fingerprint(
1996 fingerprint_path: &std::path::Path,
1997 phase_config: PhaseConfig,
1998 scale: f64,
1999 ) -> SynthResult<Self> {
2000 info!("Loading fingerprint from: {}", fingerprint_path.display());
2001
2002 let reader = FingerprintReader::new();
2004 let fingerprint = reader
2005 .read_from_file(fingerprint_path)
2006 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2007
2008 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2009 }
2010
2011 pub fn from_fingerprint_data(
2018 fingerprint: Fingerprint,
2019 phase_config: PhaseConfig,
2020 scale: f64,
2021 ) -> SynthResult<Self> {
2022 info!(
2023 "Synthesizing config from fingerprint (version: {}, tables: {})",
2024 fingerprint.manifest.version,
2025 fingerprint.schema.tables.len()
2026 );
2027
2028 let seed: u64 = rand::random();
2030 info!("Fingerprint synthesis seed: {}", seed);
2031
2032 let options = SynthesisOptions {
2034 scale,
2035 seed: Some(seed),
2036 preserve_correlations: true,
2037 inject_anomalies: true,
2038 };
2039 let synthesizer = ConfigSynthesizer::with_options(options);
2040
2041 let synthesis_result = synthesizer
2043 .synthesize_full(&fingerprint, seed)
2044 .map_err(|e| {
2045 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2046 })?;
2047
2048 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2050 Self::base_config_for_industry(industry)
2051 } else {
2052 Self::base_config_for_industry("manufacturing")
2053 };
2054
2055 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2057
2058 info!(
2060 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2061 fingerprint.schema.tables.len(),
2062 scale,
2063 synthesis_result.copula_generators.len()
2064 );
2065
2066 if !synthesis_result.copula_generators.is_empty() {
2067 for spec in &synthesis_result.copula_generators {
2068 info!(
2069 " Copula '{}' for table '{}': {} columns",
2070 spec.name,
2071 spec.table,
2072 spec.columns.len()
2073 );
2074 }
2075 }
2076
2077 let mut orchestrator = Self::new(config, phase_config)?;
2079
2080 orchestrator.copula_generators = synthesis_result.copula_generators;
2082
2083 Ok(orchestrator)
2084 }
2085
2086 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2088 use datasynth_config::presets::create_preset;
2089 use datasynth_config::TransactionVolume;
2090 use datasynth_core::models::{CoAComplexity, IndustrySector};
2091
2092 let sector = match industry.to_lowercase().as_str() {
2093 "manufacturing" => IndustrySector::Manufacturing,
2094 "retail" => IndustrySector::Retail,
2095 "financial" | "financial_services" => IndustrySector::FinancialServices,
2096 "healthcare" => IndustrySector::Healthcare,
2097 "technology" | "tech" => IndustrySector::Technology,
2098 _ => IndustrySector::Manufacturing,
2099 };
2100
2101 create_preset(
2103 sector,
2104 1, 12, CoAComplexity::Medium,
2107 TransactionVolume::TenK,
2108 )
2109 }
2110
2111 fn apply_config_patch(
2113 mut config: GeneratorConfig,
2114 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2115 ) -> GeneratorConfig {
2116 use datasynth_fingerprint::synthesis::ConfigValue;
2117
2118 for (key, value) in patch.values() {
2119 match (key.as_str(), value) {
2120 ("transactions.count", ConfigValue::Integer(n)) => {
2123 info!(
2124 "Fingerprint suggests {} transactions (apply via company volumes)",
2125 n
2126 );
2127 }
2128 ("global.period_months", ConfigValue::Integer(n)) => {
2129 config.global.period_months = (*n).clamp(1, 120) as u32;
2130 }
2131 ("global.start_date", ConfigValue::String(s)) => {
2132 config.global.start_date = s.clone();
2133 }
2134 ("global.seed", ConfigValue::Integer(n)) => {
2135 config.global.seed = Some(*n as u64);
2136 }
2137 ("fraud.enabled", ConfigValue::Bool(b)) => {
2138 config.fraud.enabled = *b;
2139 }
2140 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2141 config.fraud.fraud_rate = *f;
2142 }
2143 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2144 config.data_quality.enabled = *b;
2145 }
2146 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2148 config.fraud.enabled = *b;
2149 }
2150 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2151 config.fraud.fraud_rate = *f;
2152 }
2153 _ => {
2154 debug!("Ignoring unknown config patch key: {}", key);
2155 }
2156 }
2157 }
2158
2159 config
2160 }
2161
2162 fn build_resource_guard(
2164 config: &GeneratorConfig,
2165 output_path: Option<PathBuf>,
2166 ) -> ResourceGuard {
2167 let mut builder = ResourceGuardBuilder::new();
2168
2169 if config.global.memory_limit_mb > 0 {
2171 builder = builder.memory_limit(config.global.memory_limit_mb);
2172 }
2173
2174 if let Some(path) = output_path {
2176 builder = builder.output_path(path).min_free_disk(100); }
2178
2179 builder = builder.conservative();
2181
2182 builder.build()
2183 }
2184
2185 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2190 self.resource_guard.check()
2191 }
2192
2193 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2195 let level = self.resource_guard.check()?;
2196
2197 if level != DegradationLevel::Normal {
2198 warn!(
2199 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2200 phase,
2201 level,
2202 self.resource_guard.current_memory_mb(),
2203 self.resource_guard.available_disk_mb()
2204 );
2205 }
2206
2207 Ok(level)
2208 }
2209
2210 fn get_degradation_actions(&self) -> DegradationActions {
2212 self.resource_guard.get_actions()
2213 }
2214
2215 fn check_memory_limit(&self) -> SynthResult<()> {
2217 self.check_resources()?;
2218 Ok(())
2219 }
2220
2221 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2223 info!("Starting enhanced generation workflow");
2224 info!(
2225 "Config: industry={:?}, period_months={}, companies={}",
2226 self.config.global.industry,
2227 self.config.global.period_months,
2228 self.config.companies.len()
2229 );
2230
2231 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2234 datasynth_core::serde_decimal::set_numeric_native(is_native);
2235 struct NumericModeGuard;
2236 impl Drop for NumericModeGuard {
2237 fn drop(&mut self) {
2238 datasynth_core::serde_decimal::set_numeric_native(false);
2239 }
2240 }
2241 let _numeric_guard = if is_native {
2242 Some(NumericModeGuard)
2243 } else {
2244 None
2245 };
2246
2247 let initial_level = self.check_resources_with_log("initial")?;
2249 if initial_level == DegradationLevel::Emergency {
2250 return Err(SynthError::resource(
2251 "Insufficient resources to start generation",
2252 ));
2253 }
2254
2255 let mut stats = EnhancedGenerationStatistics {
2256 companies_count: self.config.companies.len(),
2257 period_months: self.config.global.period_months,
2258 ..Default::default()
2259 };
2260
2261 let coa = self.phase_chart_of_accounts(&mut stats)?;
2263
2264 self.phase_master_data(&mut stats)?;
2266
2267 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2269 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2270 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2271
2272 let (mut document_flows, mut subledger, fa_journal_entries) =
2274 self.phase_document_flows(&mut stats)?;
2275
2276 self.emit_phase_items(
2278 "document_flows",
2279 "PurchaseOrder",
2280 &document_flows.purchase_orders,
2281 );
2282 self.emit_phase_items(
2283 "document_flows",
2284 "GoodsReceipt",
2285 &document_flows.goods_receipts,
2286 );
2287 self.emit_phase_items(
2288 "document_flows",
2289 "VendorInvoice",
2290 &document_flows.vendor_invoices,
2291 );
2292 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2293 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2294
2295 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2297
2298 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2303 .iter()
2304 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2305 .collect();
2306 if !opening_balance_jes.is_empty() {
2307 debug!(
2308 "Prepending {} opening balance JEs to entries",
2309 opening_balance_jes.len()
2310 );
2311 }
2312
2313 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2315
2316 if !opening_balance_jes.is_empty() {
2319 let mut combined = opening_balance_jes;
2320 combined.extend(entries);
2321 entries = combined;
2322 }
2323
2324 if !fa_journal_entries.is_empty() {
2326 debug!(
2327 "Appending {} FA acquisition JEs to main entries",
2328 fa_journal_entries.len()
2329 );
2330 entries.extend(fa_journal_entries);
2331 }
2332
2333 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2335
2336 let actions = self.get_degradation_actions();
2338
2339 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2341
2342 if !sourcing.contracts.is_empty() {
2345 let mut linked_count = 0usize;
2346 let po_vendor_pairs: Vec<(String, String)> = document_flows
2348 .p2p_chains
2349 .iter()
2350 .map(|chain| {
2351 (
2352 chain.purchase_order.vendor_id.clone(),
2353 chain.purchase_order.header.document_id.clone(),
2354 )
2355 })
2356 .collect();
2357
2358 for chain in &mut document_flows.p2p_chains {
2359 if chain.purchase_order.contract_id.is_none() {
2360 if let Some(contract) = sourcing
2361 .contracts
2362 .iter()
2363 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2364 {
2365 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2366 linked_count += 1;
2367 }
2368 }
2369 }
2370
2371 for contract in &mut sourcing.contracts {
2373 let po_ids: Vec<String> = po_vendor_pairs
2374 .iter()
2375 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2376 .map(|(_, po_id)| po_id.clone())
2377 .collect();
2378 if !po_ids.is_empty() {
2379 contract.purchase_order_ids = po_ids;
2380 }
2381 }
2382
2383 if linked_count > 0 {
2384 debug!(
2385 "Linked {} purchase orders to S2C contracts by vendor match",
2386 linked_count
2387 );
2388 }
2389 }
2390
2391 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2393
2394 if !intercompany.seller_journal_entries.is_empty()
2396 || !intercompany.buyer_journal_entries.is_empty()
2397 {
2398 let ic_je_count = intercompany.seller_journal_entries.len()
2399 + intercompany.buyer_journal_entries.len();
2400 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2401 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2402 debug!(
2403 "Appended {} IC journal entries to main entries",
2404 ic_je_count
2405 );
2406 }
2407
2408 if !intercompany.elimination_entries.is_empty() {
2410 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2411 &intercompany.elimination_entries,
2412 );
2413 if !elim_jes.is_empty() {
2414 debug!(
2415 "Appended {} elimination journal entries to main entries",
2416 elim_jes.len()
2417 );
2418 let elim_debit: rust_decimal::Decimal =
2420 elim_jes.iter().map(|je| je.total_debit()).sum();
2421 let elim_credit: rust_decimal::Decimal =
2422 elim_jes.iter().map(|je| je.total_credit()).sum();
2423 let elim_diff = (elim_debit - elim_credit).abs();
2424 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2426 return Err(datasynth_core::error::SynthError::generation(format!(
2427 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2428 elim_debit, elim_credit, elim_diff, tolerance
2429 )));
2430 }
2431 debug!(
2432 "IC elimination balance verified: debits={}, credits={} (diff={})",
2433 elim_debit, elim_credit, elim_diff
2434 );
2435 entries.extend(elim_jes);
2436 }
2437 }
2438
2439 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2441 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2442 document_flows
2443 .customer_invoices
2444 .extend(ic_docs.seller_invoices.iter().cloned());
2445 document_flows
2446 .purchase_orders
2447 .extend(ic_docs.buyer_orders.iter().cloned());
2448 document_flows
2449 .goods_receipts
2450 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2451 document_flows
2452 .vendor_invoices
2453 .extend(ic_docs.buyer_invoices.iter().cloned());
2454 debug!(
2455 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2456 ic_docs.seller_invoices.len(),
2457 ic_docs.buyer_orders.len(),
2458 ic_docs.buyer_goods_receipts.len(),
2459 ic_docs.buyer_invoices.len(),
2460 );
2461 }
2462 }
2463
2464 let hr = self.phase_hr_data(&mut stats)?;
2466
2467 if !hr.payroll_runs.is_empty() {
2469 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2470 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2471 entries.extend(payroll_jes);
2472 }
2473
2474 if !hr.pension_journal_entries.is_empty() {
2476 debug!(
2477 "Generated {} JEs from pension plans",
2478 hr.pension_journal_entries.len()
2479 );
2480 entries.extend(hr.pension_journal_entries.iter().cloned());
2481 }
2482
2483 if !hr.stock_comp_journal_entries.is_empty() {
2485 debug!(
2486 "Generated {} JEs from stock-based compensation",
2487 hr.stock_comp_journal_entries.len()
2488 );
2489 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2490 }
2491
2492 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2494
2495 if !manufacturing_snap.production_orders.is_empty() {
2497 let currency = self
2498 .config
2499 .companies
2500 .first()
2501 .map(|c| c.currency.as_str())
2502 .unwrap_or("USD");
2503 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2504 &manufacturing_snap.production_orders,
2505 &manufacturing_snap.quality_inspections,
2506 currency,
2507 );
2508 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2509 entries.extend(mfg_jes);
2510 }
2511
2512 if !manufacturing_snap.quality_inspections.is_empty() {
2514 let framework = match self.config.accounting_standards.framework {
2515 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2516 _ => "US_GAAP",
2517 };
2518 for company in &self.config.companies {
2519 let company_orders: Vec<_> = manufacturing_snap
2520 .production_orders
2521 .iter()
2522 .filter(|o| o.company_code == company.code)
2523 .cloned()
2524 .collect();
2525 let company_inspections: Vec<_> = manufacturing_snap
2526 .quality_inspections
2527 .iter()
2528 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2529 .cloned()
2530 .collect();
2531 if company_inspections.is_empty() {
2532 continue;
2533 }
2534 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2535 let warranty_result = warranty_gen.generate(
2536 &company.code,
2537 &company_orders,
2538 &company_inspections,
2539 &company.currency,
2540 framework,
2541 );
2542 if !warranty_result.journal_entries.is_empty() {
2543 debug!(
2544 "Generated {} warranty provision JEs for {}",
2545 warranty_result.journal_entries.len(),
2546 company.code
2547 );
2548 entries.extend(warranty_result.journal_entries);
2549 }
2550 }
2551 }
2552
2553 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2555 {
2556 let cogs_currency = self
2557 .config
2558 .companies
2559 .first()
2560 .map(|c| c.currency.as_str())
2561 .unwrap_or("USD");
2562 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2563 &document_flows.deliveries,
2564 &manufacturing_snap.production_orders,
2565 cogs_currency,
2566 );
2567 if !cogs_jes.is_empty() {
2568 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2569 entries.extend(cogs_jes);
2570 }
2571 }
2572
2573 if !manufacturing_snap.inventory_movements.is_empty()
2579 && !subledger.inventory_positions.is_empty()
2580 {
2581 use datasynth_core::models::MovementType as MfgMovementType;
2582 let mut receipt_count = 0usize;
2583 let mut issue_count = 0usize;
2584 for movement in &manufacturing_snap.inventory_movements {
2585 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2587 p.material_id == movement.material_code
2588 && p.company_code == movement.entity_code
2589 }) {
2590 match movement.movement_type {
2591 MfgMovementType::GoodsReceipt => {
2592 pos.add_quantity(
2594 movement.quantity,
2595 movement.value,
2596 movement.movement_date,
2597 );
2598 receipt_count += 1;
2599 }
2600 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2601 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2603 issue_count += 1;
2604 }
2605 _ => {}
2606 }
2607 }
2608 }
2609 debug!(
2610 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2611 manufacturing_snap.inventory_movements.len(),
2612 receipt_count,
2613 issue_count,
2614 );
2615 }
2616
2617 if !entries.is_empty() {
2620 stats.total_entries = entries.len() as u64;
2621 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2622 debug!(
2623 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2624 stats.total_entries, stats.total_line_items
2625 );
2626 }
2627
2628 if self.config.internal_controls.enabled && !entries.is_empty() {
2630 info!("Phase 7b: Applying internal controls to journal entries");
2631 let control_config = ControlGeneratorConfig {
2632 exception_rate: self.config.internal_controls.exception_rate,
2633 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2634 enable_sox_marking: true,
2635 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2636 self.config.internal_controls.sox_materiality_threshold,
2637 )
2638 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2639 ..Default::default()
2640 };
2641 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2642 for entry in &mut entries {
2643 control_gen.apply_controls(entry, &coa);
2644 }
2645 let with_controls = entries
2646 .iter()
2647 .filter(|e| !e.header.control_ids.is_empty())
2648 .count();
2649 info!(
2650 "Applied controls to {} entries ({} with control IDs assigned)",
2651 entries.len(),
2652 with_controls
2653 );
2654 }
2655
2656 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2660 .iter()
2661 .filter(|e| e.header.sod_violation)
2662 .filter_map(|e| {
2663 e.header.sod_conflict_type.map(|ct| {
2664 use datasynth_core::models::{RiskLevel, SodViolation};
2665 let severity = match ct {
2666 datasynth_core::models::SodConflictType::PaymentReleaser
2667 | datasynth_core::models::SodConflictType::RequesterApprover => {
2668 RiskLevel::Critical
2669 }
2670 datasynth_core::models::SodConflictType::PreparerApprover
2671 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2672 | datasynth_core::models::SodConflictType::JournalEntryPoster
2673 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2674 RiskLevel::High
2675 }
2676 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2677 RiskLevel::Medium
2678 }
2679 };
2680 let action = format!(
2681 "SoD conflict {:?} on entry {} ({})",
2682 ct, e.header.document_id, e.header.company_code
2683 );
2684 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2685 })
2686 })
2687 .collect();
2688 if !sod_violations.is_empty() {
2689 info!(
2690 "Phase 7c: Extracted {} SoD violations from {} entries",
2691 sod_violations.len(),
2692 entries.len()
2693 );
2694 }
2695
2696 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2698
2699 {
2707 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2708 if self.config.fraud.enabled && doc_rate > 0.0 {
2709 use datasynth_core::fraud_propagation::{
2710 inject_document_fraud, propagate_documents_to_entries,
2711 };
2712 use datasynth_core::utils::weighted_select;
2713 use datasynth_core::FraudType;
2714 use rand_chacha::rand_core::SeedableRng;
2715
2716 let dist = &self.config.fraud.fraud_type_distribution;
2717 let fraud_type_weights: [(FraudType, f64); 8] = [
2718 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2719 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2720 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2721 (
2722 FraudType::ImproperCapitalization,
2723 dist.expense_capitalization,
2724 ),
2725 (FraudType::SplitTransaction, dist.split_transaction),
2726 (FraudType::TimingAnomaly, dist.timing_anomaly),
2727 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2728 (FraudType::DuplicatePayment, dist.duplicate_payment),
2729 ];
2730 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2731 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2732 if weights_sum <= 0.0 {
2733 FraudType::FictitiousEntry
2734 } else {
2735 *weighted_select(rng, &fraud_type_weights)
2736 }
2737 };
2738
2739 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2740 let mut doc_tagged = 0usize;
2741 macro_rules! inject_into {
2742 ($collection:expr) => {{
2743 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2744 $collection.iter_mut().map(|d| &mut d.header).collect();
2745 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2746 }};
2747 }
2748 inject_into!(document_flows.purchase_orders);
2749 inject_into!(document_flows.goods_receipts);
2750 inject_into!(document_flows.vendor_invoices);
2751 inject_into!(document_flows.payments);
2752 inject_into!(document_flows.sales_orders);
2753 inject_into!(document_flows.deliveries);
2754 inject_into!(document_flows.customer_invoices);
2755 if doc_tagged > 0 {
2756 info!(
2757 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2758 );
2759 }
2760
2761 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2762 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2763 Vec::new();
2764 headers.extend(
2765 document_flows
2766 .purchase_orders
2767 .iter()
2768 .map(|d| d.header.clone()),
2769 );
2770 headers.extend(
2771 document_flows
2772 .goods_receipts
2773 .iter()
2774 .map(|d| d.header.clone()),
2775 );
2776 headers.extend(
2777 document_flows
2778 .vendor_invoices
2779 .iter()
2780 .map(|d| d.header.clone()),
2781 );
2782 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2783 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2784 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2785 headers.extend(
2786 document_flows
2787 .customer_invoices
2788 .iter()
2789 .map(|d| d.header.clone()),
2790 );
2791 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2792 if propagated > 0 {
2793 info!(
2794 "Propagated document-level fraud to {propagated} derived journal entries"
2795 );
2796 }
2797 }
2798 }
2799 }
2800
2801 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2803
2804 {
2822 use datasynth_core::fraud_bias::{
2823 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2824 };
2825 use rand_chacha::rand_core::SeedableRng;
2826 let cfg = FraudBehavioralBiasConfig::default();
2827 if cfg.enabled {
2828 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2829 let mut swept = 0usize;
2830 for entry in entries.iter_mut() {
2831 if entry.header.is_fraud && !entry.header.is_anomaly {
2832 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2833 swept += 1;
2834 }
2835 }
2836 if swept > 0 {
2837 info!(
2838 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2839 (doc-propagated + je_generator intrinsic fraud)"
2840 );
2841 }
2842 }
2843 }
2844
2845 self.emit_phase_items(
2847 "anomaly_injection",
2848 "LabeledAnomaly",
2849 &anomaly_labels.labels,
2850 );
2851
2852 if self.config.fraud.propagate_to_document {
2860 use std::collections::HashMap;
2861 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2874 for je in &entries {
2875 if je.header.is_fraud {
2876 if let Some(ref fraud_type) = je.header.fraud_type {
2877 if let Some(ref reference) = je.header.reference {
2878 fraud_map.insert(reference.clone(), *fraud_type);
2880 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2883 if !bare.is_empty() {
2884 fraud_map.insert(bare.to_string(), *fraud_type);
2885 }
2886 }
2887 }
2888 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2890 }
2891 }
2892 }
2893 if !fraud_map.is_empty() {
2894 let mut propagated = 0usize;
2895 macro_rules! propagate_to {
2897 ($collection:expr) => {
2898 for doc in &mut $collection {
2899 if doc.header.propagate_fraud(&fraud_map) {
2900 propagated += 1;
2901 }
2902 }
2903 };
2904 }
2905 propagate_to!(document_flows.purchase_orders);
2906 propagate_to!(document_flows.goods_receipts);
2907 propagate_to!(document_flows.vendor_invoices);
2908 propagate_to!(document_flows.payments);
2909 propagate_to!(document_flows.sales_orders);
2910 propagate_to!(document_flows.deliveries);
2911 propagate_to!(document_flows.customer_invoices);
2912 if propagated > 0 {
2913 info!(
2914 "Propagated fraud labels to {} document flow records",
2915 propagated
2916 );
2917 }
2918 }
2919 }
2920
2921 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2923
2924 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2926
2927 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2929
2930 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2932
2933 let balance_validation = self.phase_balance_validation(&entries)?;
2935
2936 self.validate_coa_coverage(&entries, coa.as_ref())?;
2940
2941 let subledger_reconciliation =
2943 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2944
2945 let (data_quality_stats, quality_issues) =
2947 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2948
2949 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2951
2952 {
2954 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2959 for je in &entries {
2960 if je.header.is_fraud || je.header.is_anomaly {
2961 continue;
2962 }
2963 let diff = (je.total_debit() - je.total_credit()).abs();
2964 if diff > tolerance {
2965 unbalanced_clean += 1;
2966 if unbalanced_clean <= 3 {
2967 warn!(
2968 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2969 je.header.document_id,
2970 je.total_debit(),
2971 je.total_credit(),
2972 diff
2973 );
2974 }
2975 }
2976 }
2977 if unbalanced_clean > 0 {
2978 return Err(datasynth_core::error::SynthError::generation(format!(
2979 "{} non-anomaly JEs are unbalanced (debits != credits). \
2980 First few logged above. Tolerance={}",
2981 unbalanced_clean, tolerance
2982 )));
2983 }
2984 debug!(
2985 "Phase 10c: All {} non-anomaly JEs individually balanced",
2986 entries
2987 .iter()
2988 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2989 .count()
2990 );
2991
2992 let company_codes: Vec<String> = self
2994 .config
2995 .companies
2996 .iter()
2997 .map(|c| c.code.clone())
2998 .collect();
2999 for company_code in &company_codes {
3000 let mut assets = rust_decimal::Decimal::ZERO;
3001 let mut liab_equity = rust_decimal::Decimal::ZERO;
3002
3003 for entry in &entries {
3004 if entry.header.company_code != *company_code {
3005 continue;
3006 }
3007 for line in &entry.lines {
3008 let acct = &line.gl_account;
3009 let net = line.debit_amount - line.credit_amount;
3010 if acct.starts_with('1') {
3012 assets += net;
3013 }
3014 else if acct.starts_with('2') || acct.starts_with('3') {
3016 liab_equity -= net; }
3018 }
3021 }
3022
3023 let bs_diff = (assets - liab_equity).abs();
3024 if bs_diff > tolerance {
3025 warn!(
3026 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3027 revenue/expense closing entries may not fully offset",
3028 company_code, assets, liab_equity, bs_diff
3029 );
3030 } else {
3034 debug!(
3035 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3036 company_code, assets, liab_equity, bs_diff
3037 );
3038 }
3039 }
3040
3041 info!("Phase 10c: All generation-time accounting assertions passed");
3042 }
3043
3044 let audit = self.phase_audit_data(&entries, &mut stats)?;
3046
3047 let mut banking = self.phase_banking_data(&mut stats)?;
3049
3050 if self.phase_config.generate_banking
3055 && !document_flows.payments.is_empty()
3056 && !banking.accounts.is_empty()
3057 {
3058 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3059 if bridge_rate > 0.0 {
3060 let mut bridge =
3061 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3062 self.seed,
3063 );
3064 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3065 &document_flows.payments,
3066 &banking.customers,
3067 &banking.accounts,
3068 bridge_rate,
3069 );
3070 info!(
3071 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3072 bridge_stats.bridged_count,
3073 bridge_stats.transactions_emitted,
3074 bridge_stats.fraud_propagated,
3075 );
3076 let bridged_count = bridged_txns.len();
3077 banking.transactions.extend(bridged_txns);
3078
3079 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3082 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3083 &mut banking.transactions,
3084 );
3085 }
3086
3087 banking.suspicious_count = banking
3089 .transactions
3090 .iter()
3091 .filter(|t| t.is_suspicious)
3092 .count();
3093 stats.banking_transaction_count = banking.transactions.len();
3094 stats.banking_suspicious_count = banking.suspicious_count;
3095 }
3096 }
3097
3098 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3100
3101 self.phase_llm_enrichment(&mut stats);
3103
3104 self.phase_diffusion_enhancement(&entries, &mut stats);
3106
3107 self.phase_causal_overlay(&mut stats);
3109
3110 let mut financial_reporting = self.phase_financial_reporting(
3114 &document_flows,
3115 &entries,
3116 &coa,
3117 &hr,
3118 &audit,
3119 &mut stats,
3120 )?;
3121
3122 {
3124 use datasynth_core::models::StatementType;
3125 for stmt in &financial_reporting.consolidated_statements {
3126 if stmt.statement_type == StatementType::BalanceSheet {
3127 let total_assets: rust_decimal::Decimal = stmt
3128 .line_items
3129 .iter()
3130 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3131 .map(|li| li.amount)
3132 .sum();
3133 let total_le: rust_decimal::Decimal = stmt
3134 .line_items
3135 .iter()
3136 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3137 .map(|li| li.amount)
3138 .sum();
3139 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3140 warn!(
3141 "BS equation imbalance: assets={}, L+E={}",
3142 total_assets, total_le
3143 );
3144 }
3145 }
3146 }
3147 }
3148
3149 let accounting_standards =
3151 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3152
3153 if !accounting_standards.ecl_journal_entries.is_empty() {
3155 debug!(
3156 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3157 accounting_standards.ecl_journal_entries.len()
3158 );
3159 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3160 }
3161
3162 if !accounting_standards.provision_journal_entries.is_empty() {
3164 debug!(
3165 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3166 accounting_standards.provision_journal_entries.len()
3167 );
3168 entries.extend(
3169 accounting_standards
3170 .provision_journal_entries
3171 .iter()
3172 .cloned(),
3173 );
3174 }
3175
3176 let mut ocpm = self.phase_ocpm_events(
3178 &document_flows,
3179 &sourcing,
3180 &hr,
3181 &manufacturing_snap,
3182 &banking,
3183 &audit,
3184 &financial_reporting,
3185 &mut stats,
3186 )?;
3187
3188 if let Some(ref event_log) = ocpm.event_log {
3190 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3191 }
3192
3193 if let Some(ref event_log) = ocpm.event_log {
3195 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3197 std::collections::HashMap::new();
3198 for (idx, event) in event_log.events.iter().enumerate() {
3199 if let Some(ref doc_ref) = event.document_ref {
3200 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3201 }
3202 }
3203
3204 if !doc_index.is_empty() {
3205 let mut annotated = 0usize;
3206 for entry in &mut entries {
3207 let doc_id_str = entry.header.document_id.to_string();
3208 let mut matched_indices: Vec<usize> = Vec::new();
3210 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3211 matched_indices.extend(indices);
3212 }
3213 if let Some(ref reference) = entry.header.reference {
3214 let bare_ref = reference
3215 .find(':')
3216 .map(|i| &reference[i + 1..])
3217 .unwrap_or(reference.as_str());
3218 if let Some(indices) = doc_index.get(bare_ref) {
3219 for &idx in indices {
3220 if !matched_indices.contains(&idx) {
3221 matched_indices.push(idx);
3222 }
3223 }
3224 }
3225 }
3226 if !matched_indices.is_empty() {
3228 for &idx in &matched_indices {
3229 let event = &event_log.events[idx];
3230 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3231 entry.header.ocpm_event_ids.push(event.event_id);
3232 }
3233 for obj_ref in &event.object_refs {
3234 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3235 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3236 }
3237 }
3238 if entry.header.ocpm_case_id.is_none() {
3239 entry.header.ocpm_case_id = event.case_id;
3240 }
3241 }
3242 annotated += 1;
3243 }
3244 }
3245 debug!(
3246 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3247 annotated
3248 );
3249 }
3250 }
3251
3252 if let Some(ref mut event_log) = ocpm.event_log {
3256 let synthesized =
3257 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3258 if synthesized > 0 {
3259 info!(
3260 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3261 );
3262 }
3263
3264 let anomaly_events =
3269 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3270 if anomaly_events > 0 {
3271 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3272 }
3273
3274 let p2p_cfg = &self.config.ocpm.p2p_process;
3279 let any_imperfection = p2p_cfg.rework_probability > 0.0
3280 || p2p_cfg.skip_step_probability > 0.0
3281 || p2p_cfg.out_of_order_probability > 0.0;
3282 if any_imperfection {
3283 use rand_chacha::rand_core::SeedableRng;
3284 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3285 rework_rate: p2p_cfg.rework_probability,
3286 skip_rate: p2p_cfg.skip_step_probability,
3287 out_of_order_rate: p2p_cfg.out_of_order_probability,
3288 };
3289 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3290 let stats =
3291 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3292 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3293 info!(
3294 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3295 stats.rework, stats.skipped, stats.out_of_order
3296 );
3297 }
3298 }
3299 }
3300
3301 let sales_kpi_budgets =
3303 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3304
3305 let treasury =
3309 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3310
3311 if !treasury.journal_entries.is_empty() {
3313 debug!(
3314 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3315 treasury.journal_entries.len()
3316 );
3317 entries.extend(treasury.journal_entries.iter().cloned());
3318 }
3319
3320 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3322
3323 if !tax.tax_posting_journal_entries.is_empty() {
3325 debug!(
3326 "Merging {} tax posting JEs into GL",
3327 tax.tax_posting_journal_entries.len()
3328 );
3329 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3330 }
3331
3332 {
3350 use datasynth_core::fraud_bias::{
3351 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3352 };
3353 use rand_chacha::rand_core::SeedableRng;
3354 let cfg = FraudBehavioralBiasConfig::default();
3355 if cfg.enabled {
3356 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3357 let mut swept = 0usize;
3358 for entry in entries.iter_mut() {
3359 if entry.header.is_fraud && !entry.header.is_anomaly {
3360 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3361 swept += 1;
3362 }
3363 }
3364 if swept > 0 {
3365 info!(
3366 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3367 non-anomaly fraud entries (covers late-added JEs from \
3368 ECL / provisions / treasury / tax / period-close)"
3369 );
3370 }
3371 }
3372 }
3373
3374 {
3378 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3379
3380 let framework_str = {
3381 use datasynth_config::schema::AccountingFrameworkConfig;
3382 match self
3383 .config
3384 .accounting_standards
3385 .framework
3386 .unwrap_or_default()
3387 {
3388 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3389 "IFRS"
3390 }
3391 _ => "US_GAAP",
3392 }
3393 };
3394
3395 let depreciation_total: rust_decimal::Decimal = entries
3397 .iter()
3398 .filter(|je| je.header.document_type == "CL")
3399 .flat_map(|je| je.lines.iter())
3400 .filter(|l| l.gl_account.starts_with("6000"))
3401 .map(|l| l.debit_amount)
3402 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3403
3404 let interest_paid: rust_decimal::Decimal = entries
3406 .iter()
3407 .flat_map(|je| je.lines.iter())
3408 .filter(|l| l.gl_account.starts_with("7100"))
3409 .map(|l| l.debit_amount)
3410 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3411
3412 let tax_paid: rust_decimal::Decimal = entries
3414 .iter()
3415 .flat_map(|je| je.lines.iter())
3416 .filter(|l| l.gl_account.starts_with("8000"))
3417 .map(|l| l.debit_amount)
3418 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3419
3420 let capex: rust_decimal::Decimal = entries
3422 .iter()
3423 .flat_map(|je| je.lines.iter())
3424 .filter(|l| l.gl_account.starts_with("1500"))
3425 .map(|l| l.debit_amount)
3426 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3427
3428 let dividends_paid: rust_decimal::Decimal = entries
3430 .iter()
3431 .flat_map(|je| je.lines.iter())
3432 .filter(|l| l.gl_account == "2170")
3433 .map(|l| l.debit_amount)
3434 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3435
3436 let cf_data = CashFlowSourceData {
3437 depreciation_total,
3438 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3440 delta_ap: rust_decimal::Decimal::ZERO,
3441 delta_inventory: rust_decimal::Decimal::ZERO,
3442 capex,
3443 debt_issuance: rust_decimal::Decimal::ZERO,
3444 debt_repayment: rust_decimal::Decimal::ZERO,
3445 interest_paid,
3446 tax_paid,
3447 dividends_paid,
3448 framework: framework_str.to_string(),
3449 };
3450
3451 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3452 if !enhanced_cf_items.is_empty() {
3453 use datasynth_core::models::StatementType;
3455 let merge_count = enhanced_cf_items.len();
3456 for stmt in financial_reporting
3457 .financial_statements
3458 .iter_mut()
3459 .chain(financial_reporting.consolidated_statements.iter_mut())
3460 .chain(
3461 financial_reporting
3462 .standalone_statements
3463 .values_mut()
3464 .flat_map(|v| v.iter_mut()),
3465 )
3466 {
3467 if stmt.statement_type == StatementType::CashFlowStatement {
3468 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3469 }
3470 }
3471 info!(
3472 "Enhanced cash flow: {} supplementary items merged into CF statements",
3473 merge_count
3474 );
3475 }
3476 }
3477
3478 self.generate_notes_to_financial_statements(
3481 &mut financial_reporting,
3482 &accounting_standards,
3483 &tax,
3484 &hr,
3485 &audit,
3486 &treasury,
3487 );
3488
3489 if self.config.companies.len() >= 2 && !entries.is_empty() {
3493 let companies: Vec<(String, String)> = self
3494 .config
3495 .companies
3496 .iter()
3497 .map(|c| (c.code.clone(), c.name.clone()))
3498 .collect();
3499 let ic_elim: rust_decimal::Decimal =
3500 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3501 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3502 .unwrap_or(NaiveDate::MIN);
3503 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3504 let period_label = format!(
3505 "{}-{:02}",
3506 end_date.year(),
3507 (end_date - chrono::Days::new(1)).month()
3508 );
3509
3510 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3511 let (je_segments, je_recon) =
3512 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3513 if !je_segments.is_empty() {
3514 info!(
3515 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3516 je_segments.len(),
3517 ic_elim,
3518 );
3519 if financial_reporting.segment_reports.is_empty() {
3521 financial_reporting.segment_reports = je_segments;
3522 financial_reporting.segment_reconciliations = vec![je_recon];
3523 } else {
3524 financial_reporting.segment_reports.extend(je_segments);
3525 financial_reporting.segment_reconciliations.push(je_recon);
3526 }
3527 }
3528 }
3529
3530 let esg_snap =
3532 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3533
3534 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3536
3537 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3539
3540 let disruption_events = self.phase_disruption_events(&mut stats)?;
3542
3543 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3545
3546 let (entity_relationship_graph, cross_process_links) =
3548 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3549
3550 let industry_output = self.phase_industry_data(&mut stats);
3552
3553 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3555
3556 if self.config.diffusion.enabled
3574 && (self.config.diffusion.backend == "neural"
3575 || self.config.diffusion.backend == "hybrid")
3576 {
3577 let neural = &self.config.diffusion.neural;
3578 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3579 stats.neural_hybrid_weight = Some(weight);
3580 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3581 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3582 warn!(
3583 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3584 the neural/hybrid training path is not yet shipped. Config \
3585 is captured in stats (weight={weight:.2}, strategy={}, \
3586 columns={}) but no neural training runs. Statistical \
3587 diffusion (backend='statistical') continues to work.",
3588 self.config.diffusion.backend,
3589 neural.hybrid_strategy,
3590 neural.neural_columns.len(),
3591 );
3592 }
3593
3594 self.phase_hypergraph_export(
3596 &coa,
3597 &entries,
3598 &document_flows,
3599 &sourcing,
3600 &hr,
3601 &manufacturing_snap,
3602 &banking,
3603 &audit,
3604 &financial_reporting,
3605 &ocpm,
3606 &compliance_regulations,
3607 &mut stats,
3608 )?;
3609
3610 if self.phase_config.generate_graph_export {
3613 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3614 }
3615
3616 if self.config.streaming.enabled {
3618 info!("Note: streaming config is enabled but batch mode does not use it");
3619 }
3620 if self.config.vendor_network.enabled {
3621 debug!("Vendor network config available; relationship graph generation is partial");
3622 }
3623 if self.config.customer_segmentation.enabled {
3624 debug!("Customer segmentation config available; segment-aware generation is partial");
3625 }
3626
3627 let resource_stats = self.resource_guard.stats();
3629 info!(
3630 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3631 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3632 resource_stats.disk.estimated_bytes_written,
3633 resource_stats.degradation_level
3634 );
3635
3636 if let Some(ref sink) = self.phase_sink {
3638 if let Err(e) = sink.flush() {
3639 warn!("Stream sink flush failed: {e}");
3640 }
3641 }
3642
3643 let lineage = self.build_lineage_graph();
3645
3646 let gate_result = if self.config.quality_gates.enabled {
3648 let profile_name = &self.config.quality_gates.profile;
3649 match datasynth_eval::gates::get_profile(profile_name) {
3650 Some(profile) => {
3651 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3653
3654 if balance_validation.validated {
3656 eval.coherence.balance =
3657 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3658 equation_balanced: balance_validation.is_balanced,
3659 max_imbalance: (balance_validation.total_debits
3660 - balance_validation.total_credits)
3661 .abs(),
3662 periods_evaluated: 1,
3663 periods_imbalanced: if balance_validation.is_balanced {
3664 0
3665 } else {
3666 1
3667 },
3668 period_results: Vec::new(),
3669 companies_evaluated: self.config.companies.len(),
3670 });
3671 }
3672
3673 eval.coherence.passes = balance_validation.is_balanced;
3675 if !balance_validation.is_balanced {
3676 eval.coherence
3677 .failures
3678 .push("Balance sheet equation not satisfied".to_string());
3679 }
3680
3681 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3683 eval.statistical.passes = !entries.is_empty();
3684
3685 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3688
3689 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3690 info!(
3691 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3692 profile_name, result.gates_passed, result.gates_total, result.summary
3693 );
3694 Some(result)
3695 }
3696 None => {
3697 warn!(
3698 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3699 profile_name
3700 );
3701 None
3702 }
3703 }
3704 } else {
3705 None
3706 };
3707
3708 let internal_controls = if self.config.internal_controls.enabled {
3710 InternalControl::standard_controls()
3711 } else {
3712 Vec::new()
3713 };
3714
3715 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3719
3720 let statistical_validation = self.phase_statistical_validation(&entries)?;
3725
3726 let interconnectivity = self.phase_interconnectivity();
3730
3731 Ok(EnhancedGenerationResult {
3732 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3733 master_data: std::mem::take(&mut self.master_data),
3734 document_flows,
3735 subledger,
3736 ocpm,
3737 audit,
3738 banking,
3739 graph_export,
3740 sourcing,
3741 financial_reporting,
3742 hr,
3743 accounting_standards,
3744 manufacturing: manufacturing_snap,
3745 sales_kpi_budgets,
3746 tax,
3747 esg: esg_snap,
3748 treasury,
3749 project_accounting,
3750 process_evolution,
3751 organizational_events,
3752 disruption_events,
3753 intercompany,
3754 journal_entries: entries,
3755 anomaly_labels,
3756 balance_validation,
3757 data_quality_stats,
3758 quality_issues,
3759 statistics: stats,
3760 lineage: Some(lineage),
3761 gate_result,
3762 internal_controls,
3763 sod_violations,
3764 opening_balances,
3765 subledger_reconciliation,
3766 counterfactual_pairs,
3767 red_flags,
3768 collusion_rings,
3769 temporal_vendor_chains,
3770 entity_relationship_graph,
3771 cross_process_links,
3772 industry_output,
3773 compliance_regulations,
3774 analytics_metadata,
3775 statistical_validation,
3776 interconnectivity,
3777 })
3778 }
3779
3780 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3784 use rand::{RngExt, SeedableRng};
3785 use rand_chacha::ChaCha8Rng;
3786
3787 let mut snap = InterconnectivitySnapshot::default();
3788 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3789
3790 let vn = &self.config.vendor_network;
3792 if vn.enabled {
3793 let total = self.master_data.vendors.len();
3794 if total > 0 {
3795 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3796 let remaining_after_t1 = total.saturating_sub(tier1_count);
3797 let depth = vn.depth.clamp(1, 3);
3798 let tier2_count = if depth >= 2 {
3799 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3800 (tier1_count * avg).min(remaining_after_t1)
3801 } else {
3802 0
3803 };
3804 let tier3_count = total
3805 .saturating_sub(tier1_count)
3806 .saturating_sub(tier2_count);
3807
3808 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3809 let tier = if idx < tier1_count {
3810 1
3811 } else if idx < tier1_count + tier2_count {
3812 2
3813 } else {
3814 3
3815 };
3816 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3817
3818 let cl = &vn.clusters;
3820 let roll: f64 = rng.random();
3821 let cluster = if roll < cl.reliable_strategic {
3822 "reliable_strategic"
3823 } else if roll < cl.reliable_strategic + cl.standard_operational {
3824 "standard_operational"
3825 } else if roll
3826 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3827 {
3828 "transactional"
3829 } else {
3830 "problematic"
3831 };
3832 snap.vendor_clusters
3833 .push((vendor.vendor_id.clone(), cluster.to_string()));
3834 }
3835 let _ = tier3_count; }
3837 }
3838
3839 let cs = &self.config.customer_segmentation;
3841 if cs.enabled {
3842 let seg = &cs.value_segments;
3843 for customer in &self.master_data.customers {
3844 let roll: f64 = rng.random();
3845 let value_segment = if roll < seg.enterprise.customer_share {
3846 "enterprise"
3847 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3848 "mid_market"
3849 } else if roll
3850 < seg.enterprise.customer_share
3851 + seg.mid_market.customer_share
3852 + seg.smb.customer_share
3853 {
3854 "smb"
3855 } else {
3856 "consumer"
3857 };
3858 snap.customer_value_segments
3859 .push((customer.customer_id.clone(), value_segment.to_string()));
3860
3861 let roll2: f64 = rng.random();
3862 let life = &cs.lifecycle;
3863 let lifecycle = if roll2 < life.prospect_rate {
3864 "prospect"
3865 } else if roll2 < life.prospect_rate + life.new_rate {
3866 "new"
3867 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3868 "growth"
3869 } else if roll2
3870 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3871 {
3872 "mature"
3873 } else if roll2
3874 < life.prospect_rate
3875 + life.new_rate
3876 + life.growth_rate
3877 + life.mature_rate
3878 + life.at_risk_rate
3879 {
3880 "at_risk"
3881 } else if roll2
3882 < life.prospect_rate
3883 + life.new_rate
3884 + life.growth_rate
3885 + life.mature_rate
3886 + life.at_risk_rate
3887 + life.churned_rate
3888 {
3889 "churned"
3890 } else {
3891 "won_back"
3892 };
3893 snap.customer_lifecycle_stages
3894 .push((customer.customer_id.clone(), lifecycle.to_string()));
3895 }
3896 }
3897
3898 let is = &self.config.industry_specific;
3900 if is.enabled {
3901 snap.industry_metadata.push(format!(
3902 "industry_specific.enabled=true (industry={:?})",
3903 self.config.global.industry
3904 ));
3905 }
3906
3907 snap
3908 }
3909
3910 fn phase_chart_of_accounts(
3916 &mut self,
3917 stats: &mut EnhancedGenerationStatistics,
3918 ) -> SynthResult<Arc<ChartOfAccounts>> {
3919 info!("Phase 1: Generating Chart of Accounts");
3920 let coa = self.generate_coa()?;
3921 stats.accounts_count = coa.account_count();
3922 info!(
3923 "Chart of Accounts generated: {} accounts",
3924 stats.accounts_count
3925 );
3926 self.check_resources_with_log("post-coa")?;
3927 Ok(coa)
3928 }
3929
3930 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3932 if self.phase_config.generate_master_data {
3933 info!("Phase 2: Generating Master Data");
3934 self.generate_master_data()?;
3935 stats.vendor_count = self.master_data.vendors.len();
3936 stats.customer_count = self.master_data.customers.len();
3937 stats.material_count = self.master_data.materials.len();
3938 stats.asset_count = self.master_data.assets.len();
3939 stats.employee_count = self.master_data.employees.len();
3940 info!(
3941 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3942 stats.vendor_count, stats.customer_count, stats.material_count,
3943 stats.asset_count, stats.employee_count
3944 );
3945 self.check_resources_with_log("post-master-data")?;
3946 } else {
3947 debug!("Phase 2: Skipped (master data generation disabled)");
3948 }
3949 Ok(())
3950 }
3951
3952 fn phase_document_flows(
3954 &mut self,
3955 stats: &mut EnhancedGenerationStatistics,
3956 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3957 let mut document_flows = DocumentFlowSnapshot::default();
3958 let mut subledger = SubledgerSnapshot::default();
3959 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3962
3963 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3964 info!("Phase 3: Generating Document Flows");
3965 self.generate_document_flows(&mut document_flows)?;
3966 stats.p2p_chain_count = document_flows.p2p_chains.len();
3967 stats.o2c_chain_count = document_flows.o2c_chains.len();
3968 info!(
3969 "Document flows generated: {} P2P chains, {} O2C chains",
3970 stats.p2p_chain_count, stats.o2c_chain_count
3971 );
3972
3973 debug!("Phase 3b: Linking document flows to subledgers");
3975 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3976 stats.ap_invoice_count = subledger.ap_invoices.len();
3977 stats.ar_invoice_count = subledger.ar_invoices.len();
3978 debug!(
3979 "Subledgers linked: {} AP invoices, {} AR invoices",
3980 stats.ap_invoice_count, stats.ar_invoice_count
3981 );
3982
3983 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3988 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3989 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3990 debug!("Payment settlements applied to AP and AR subledgers");
3991
3992 if let Ok(start_date) =
3995 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3996 {
3997 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3998 - chrono::Days::new(1);
3999 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4000 for company in &self.config.companies {
4007 let ar_report = ARAgingReport::from_invoices(
4008 company.code.clone(),
4009 &subledger.ar_invoices,
4010 as_of_date,
4011 );
4012 subledger.ar_aging_reports.push(ar_report);
4013
4014 let ap_report = APAgingReport::from_invoices(
4015 company.code.clone(),
4016 &subledger.ap_invoices,
4017 as_of_date,
4018 );
4019 subledger.ap_aging_reports.push(ap_report);
4020 }
4021 debug!(
4022 "AR/AP aging reports built: {} AR, {} AP",
4023 subledger.ar_aging_reports.len(),
4024 subledger.ap_aging_reports.len()
4025 );
4026
4027 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4029 {
4030 use datasynth_generators::DunningGenerator;
4031 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4032 for company in &self.config.companies {
4033 let currency = company.currency.as_str();
4034 let mut company_invoices: Vec<
4037 datasynth_core::models::subledger::ar::ARInvoice,
4038 > = subledger
4039 .ar_invoices
4040 .iter()
4041 .filter(|inv| inv.company_code == company.code)
4042 .cloned()
4043 .collect();
4044
4045 if company_invoices.is_empty() {
4046 continue;
4047 }
4048
4049 let result = dunning_gen.execute_dunning_run(
4050 &company.code,
4051 as_of_date,
4052 &mut company_invoices,
4053 currency,
4054 );
4055
4056 for updated in &company_invoices {
4058 if let Some(orig) = subledger
4059 .ar_invoices
4060 .iter_mut()
4061 .find(|i| i.invoice_number == updated.invoice_number)
4062 {
4063 orig.dunning_info = updated.dunning_info.clone();
4064 }
4065 }
4066
4067 subledger.dunning_runs.push(result.dunning_run);
4068 subledger.dunning_letters.extend(result.letters);
4069 dunning_journal_entries.extend(result.journal_entries);
4071 }
4072 debug!(
4073 "Dunning runs complete: {} runs, {} letters",
4074 subledger.dunning_runs.len(),
4075 subledger.dunning_letters.len()
4076 );
4077 }
4078 }
4079
4080 self.check_resources_with_log("post-document-flows")?;
4081 } else {
4082 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4083 }
4084
4085 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4087 if !self.master_data.assets.is_empty() {
4088 debug!("Generating FA subledger records");
4089 let company_code = self
4090 .config
4091 .companies
4092 .first()
4093 .map(|c| c.code.as_str())
4094 .unwrap_or("1000");
4095 let currency = self
4096 .config
4097 .companies
4098 .first()
4099 .map(|c| c.currency.as_str())
4100 .unwrap_or("USD");
4101
4102 let mut fa_gen = datasynth_generators::FAGenerator::new(
4103 datasynth_generators::FAGeneratorConfig::default(),
4104 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4105 );
4106
4107 for asset in &self.master_data.assets {
4108 let (record, je) = fa_gen.generate_asset_acquisition(
4109 company_code,
4110 &format!("{:?}", asset.asset_class),
4111 &asset.description,
4112 asset.acquisition_date,
4113 currency,
4114 asset.cost_center.as_deref(),
4115 );
4116 subledger.fa_records.push(record);
4117 fa_journal_entries.push(je);
4118 }
4119
4120 stats.fa_subledger_count = subledger.fa_records.len();
4121 debug!(
4122 "FA subledger records generated: {} (with {} acquisition JEs)",
4123 stats.fa_subledger_count,
4124 fa_journal_entries.len()
4125 );
4126 }
4127
4128 if !self.master_data.materials.is_empty() {
4130 debug!("Generating Inventory subledger records");
4131 let first_company = self.config.companies.first();
4132 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4133 let inv_currency = first_company
4134 .map(|c| c.currency.clone())
4135 .unwrap_or_else(|| "USD".to_string());
4136
4137 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4138 datasynth_generators::InventoryGeneratorConfig::default(),
4139 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4140 inv_currency.clone(),
4141 );
4142
4143 for (i, material) in self.master_data.materials.iter().enumerate() {
4144 let plant = format!("PLANT{:02}", (i % 3) + 1);
4145 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4146 let initial_qty = rust_decimal::Decimal::from(
4147 material
4148 .safety_stock
4149 .to_string()
4150 .parse::<i64>()
4151 .unwrap_or(100),
4152 );
4153
4154 let position = inv_gen.generate_position(
4155 company_code,
4156 &plant,
4157 &storage_loc,
4158 &material.material_id,
4159 &material.description,
4160 initial_qty,
4161 Some(material.standard_cost),
4162 &inv_currency,
4163 );
4164 subledger.inventory_positions.push(position);
4165 }
4166
4167 stats.inventory_subledger_count = subledger.inventory_positions.len();
4168 debug!(
4169 "Inventory subledger records generated: {}",
4170 stats.inventory_subledger_count
4171 );
4172 }
4173
4174 if !subledger.fa_records.is_empty() {
4176 if let Ok(start_date) =
4177 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4178 {
4179 let company_code = self
4180 .config
4181 .companies
4182 .first()
4183 .map(|c| c.code.as_str())
4184 .unwrap_or("1000");
4185 let fiscal_year = start_date.year();
4186 let start_period = start_date.month();
4187 let end_period =
4188 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4189
4190 let depr_cfg = FaDepreciationScheduleConfig {
4191 fiscal_year,
4192 start_period,
4193 end_period,
4194 seed_offset: 800,
4195 };
4196 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4197 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4198 let run_count = runs.len();
4199 subledger.depreciation_runs = runs;
4200 debug!(
4201 "Depreciation runs generated: {} runs for {} periods",
4202 run_count, self.config.global.period_months
4203 );
4204 }
4205 }
4206
4207 if !subledger.inventory_positions.is_empty() {
4209 if let Ok(start_date) =
4210 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4211 {
4212 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4213 - chrono::Days::new(1);
4214
4215 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4216 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4217
4218 for company in &self.config.companies {
4219 let result = inv_val_gen.generate(
4220 &company.code,
4221 &subledger.inventory_positions,
4222 as_of_date,
4223 );
4224 subledger.inventory_valuations.push(result);
4225 }
4226 debug!(
4227 "Inventory valuations generated: {} company reports",
4228 subledger.inventory_valuations.len()
4229 );
4230 }
4231 }
4232
4233 Ok((document_flows, subledger, fa_journal_entries))
4234 }
4235
4236 #[allow(clippy::too_many_arguments)]
4238 fn phase_ocpm_events(
4239 &mut self,
4240 document_flows: &DocumentFlowSnapshot,
4241 sourcing: &SourcingSnapshot,
4242 hr: &HrSnapshot,
4243 manufacturing: &ManufacturingSnapshot,
4244 banking: &BankingSnapshot,
4245 audit: &AuditSnapshot,
4246 financial_reporting: &FinancialReportingSnapshot,
4247 stats: &mut EnhancedGenerationStatistics,
4248 ) -> SynthResult<OcpmSnapshot> {
4249 let degradation = self.check_resources()?;
4250 if degradation >= DegradationLevel::Reduced {
4251 debug!(
4252 "Phase skipped due to resource pressure (degradation: {:?})",
4253 degradation
4254 );
4255 return Ok(OcpmSnapshot::default());
4256 }
4257 if self.phase_config.generate_ocpm_events {
4258 info!("Phase 3c: Generating OCPM Events");
4259 let ocpm_snapshot = self.generate_ocpm_events(
4260 document_flows,
4261 sourcing,
4262 hr,
4263 manufacturing,
4264 banking,
4265 audit,
4266 financial_reporting,
4267 )?;
4268 stats.ocpm_event_count = ocpm_snapshot.event_count;
4269 stats.ocpm_object_count = ocpm_snapshot.object_count;
4270 stats.ocpm_case_count = ocpm_snapshot.case_count;
4271 info!(
4272 "OCPM events generated: {} events, {} objects, {} cases",
4273 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4274 );
4275 self.check_resources_with_log("post-ocpm")?;
4276 Ok(ocpm_snapshot)
4277 } else {
4278 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4279 Ok(OcpmSnapshot::default())
4280 }
4281 }
4282
4283 fn phase_journal_entries(
4285 &mut self,
4286 coa: &Arc<ChartOfAccounts>,
4287 document_flows: &DocumentFlowSnapshot,
4288 _stats: &mut EnhancedGenerationStatistics,
4289 ) -> SynthResult<Vec<JournalEntry>> {
4290 let mut entries = Vec::new();
4291
4292 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4294 debug!("Phase 4a: Generating JEs from document flows");
4295 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4296 debug!("Generated {} JEs from document flows", flow_entries.len());
4297 entries.extend(flow_entries);
4298 }
4299
4300 if self.phase_config.generate_journal_entries {
4302 info!("Phase 4: Generating Journal Entries");
4303 let je_entries = self.generate_journal_entries(coa)?;
4304 info!("Generated {} standalone journal entries", je_entries.len());
4305 entries.extend(je_entries);
4306 } else {
4307 debug!("Phase 4: Skipped (journal entry generation disabled)");
4308 }
4309
4310 if let Some(ctx) = &self.shard_context {
4314 if !ctx.extra_journal_entries.is_empty() {
4315 debug!(
4316 "Phase 4c: appending {} shard-mode IC journal entries",
4317 ctx.extra_journal_entries.len()
4318 );
4319 entries.extend(ctx.extra_journal_entries.iter().cloned());
4320 }
4321 }
4322
4323 if !entries.is_empty() {
4324 self.check_resources_with_log("post-journal-entries")?;
4327 }
4328
4329 Ok(entries)
4330 }
4331
4332 fn phase_anomaly_injection(
4334 &mut self,
4335 entries: &mut [JournalEntry],
4336 actions: &DegradationActions,
4337 stats: &mut EnhancedGenerationStatistics,
4338 ) -> SynthResult<AnomalyLabels> {
4339 if self.phase_config.inject_anomalies
4340 && !entries.is_empty()
4341 && !actions.skip_anomaly_injection
4342 {
4343 info!("Phase 5: Injecting Anomalies");
4344 let result = self.inject_anomalies(entries)?;
4345 stats.anomalies_injected = result.labels.len();
4346 info!("Injected {} anomalies", stats.anomalies_injected);
4347 self.check_resources_with_log("post-anomaly-injection")?;
4348 Ok(result)
4349 } else if actions.skip_anomaly_injection {
4350 warn!("Phase 5: Skipped due to resource degradation");
4351 Ok(AnomalyLabels::default())
4352 } else {
4353 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4354 Ok(AnomalyLabels::default())
4355 }
4356 }
4357
4358 fn phase_balance_validation(
4360 &mut self,
4361 entries: &[JournalEntry],
4362 ) -> SynthResult<BalanceValidationResult> {
4363 if self.phase_config.validate_balances && !entries.is_empty() {
4364 debug!("Phase 6: Validating Balances");
4365 let balance_validation = self.validate_journal_entries(entries)?;
4366 if balance_validation.is_balanced {
4367 debug!("Balance validation passed");
4368 } else {
4369 warn!(
4370 "Balance validation found {} errors",
4371 balance_validation.validation_errors.len()
4372 );
4373 }
4374 Ok(balance_validation)
4375 } else {
4376 Ok(BalanceValidationResult::default())
4377 }
4378 }
4379
4380 fn validate_coa_coverage(
4387 &self,
4388 entries: &[JournalEntry],
4389 coa: &ChartOfAccounts,
4390 ) -> SynthResult<()> {
4391 if entries.is_empty() {
4392 return Ok(());
4393 }
4394 let coa_set: std::collections::HashSet<&str> = coa
4395 .accounts
4396 .iter()
4397 .map(|a| a.account_number.as_str())
4398 .collect();
4399 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4400 for je in entries {
4401 for line in je.lines.iter() {
4402 if !coa_set.contains(line.gl_account.as_str()) {
4403 missing.insert(line.gl_account.clone());
4404 }
4405 }
4406 }
4407 if missing.is_empty() {
4408 debug!("COA coverage validation passed");
4409 return Ok(());
4410 }
4411 let msg = format!(
4412 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4413 missing.len(),
4414 missing.iter().take(10).collect::<Vec<_>>()
4415 );
4416 if self.phase_config.validate_coa_coverage_strict {
4417 Err(SynthError::generation(msg))
4418 } else {
4419 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4420 Ok(())
4421 }
4422 }
4423
4424 fn phase_data_quality_injection(
4426 &mut self,
4427 entries: &mut [JournalEntry],
4428 actions: &DegradationActions,
4429 stats: &mut EnhancedGenerationStatistics,
4430 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4431 if self.phase_config.inject_data_quality
4432 && !entries.is_empty()
4433 && !actions.skip_data_quality
4434 {
4435 info!("Phase 7: Injecting Data Quality Variations");
4436 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4437 stats.data_quality_issues = dq_stats.records_with_issues;
4438 info!("Injected {} data quality issues", stats.data_quality_issues);
4439 self.check_resources_with_log("post-data-quality")?;
4440 Ok((dq_stats, quality_issues))
4441 } else if actions.skip_data_quality {
4442 warn!("Phase 7: Skipped due to resource degradation");
4443 Ok((stats_with_denominator(entries.len()), Vec::new()))
4447 } else {
4448 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4449 Ok((stats_with_denominator(entries.len()), Vec::new()))
4450 }
4451 }
4452
4453 fn phase_period_close(
4463 &mut self,
4464 entries: &mut Vec<JournalEntry>,
4465 subledger: &SubledgerSnapshot,
4466 stats: &mut EnhancedGenerationStatistics,
4467 ) -> SynthResult<()> {
4468 if !self.phase_config.generate_period_close || entries.is_empty() {
4469 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4470 return Ok(());
4471 }
4472
4473 info!("Phase 10b: Generating period-close journal entries");
4474
4475 use datasynth_core::accounts::{
4476 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4477 };
4478 use rust_decimal::Decimal;
4479
4480 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4481 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4482 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4483 let close_date = end_date - chrono::Days::new(1);
4485
4486 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4491 .config
4492 .companies
4493 .iter()
4494 .map(|c| c.code.clone())
4495 .collect();
4496
4497 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4499 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4500
4501 let period_months = self.config.global.period_months;
4505 for asset in &subledger.fa_records {
4506 use datasynth_core::models::subledger::fa::AssetStatus;
4508 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4509 continue;
4510 }
4511 let useful_life_months = asset.useful_life_months();
4512 if useful_life_months == 0 {
4513 continue;
4515 }
4516 let salvage_value = asset.salvage_value();
4517 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4518 if depreciable_base == Decimal::ZERO {
4519 continue;
4520 }
4521 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4522 * Decimal::from(period_months))
4523 .round_dp(2);
4524 if period_depr <= Decimal::ZERO {
4525 continue;
4526 }
4527
4528 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4529 depr_header.document_type = "CL".to_string();
4530 depr_header.header_text = Some(format!(
4531 "Depreciation - {} {}",
4532 asset.asset_number, asset.description
4533 ));
4534 depr_header.created_by = "CLOSE_ENGINE".to_string();
4535 depr_header.source = TransactionSource::Automated;
4536 depr_header.business_process = Some(BusinessProcess::R2R);
4537
4538 let doc_id = depr_header.document_id;
4539 let mut depr_je = JournalEntry::new(depr_header);
4540
4541 depr_je.add_line(JournalEntryLine::debit(
4543 doc_id,
4544 1,
4545 expense_accounts::DEPRECIATION.to_string(),
4546 period_depr,
4547 ));
4548 depr_je.add_line(JournalEntryLine::credit(
4550 doc_id,
4551 2,
4552 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4553 period_depr,
4554 ));
4555
4556 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4557 close_jes.push(depr_je);
4558 }
4559
4560 if !subledger.fa_records.is_empty() {
4561 debug!(
4562 "Generated {} depreciation JEs from {} FA records",
4563 close_jes.len(),
4564 subledger.fa_records.len()
4565 );
4566 }
4567
4568 {
4572 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4573 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4574 if let Some(ctx) = &self.temporal_context {
4577 accrual_gen.set_temporal_context(Arc::clone(ctx));
4578 }
4579
4580 let accrual_items: &[(&str, &str, &str)] = &[
4582 ("Accrued Utilities", "6200", "2100"),
4583 ("Accrued Rent", "6300", "2100"),
4584 ("Accrued Interest", "6100", "2150"),
4585 ];
4586
4587 for company_code in &company_codes {
4588 let company_revenue: Decimal = entries
4590 .iter()
4591 .filter(|e| e.header.company_code == *company_code)
4592 .flat_map(|e| e.lines.iter())
4593 .filter(|l| l.gl_account.starts_with('4'))
4594 .map(|l| l.credit_amount - l.debit_amount)
4595 .fold(Decimal::ZERO, |acc, v| acc + v);
4596
4597 if company_revenue <= Decimal::ZERO {
4598 continue;
4599 }
4600
4601 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4603 if accrual_base <= Decimal::ZERO {
4604 continue;
4605 }
4606
4607 for (description, expense_acct, liability_acct) in accrual_items {
4608 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4609 company_code,
4610 description,
4611 accrual_base,
4612 expense_acct,
4613 liability_acct,
4614 close_date,
4615 None,
4616 );
4617 close_jes.push(accrual_je);
4618 if let Some(rev_je) = reversal_je {
4619 close_jes.push(rev_je);
4620 }
4621 }
4622 }
4623
4624 debug!(
4625 "Generated accrual entries for {} companies",
4626 company_codes.len()
4627 );
4628 }
4629
4630 for company_code in &company_codes {
4631 let mut total_revenue = Decimal::ZERO;
4636 let mut total_expenses = Decimal::ZERO;
4637
4638 for entry in entries.iter() {
4639 if entry.header.company_code != *company_code {
4640 continue;
4641 }
4642 for line in &entry.lines {
4643 let category = AccountCategory::from_account(&line.gl_account);
4644 match category {
4645 AccountCategory::Revenue => {
4646 total_revenue += line.credit_amount - line.debit_amount;
4648 }
4649 AccountCategory::Cogs
4650 | AccountCategory::OperatingExpense
4651 | AccountCategory::OtherIncomeExpense
4652 | AccountCategory::Tax => {
4653 total_expenses += line.debit_amount - line.credit_amount;
4655 }
4656 _ => {}
4657 }
4658 }
4659 }
4660
4661 let pre_tax_income = total_revenue - total_expenses;
4662
4663 if pre_tax_income == Decimal::ZERO {
4665 debug!(
4666 "Company {}: no pre-tax income, skipping period close",
4667 company_code
4668 );
4669 continue;
4670 }
4671
4672 if pre_tax_income > Decimal::ZERO {
4674 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4676
4677 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4678 tax_header.document_type = "CL".to_string();
4679 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4680 tax_header.created_by = "CLOSE_ENGINE".to_string();
4681 tax_header.source = TransactionSource::Automated;
4682 tax_header.business_process = Some(BusinessProcess::R2R);
4683
4684 let doc_id = tax_header.document_id;
4685 let mut tax_je = JournalEntry::new(tax_header);
4686
4687 tax_je.add_line(JournalEntryLine::debit(
4689 doc_id,
4690 1,
4691 tax_accounts::TAX_EXPENSE.to_string(),
4692 tax_amount,
4693 ));
4694 tax_je.add_line(JournalEntryLine::credit(
4696 doc_id,
4697 2,
4698 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4699 tax_amount,
4700 ));
4701
4702 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4703 close_jes.push(tax_je);
4704 } else {
4705 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4708 if dta_amount > Decimal::ZERO {
4709 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4710 dta_header.document_type = "CL".to_string();
4711 dta_header.header_text =
4712 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4713 dta_header.created_by = "CLOSE_ENGINE".to_string();
4714 dta_header.source = TransactionSource::Automated;
4715 dta_header.business_process = Some(BusinessProcess::R2R);
4716
4717 let doc_id = dta_header.document_id;
4718 let mut dta_je = JournalEntry::new(dta_header);
4719
4720 dta_je.add_line(JournalEntryLine::debit(
4722 doc_id,
4723 1,
4724 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4725 dta_amount,
4726 ));
4727 dta_je.add_line(JournalEntryLine::credit(
4730 doc_id,
4731 2,
4732 tax_accounts::TAX_EXPENSE.to_string(),
4733 dta_amount,
4734 ));
4735
4736 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4737 close_jes.push(dta_je);
4738 debug!(
4739 "Company {}: loss year — recognised DTA of {}",
4740 company_code, dta_amount
4741 );
4742 }
4743 }
4744
4745 let tax_provision = if pre_tax_income > Decimal::ZERO {
4751 (pre_tax_income * tax_rate).round_dp(2)
4752 } else {
4753 Decimal::ZERO
4754 };
4755 let net_income = pre_tax_income - tax_provision;
4756
4757 if net_income > Decimal::ZERO {
4758 use datasynth_generators::DividendGenerator;
4759 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4761 let currency_str = self
4762 .config
4763 .companies
4764 .iter()
4765 .find(|c| c.code == *company_code)
4766 .map(|c| c.currency.as_str())
4767 .unwrap_or("USD");
4768 let div_result = div_gen.generate(
4769 company_code,
4770 close_date,
4771 Decimal::new(1, 0), dividend_amount,
4773 currency_str,
4774 );
4775 let div_je_count = div_result.journal_entries.len();
4776 close_jes.extend(div_result.journal_entries);
4777 debug!(
4778 "Company {}: declared dividend of {} ({} JEs)",
4779 company_code, dividend_amount, div_je_count
4780 );
4781 }
4782
4783 if net_income != Decimal::ZERO {
4788 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4789 close_header.document_type = "CL".to_string();
4790 close_header.header_text =
4791 Some(format!("Income statement close - {}", company_code));
4792 close_header.created_by = "CLOSE_ENGINE".to_string();
4793 close_header.source = TransactionSource::Automated;
4794 close_header.business_process = Some(BusinessProcess::R2R);
4795
4796 let doc_id = close_header.document_id;
4797 let mut close_je = JournalEntry::new(close_header);
4798
4799 let abs_net_income = net_income.abs();
4800
4801 if net_income > Decimal::ZERO {
4802 close_je.add_line(JournalEntryLine::debit(
4804 doc_id,
4805 1,
4806 equity_accounts::INCOME_SUMMARY.to_string(),
4807 abs_net_income,
4808 ));
4809 close_je.add_line(JournalEntryLine::credit(
4810 doc_id,
4811 2,
4812 equity_accounts::RETAINED_EARNINGS.to_string(),
4813 abs_net_income,
4814 ));
4815 } else {
4816 close_je.add_line(JournalEntryLine::debit(
4818 doc_id,
4819 1,
4820 equity_accounts::RETAINED_EARNINGS.to_string(),
4821 abs_net_income,
4822 ));
4823 close_je.add_line(JournalEntryLine::credit(
4824 doc_id,
4825 2,
4826 equity_accounts::INCOME_SUMMARY.to_string(),
4827 abs_net_income,
4828 ));
4829 }
4830
4831 debug_assert!(
4832 close_je.is_balanced(),
4833 "Income statement closing JE must be balanced"
4834 );
4835 close_jes.push(close_je);
4836 }
4837 }
4838
4839 let close_count = close_jes.len();
4840 if close_count > 0 {
4841 info!("Generated {} period-close journal entries", close_count);
4842 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4843 entries.extend(close_jes);
4844 stats.period_close_je_count = close_count;
4845
4846 stats.total_entries = entries.len() as u64;
4848 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4849 } else {
4850 debug!("No period-close entries generated (no income statement activity)");
4851 }
4852
4853 Ok(())
4854 }
4855
4856 fn phase_audit_data(
4858 &mut self,
4859 entries: &[JournalEntry],
4860 stats: &mut EnhancedGenerationStatistics,
4861 ) -> SynthResult<AuditSnapshot> {
4862 if self.phase_config.generate_audit {
4863 info!("Phase 8: Generating Audit Data");
4864 let audit_snapshot = self.generate_audit_data(entries)?;
4865 stats.audit_engagement_count = audit_snapshot.engagements.len();
4866 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4867 stats.audit_evidence_count = audit_snapshot.evidence.len();
4868 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4869 stats.audit_finding_count = audit_snapshot.findings.len();
4870 stats.audit_judgment_count = audit_snapshot.judgments.len();
4871 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4872 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4873 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4874 stats.audit_sample_count = audit_snapshot.samples.len();
4875 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4876 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4877 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4878 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4879 stats.audit_related_party_transaction_count =
4880 audit_snapshot.related_party_transactions.len();
4881 info!(
4882 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4883 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4884 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4885 {} RP transactions",
4886 stats.audit_engagement_count,
4887 stats.audit_workpaper_count,
4888 stats.audit_evidence_count,
4889 stats.audit_risk_count,
4890 stats.audit_finding_count,
4891 stats.audit_judgment_count,
4892 stats.audit_confirmation_count,
4893 stats.audit_procedure_step_count,
4894 stats.audit_sample_count,
4895 stats.audit_analytical_result_count,
4896 stats.audit_ia_function_count,
4897 stats.audit_ia_report_count,
4898 stats.audit_related_party_count,
4899 stats.audit_related_party_transaction_count,
4900 );
4901 self.check_resources_with_log("post-audit")?;
4902 Ok(audit_snapshot)
4903 } else {
4904 debug!("Phase 8: Skipped (audit generation disabled)");
4905 Ok(AuditSnapshot::default())
4906 }
4907 }
4908
4909 fn phase_banking_data(
4911 &mut self,
4912 stats: &mut EnhancedGenerationStatistics,
4913 ) -> SynthResult<BankingSnapshot> {
4914 if self.phase_config.generate_banking {
4915 info!("Phase 9: Generating Banking KYC/AML Data");
4916 let banking_snapshot = self.generate_banking_data()?;
4917 stats.banking_customer_count = banking_snapshot.customers.len();
4918 stats.banking_account_count = banking_snapshot.accounts.len();
4919 stats.banking_transaction_count = banking_snapshot.transactions.len();
4920 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4921 info!(
4922 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4923 stats.banking_customer_count, stats.banking_account_count,
4924 stats.banking_transaction_count, stats.banking_suspicious_count
4925 );
4926 self.check_resources_with_log("post-banking")?;
4927 Ok(banking_snapshot)
4928 } else {
4929 debug!("Phase 9: Skipped (banking generation disabled)");
4930 Ok(BankingSnapshot::default())
4931 }
4932 }
4933
4934 fn phase_graph_export(
4936 &mut self,
4937 entries: &[JournalEntry],
4938 coa: &Arc<ChartOfAccounts>,
4939 stats: &mut EnhancedGenerationStatistics,
4940 ) -> SynthResult<GraphExportSnapshot> {
4941 if self.phase_config.generate_graph_export && !entries.is_empty() {
4942 info!("Phase 10: Exporting Accounting Network Graphs");
4943 match self.export_graphs(entries, coa, stats) {
4944 Ok(snapshot) => {
4945 info!(
4946 "Graph export complete: {} graphs ({} nodes, {} edges)",
4947 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4948 );
4949 Ok(snapshot)
4950 }
4951 Err(e) => {
4952 warn!("Phase 10: Graph export failed: {}", e);
4953 Ok(GraphExportSnapshot::default())
4954 }
4955 }
4956 } else {
4957 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4958 Ok(GraphExportSnapshot::default())
4959 }
4960 }
4961
4962 #[allow(clippy::too_many_arguments)]
4964 fn phase_hypergraph_export(
4965 &self,
4966 coa: &Arc<ChartOfAccounts>,
4967 entries: &[JournalEntry],
4968 document_flows: &DocumentFlowSnapshot,
4969 sourcing: &SourcingSnapshot,
4970 hr: &HrSnapshot,
4971 manufacturing: &ManufacturingSnapshot,
4972 banking: &BankingSnapshot,
4973 audit: &AuditSnapshot,
4974 financial_reporting: &FinancialReportingSnapshot,
4975 ocpm: &OcpmSnapshot,
4976 compliance: &ComplianceRegulationsSnapshot,
4977 stats: &mut EnhancedGenerationStatistics,
4978 ) -> SynthResult<()> {
4979 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4980 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4981 match self.export_hypergraph(
4982 coa,
4983 entries,
4984 document_flows,
4985 sourcing,
4986 hr,
4987 manufacturing,
4988 banking,
4989 audit,
4990 financial_reporting,
4991 ocpm,
4992 compliance,
4993 stats,
4994 ) {
4995 Ok(info) => {
4996 info!(
4997 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4998 info.node_count, info.edge_count, info.hyperedge_count
4999 );
5000 }
5001 Err(e) => {
5002 warn!("Phase 10b: Hypergraph export failed: {}", e);
5003 }
5004 }
5005 } else {
5006 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5007 }
5008 Ok(())
5009 }
5010
5011 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5017 if !self.config.llm.enabled {
5018 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5019 return;
5020 }
5021
5022 info!("Phase 11: Starting LLM Enrichment");
5023 let start = std::time::Instant::now();
5024
5025 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5026 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5029 let schema_provider = &self.config.llm.provider;
5030 let api_key_env = match schema_provider.as_str() {
5031 "openai" => Some("OPENAI_API_KEY"),
5032 "anthropic" => Some("ANTHROPIC_API_KEY"),
5033 "custom" => Some("LLM_API_KEY"),
5034 _ => None,
5035 };
5036 if let Some(key_env) = api_key_env {
5037 if std::env::var(key_env).is_ok() {
5038 let llm_config = datasynth_core::llm::LlmConfig {
5039 model: self.config.llm.model.clone(),
5040 api_key_env: key_env.to_string(),
5041 ..datasynth_core::llm::LlmConfig::default()
5042 };
5043 match HttpLlmProvider::new(llm_config) {
5044 Ok(p) => Arc::new(p),
5045 Err(e) => {
5046 warn!(
5047 "Failed to create HttpLlmProvider: {}; falling back to mock",
5048 e
5049 );
5050 Arc::new(MockLlmProvider::new(self.seed))
5051 }
5052 }
5053 } else {
5054 Arc::new(MockLlmProvider::new(self.seed))
5055 }
5056 } else {
5057 Arc::new(MockLlmProvider::new(self.seed))
5058 }
5059 };
5060 let industry = format!("{:?}", self.config.global.industry);
5064
5065 let vendor_enricher =
5066 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5067 let max_vendors = self
5068 .config
5069 .llm
5070 .max_vendor_enrichments
5071 .min(self.master_data.vendors.len());
5072 let mut vendors_enriched = 0usize;
5073 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5074 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5075 Ok(name) => {
5076 vendor.name = name;
5077 vendors_enriched += 1;
5078 }
5079 Err(e) => warn!(
5080 "LLM vendor enrichment failed for {}: {}",
5081 vendor.vendor_id, e
5082 ),
5083 }
5084 }
5085
5086 let mut customers_enriched = 0usize;
5087 if self.config.llm.enrich_customers {
5088 let customer_enricher =
5089 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5090 &provider,
5091 ));
5092 let max_customers = self
5093 .config
5094 .llm
5095 .max_customer_enrichments
5096 .min(self.master_data.customers.len());
5097 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5098 match customer_enricher.enrich_customer_name(
5099 &industry,
5100 "general",
5101 &customer.country,
5102 ) {
5103 Ok(name) => {
5104 customer.name = name;
5105 customers_enriched += 1;
5106 }
5107 Err(e) => warn!(
5108 "LLM customer enrichment failed for {}: {}",
5109 customer.customer_id, e
5110 ),
5111 }
5112 }
5113 }
5114
5115 let mut materials_enriched = 0usize;
5116 if self.config.llm.enrich_materials {
5117 let material_enricher =
5118 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5119 &provider,
5120 ));
5121 let max_materials = self
5122 .config
5123 .llm
5124 .max_material_enrichments
5125 .min(self.master_data.materials.len());
5126 for material in self.master_data.materials.iter_mut().take(max_materials) {
5127 let material_type = format!("{:?}", material.material_type);
5128 match material_enricher.enrich_material_description(&material_type, &industry) {
5129 Ok(desc) => {
5130 material.description = desc;
5131 materials_enriched += 1;
5132 }
5133 Err(e) => warn!(
5134 "LLM material enrichment failed for {}: {}",
5135 material.material_id, e
5136 ),
5137 }
5138 }
5139 }
5140
5141 (vendors_enriched, customers_enriched, materials_enriched)
5142 }));
5143
5144 match result {
5145 Ok((v, c, m)) => {
5146 stats.llm_vendors_enriched = v;
5147 stats.llm_customers_enriched = c;
5148 stats.llm_materials_enriched = m;
5149 let elapsed = start.elapsed();
5150 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5151 info!(
5152 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5153 v, c, m, stats.llm_enrichment_ms
5154 );
5155 }
5156 Err(_) => {
5157 let elapsed = start.elapsed();
5158 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5159 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5160 }
5161 }
5162 }
5163
/// Phase 12: generate diffusion samples and record the sample count and elapsed
/// time in `stats`.
///
/// Does nothing when `config.diffusion.enabled` is off. Backend selection is
/// driven by `config.diffusion.backend`:
///
/// * `"neural"` / `"hybrid"` — with the `neural` Cargo feature compiled in,
///   `run_neural_diffusion_phase` is tried first and, on success, the method
///   returns immediately. If it fails, or the feature is not compiled in, a
///   warning is logged and the statistical backend below is used.
/// * `"statistical"` / `""` — the statistical backend is used directly.
/// * anything else — a warning is logged and the statistical backend is used.
///
/// The statistical path runs under `catch_unwind`; a panic is logged and
/// swallowed, in which case only the elapsed time is written to `stats`.
///
/// `entries` is only consumed by the neural path, hence the conditional
/// `allow(unused_variables)` on the parameter.
fn phase_diffusion_enhancement(
    &self,
    #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) {
    if !self.config.diffusion.enabled {
        debug!("Phase 12: Skipped (diffusion enhancement disabled)");
        return;
    }

    info!("Phase 12: Starting Diffusion Enhancement");
    let start = std::time::Instant::now();

    let backend_choice = self.config.diffusion.backend.as_str();
    // "hybrid" is routed through the same neural path as "neural".
    let use_neural = matches!(backend_choice, "neural" | "hybrid");

    if use_neural {
        #[cfg(feature = "neural")]
        {
            match self.run_neural_diffusion_phase(entries) {
                Ok(sample_count) => {
                    stats.diffusion_samples_generated = sample_count;
                    let elapsed = start.elapsed();
                    stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
                    info!(
                        "Phase 12 complete ({}): {} samples in {}ms",
                        backend_choice, sample_count, stats.diffusion_enhancement_ms
                    );
                    // Neural path succeeded: skip the statistical fallback.
                    return;
                }
                Err(e) => {
                    // No return here: execution falls through to the
                    // statistical backend after this `if`.
                    warn!(
                        "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
                    );
                }
            }
        }
        #[cfg(not(feature = "neural"))]
        {
            warn!(
                "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
                not compiled in — falling back to statistical. Rebuild with \
                `--features neural` (or `neural-cuda` for GPU) to enable.",
                backend_choice
            );
        }
    } else if !matches!(backend_choice, "statistical" | "") {
        warn!(
            "Phase 12: unknown backend '{}', falling back to statistical",
            backend_choice
        );
    }

    // Statistical backend: the default, and the fallback for every case above.
    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        // Hard-coded per-feature means and standard deviations for 3 features.
        // NOTE(review): presumably ordered like the neural training rows
        // (amount, line count, approval level) — confirm.
        let means = vec![5000.0, 3.0, 2.0];
        let stds = vec![2000.0, 1.5, 1.0];

        let diffusion_config = DiffusionConfig {
            n_steps: self.config.diffusion.n_steps,
            seed: self.seed,
            ..Default::default()
        };

        let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
        let n_samples = self.config.diffusion.sample_size;
        let n_features = 3;
        // Only the number of generated samples is kept; the samples themselves
        // are dropped here.
        backend.generate(n_samples, n_features, self.seed).len()
    }));

    match result {
        Ok(sample_count) => {
            stats.diffusion_samples_generated = sample_count;
            let elapsed = start.elapsed();
            stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
            info!(
                "Phase 12 complete (statistical): {} samples in {}ms",
                sample_count, stats.diffusion_enhancement_ms
            );
        }
        Err(_) => {
            // Panic inside the backend: record timing only and carry on.
            let elapsed = start.elapsed();
            stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
            warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
        }
    }
}
5263
/// Runs the neural diffusion backend and returns the number of samples it
/// generated.
///
/// If `config.diffusion.neural.checkpoint_path` is set, the backend is loaded
/// from that checkpoint. Otherwise a score network is trained on a feature
/// matrix derived from up to 5000 journal entries, one row per entry:
/// `[sum of positive debit amounts, line count, required approval levels]`
/// (approval levels default to `1.0` when the entry has no approval workflow).
///
/// The feature matrix is only built on the training path; loading a checkpoint
/// does not need it.
///
/// # Errors
///
/// Returns a generation error when `entries` is empty (checked on both paths),
/// when the checkpoint cannot be loaded, or when training fails.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};

    // Width of every training row built below; also the sample width requested
    // from the backend.
    const N_FEATURES: usize = 3;
    // Upper bound on the number of journal entries used as training rows.
    const MAX_TRAINING_ROWS: usize = 5000;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
        neural_cfg.checkpoint_path.as_ref()
    {
        let path = std::path::Path::new(ckpt_path);
        info!(
            " Neural diffusion: loading checkpoint from {}",
            path.display()
        );
        NeuralDiffusionBackend::load(path)
            .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
    } else {
        use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
        use rust_decimal::prelude::ToPrimitive;

        // Built here, not up front, so the checkpoint path skips this work.
        let training_data: Vec<Vec<f64>> = entries
            .iter()
            .take(MAX_TRAINING_ROWS)
            .map(|je| {
                let total_amount: f64 = je
                    .lines
                    .iter()
                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
                    .map(|l| l.debit_amount.to_f64().unwrap_or(0.0))
                    .sum();
                let line_count = je.lines.len() as f64;
                let approval_level = je
                    .header
                    .approval_workflow
                    .as_ref()
                    .map(|w| w.required_levels as f64)
                    .unwrap_or(1.0);
                vec![total_amount, line_count, approval_level]
            })
            .collect();

        info!(
            " Neural diffusion: training score network on {} rows × {} features, \
            {} epochs, hidden_dims={:?}",
            training_data.len(),
            N_FEATURES,
            neural_cfg.training_epochs,
            neural_cfg.hidden_dims
        );
        let training_config = NeuralTrainingConfig {
            n_steps: cfg.n_steps,
            schedule: cfg.schedule.clone(),
            hidden_dims: neural_cfg.hidden_dims.clone(),
            timestep_embed_dim: neural_cfg.timestep_embed_dim,
            learning_rate: neural_cfg.learning_rate,
            epochs: neural_cfg.training_epochs,
            batch_size: neural_cfg.batch_size,
        };
        let (backend, report) =
            NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
        info!(
            " Neural diffusion: training done — {} epochs, final_loss={:.4}",
            report.epochs_completed, report.final_loss
        );
        backend
    };

    let samples = backend.generate(cfg.sample_size, N_FEATURES, self.seed);
    Ok(samples.len())
}
5351
5352 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5359 if !self.config.causal.enabled {
5360 debug!("Phase 13: Skipped (causal generation disabled)");
5361 return;
5362 }
5363
5364 info!("Phase 13: Starting Causal Overlay");
5365 let start = std::time::Instant::now();
5366
5367 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5368 let graph = match self.config.causal.template.as_str() {
5370 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5371 _ => CausalGraph::fraud_detection_template(),
5372 };
5373
5374 let scm = StructuralCausalModel::new(graph.clone())
5375 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5376
5377 let n_samples = self.config.causal.sample_size;
5378 let samples = scm
5379 .generate(n_samples, self.seed)
5380 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5381
5382 let validation_passed = if self.config.causal.validate {
5384 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5385 if report.valid {
5386 info!(
5387 "Causal validation passed: all {} checks OK",
5388 report.checks.len()
5389 );
5390 } else {
5391 warn!(
5392 "Causal validation: {} violations detected: {:?}",
5393 report.violations.len(),
5394 report.violations
5395 );
5396 }
5397 Some(report.valid)
5398 } else {
5399 None
5400 };
5401
5402 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5403 }));
5404
5405 match result {
5406 Ok(Ok((sample_count, validation_passed))) => {
5407 stats.causal_samples_generated = sample_count;
5408 stats.causal_validation_passed = validation_passed;
5409 let elapsed = start.elapsed();
5410 stats.causal_generation_ms = elapsed.as_millis() as u64;
5411 info!(
5412 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5413 sample_count, stats.causal_generation_ms, validation_passed,
5414 );
5415 }
5416 Ok(Err(e)) => {
5417 let elapsed = start.elapsed();
5418 stats.causal_generation_ms = elapsed.as_millis() as u64;
5419 warn!("Phase 13: Causal generation failed: {}", e);
5420 }
5421 Err(_) => {
5422 let elapsed = start.elapsed();
5423 stats.causal_generation_ms = elapsed.as_millis() as u64;
5424 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5425 }
5426 }
5427 }
5428
5429 fn phase_sourcing_data(
5431 &mut self,
5432 stats: &mut EnhancedGenerationStatistics,
5433 ) -> SynthResult<SourcingSnapshot> {
5434 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5435 debug!("Phase 14: Skipped (sourcing generation disabled)");
5436 return Ok(SourcingSnapshot::default());
5437 }
5438 let degradation = self.check_resources()?;
5439 if degradation >= DegradationLevel::Reduced {
5440 debug!(
5441 "Phase skipped due to resource pressure (degradation: {:?})",
5442 degradation
5443 );
5444 return Ok(SourcingSnapshot::default());
5445 }
5446
5447 info!("Phase 14: Generating S2C Sourcing Data");
5448 let seed = self.seed;
5449
5450 let vendor_ids: Vec<String> = self
5452 .master_data
5453 .vendors
5454 .iter()
5455 .map(|v| v.vendor_id.clone())
5456 .collect();
5457 if vendor_ids.is_empty() {
5458 debug!("Phase 14: Skipped (no vendors available)");
5459 return Ok(SourcingSnapshot::default());
5460 }
5461
5462 let categories: Vec<(String, String)> = vec![
5463 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5464 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5465 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5466 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5467 ("CAT-LOG".to_string(), "Logistics".to_string()),
5468 ];
5469 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5470 .iter()
5471 .map(|(id, name)| {
5472 (
5473 id.clone(),
5474 name.clone(),
5475 rust_decimal::Decimal::from(100_000),
5476 )
5477 })
5478 .collect();
5479
5480 let company_code = self
5481 .config
5482 .companies
5483 .first()
5484 .map(|c| c.code.as_str())
5485 .unwrap_or("1000");
5486 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5487 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5488 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5489 let fiscal_year = start_date.year() as u16;
5490 let owner_ids: Vec<String> = self
5491 .master_data
5492 .employees
5493 .iter()
5494 .take(5)
5495 .map(|e| e.employee_id.clone())
5496 .collect();
5497 let owner_id = owner_ids
5498 .first()
5499 .map(std::string::String::as_str)
5500 .unwrap_or("BUYER-001");
5501
5502 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5504 let spend_analyses =
5505 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5506
5507 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5509 let sourcing_projects = if owner_ids.is_empty() {
5510 Vec::new()
5511 } else {
5512 project_gen.generate(
5513 company_code,
5514 &categories_with_spend,
5515 &owner_ids,
5516 start_date,
5517 self.config.global.period_months,
5518 )
5519 };
5520 stats.sourcing_project_count = sourcing_projects.len();
5521
5522 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5524 let mut qual_gen = QualificationGenerator::new(seed + 2);
5525 let qualifications = qual_gen.generate(
5526 company_code,
5527 &qual_vendor_ids,
5528 sourcing_projects.first().map(|p| p.project_id.as_str()),
5529 owner_id,
5530 start_date,
5531 );
5532
5533 let mut rfx_gen = RfxGenerator::new(seed + 3);
5535 let rfx_events: Vec<RfxEvent> = sourcing_projects
5536 .iter()
5537 .map(|proj| {
5538 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5539 rfx_gen.generate(
5540 company_code,
5541 &proj.project_id,
5542 &proj.category_id,
5543 &qualified_vids,
5544 owner_id,
5545 start_date,
5546 50000.0,
5547 )
5548 })
5549 .collect();
5550 stats.rfx_event_count = rfx_events.len();
5551
5552 let mut bid_gen = BidGenerator::new(seed + 4);
5554 let mut all_bids = Vec::new();
5555 for rfx in &rfx_events {
5556 let bidder_count = vendor_ids.len().clamp(2, 5);
5557 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5558 let bids = bid_gen.generate(rfx, &responding, start_date);
5559 all_bids.extend(bids);
5560 }
5561 stats.bid_count = all_bids.len();
5562
5563 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5565 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5566 .iter()
5567 .map(|rfx| {
5568 let rfx_bids: Vec<SupplierBid> = all_bids
5569 .iter()
5570 .filter(|b| b.rfx_id == rfx.rfx_id)
5571 .cloned()
5572 .collect();
5573 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5574 })
5575 .collect();
5576
5577 let mut contract_gen = ContractGenerator::new(seed + 6);
5579 let contracts: Vec<ProcurementContract> = bid_evaluations
5580 .iter()
5581 .zip(rfx_events.iter())
5582 .filter_map(|(eval, rfx)| {
5583 eval.ranked_bids.first().and_then(|winner| {
5584 all_bids
5585 .iter()
5586 .find(|b| b.bid_id == winner.bid_id)
5587 .map(|winning_bid| {
5588 contract_gen.generate_from_bid(
5589 winning_bid,
5590 Some(&rfx.sourcing_project_id),
5591 &rfx.category_id,
5592 owner_id,
5593 start_date,
5594 )
5595 })
5596 })
5597 })
5598 .collect();
5599 stats.contract_count = contracts.len();
5600
5601 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5603 let catalog_items = catalog_gen.generate(&contracts);
5604 stats.catalog_item_count = catalog_items.len();
5605
5606 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5608 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5609 .iter()
5610 .fold(
5611 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5612 |mut acc, c| {
5613 acc.entry(c.vendor_id.clone()).or_default().push(c);
5614 acc
5615 },
5616 )
5617 .into_iter()
5618 .collect();
5619 let scorecards = scorecard_gen.generate(
5620 company_code,
5621 &vendor_contracts,
5622 start_date,
5623 end_date,
5624 owner_id,
5625 );
5626 stats.scorecard_count = scorecards.len();
5627
5628 let mut sourcing_projects = sourcing_projects;
5631 for project in &mut sourcing_projects {
5632 project.rfx_ids = rfx_events
5634 .iter()
5635 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5636 .map(|rfx| rfx.rfx_id.clone())
5637 .collect();
5638
5639 project.contract_id = contracts
5641 .iter()
5642 .find(|c| {
5643 c.sourcing_project_id
5644 .as_deref()
5645 .is_some_and(|sp| sp == project.project_id)
5646 })
5647 .map(|c| c.contract_id.clone());
5648
5649 project.spend_analysis_id = spend_analyses
5651 .iter()
5652 .find(|sa| sa.category_id == project.category_id)
5653 .map(|sa| sa.category_id.clone());
5654 }
5655
5656 info!(
5657 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5658 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5659 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5660 );
5661 self.check_resources_with_log("post-sourcing")?;
5662
5663 Ok(SourcingSnapshot {
5664 spend_analyses,
5665 sourcing_projects,
5666 qualifications,
5667 rfx_events,
5668 bids: all_bids,
5669 bid_evaluations,
5670 contracts,
5671 catalog_items,
5672 scorecards,
5673 })
5674 }
5675
5676 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5682 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5683
5684 let parent_code = self
5685 .config
5686 .companies
5687 .first()
5688 .map(|c| c.code.clone())
5689 .unwrap_or_else(|| "PARENT".to_string());
5690
5691 let mut group = GroupStructure::new(parent_code);
5692
5693 for company in self.config.companies.iter().skip(1) {
5694 let sub =
5695 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5696 group.add_subsidiary(sub);
5697 }
5698
5699 group
5700 }
5701
5702 fn phase_intercompany(
5704 &mut self,
5705 journal_entries: &[JournalEntry],
5706 stats: &mut EnhancedGenerationStatistics,
5707 ) -> SynthResult<IntercompanySnapshot> {
5708 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5710 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5711 return Ok(IntercompanySnapshot::default());
5712 }
5713
5714 if self.config.companies.len() < 2 {
5716 debug!(
5717 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5718 self.config.companies.len()
5719 );
5720 return Ok(IntercompanySnapshot::default());
5721 }
5722
5723 info!("Phase 14b: Generating Intercompany Transactions");
5724
5725 let group_structure = self.build_group_structure();
5728 debug!(
5729 "Group structure built: parent={}, subsidiaries={}",
5730 group_structure.parent_entity,
5731 group_structure.subsidiaries.len()
5732 );
5733
5734 let seed = self.seed;
5735 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5736 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5737 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5738
5739 let parent_code = self.config.companies[0].code.clone();
5742 let mut ownership_structure =
5743 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5744
5745 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5746 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5747 format!("REL{:03}", i + 1),
5748 parent_code.clone(),
5749 company.code.clone(),
5750 rust_decimal::Decimal::from(100), start_date,
5752 );
5753 ownership_structure.add_relationship(relationship);
5754 }
5755
5756 let tp_method = match self.config.intercompany.transfer_pricing_method {
5758 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5759 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5760 }
5761 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5762 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5763 }
5764 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5765 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5766 }
5767 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5768 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5769 }
5770 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5771 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5772 }
5773 };
5774
5775 let ic_currency = self
5777 .config
5778 .companies
5779 .first()
5780 .map(|c| c.currency.clone())
5781 .unwrap_or_else(|| "USD".to_string());
5782 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5783 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5784 transfer_pricing_method: tp_method,
5785 markup_percent: rust_decimal::Decimal::from_f64_retain(
5786 self.config.intercompany.markup_percent,
5787 )
5788 .unwrap_or(rust_decimal::Decimal::from(5)),
5789 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5790 default_currency: ic_currency,
5791 ..Default::default()
5792 };
5793
5794 let mut ic_generator = datasynth_generators::ICGenerator::new(
5796 ic_gen_config,
5797 ownership_structure.clone(),
5798 seed + 50,
5799 );
5800
5801 let transactions_per_day = 3;
5804 let matched_pairs = ic_generator.generate_transactions_for_period(
5805 start_date,
5806 end_date,
5807 transactions_per_day,
5808 );
5809
5810 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5812 debug!(
5813 "Generated {} IC seller invoices, {} IC buyer POs",
5814 ic_doc_chains.seller_invoices.len(),
5815 ic_doc_chains.buyer_orders.len()
5816 );
5817
5818 let mut seller_entries = Vec::new();
5820 let mut buyer_entries = Vec::new();
5821 let fiscal_year = start_date.year();
5822
5823 for pair in &matched_pairs {
5824 let fiscal_period = pair.posting_date.month();
5825 let (seller_je, buyer_je) =
5826 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5827 seller_entries.push(seller_je);
5828 buyer_entries.push(buyer_je);
5829 }
5830
5831 let matching_config = datasynth_generators::ICMatchingConfig {
5833 base_currency: self
5834 .config
5835 .companies
5836 .first()
5837 .map(|c| c.currency.clone())
5838 .unwrap_or_else(|| "USD".to_string()),
5839 ..Default::default()
5840 };
5841 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5842 matching_engine.load_matched_pairs(&matched_pairs);
5843 let matching_result = matching_engine.run_matching(end_date);
5844
5845 let mut elimination_entries = Vec::new();
5847 if self.config.intercompany.generate_eliminations {
5848 let elim_config = datasynth_generators::EliminationConfig {
5849 consolidation_entity: "GROUP".to_string(),
5850 base_currency: self
5851 .config
5852 .companies
5853 .first()
5854 .map(|c| c.currency.clone())
5855 .unwrap_or_else(|| "USD".to_string()),
5856 ..Default::default()
5857 };
5858
5859 let mut elim_generator =
5860 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5861
5862 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5863 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5864 matching_result
5865 .matched_balances
5866 .iter()
5867 .chain(matching_result.unmatched_balances.iter())
5868 .cloned()
5869 .collect();
5870
5871 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5883 std::collections::HashMap::new();
5884 let mut equity_amounts: std::collections::HashMap<
5885 String,
5886 std::collections::HashMap<String, rust_decimal::Decimal>,
5887 > = std::collections::HashMap::new();
5888 {
5889 use rust_decimal::Decimal;
5890 let hundred = Decimal::from(100u32);
5891 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5895 for sub in &group_structure.subsidiaries {
5896 let net_assets = {
5897 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5898 if na > Decimal::ZERO {
5899 na
5900 } else {
5901 Decimal::from(1_000_000u64)
5902 }
5903 };
5904 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5906 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5907
5908 let mut eq_map = std::collections::HashMap::new();
5911 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5912 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5913 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5914 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5915 }
5916 }
5917
5918 let journal = elim_generator.generate_eliminations(
5919 &fiscal_period,
5920 end_date,
5921 &all_balances,
5922 &matched_pairs,
5923 &investment_amounts,
5924 &equity_amounts,
5925 );
5926
5927 elimination_entries = journal.entries.clone();
5928 }
5929
5930 let matched_pair_count = matched_pairs.len();
5931 let elimination_entry_count = elimination_entries.len();
5932 let match_rate = matching_result.match_rate;
5933
5934 stats.ic_matched_pair_count = matched_pair_count;
5935 stats.ic_elimination_count = elimination_entry_count;
5936 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5937
5938 info!(
5939 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5940 matched_pair_count,
5941 stats.ic_transaction_count,
5942 seller_entries.len(),
5943 buyer_entries.len(),
5944 elimination_entry_count,
5945 match_rate * 100.0
5946 );
5947 self.check_resources_with_log("post-intercompany")?;
5948
5949 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5953 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5954 use rust_decimal::Decimal;
5955
5956 let eight_pct = Decimal::new(8, 2); group_structure
5959 .subsidiaries
5960 .iter()
5961 .filter(|sub| {
5962 sub.nci_percentage > Decimal::ZERO
5963 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5964 })
5965 .map(|sub| {
5966 let net_assets_from_jes =
5970 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5971
5972 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5973 net_assets_from_jes.round_dp(2)
5974 } else {
5975 Decimal::from(1_000_000u64)
5977 };
5978
5979 let net_income = (net_assets * eight_pct).round_dp(2);
5981
5982 NciMeasurement::compute(
5983 sub.entity_code.clone(),
5984 sub.nci_percentage,
5985 net_assets,
5986 net_income,
5987 )
5988 })
5989 .collect()
5990 };
5991
5992 if !nci_measurements.is_empty() {
5993 info!(
5994 "NCI measurements: {} subsidiaries with non-controlling interests",
5995 nci_measurements.len()
5996 );
5997 }
5998
5999 Ok(IntercompanySnapshot {
6000 group_structure: Some(group_structure),
6001 matched_pairs,
6002 seller_journal_entries: seller_entries,
6003 buyer_journal_entries: buyer_entries,
6004 elimination_entries,
6005 nci_measurements,
6006 ic_document_chains: Some(ic_doc_chains),
6007 matched_pair_count,
6008 elimination_entry_count,
6009 match_rate,
6010 })
6011 }
6012
6013 fn phase_financial_reporting(
6015 &mut self,
6016 document_flows: &DocumentFlowSnapshot,
6017 journal_entries: &[JournalEntry],
6018 coa: &Arc<ChartOfAccounts>,
6019 _hr: &HrSnapshot,
6020 _audit: &AuditSnapshot,
6021 stats: &mut EnhancedGenerationStatistics,
6022 ) -> SynthResult<FinancialReportingSnapshot> {
6023 let fs_enabled = self.phase_config.generate_financial_statements
6024 || self.config.financial_reporting.enabled;
6025 let br_enabled = self.phase_config.generate_bank_reconciliation;
6026
6027 if !fs_enabled && !br_enabled {
6028 debug!("Phase 15: Skipped (financial reporting disabled)");
6029 return Ok(FinancialReportingSnapshot::default());
6030 }
6031
6032 info!("Phase 15: Generating Financial Reporting Data");
6033
6034 let seed = self.seed;
6035 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6036 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6037
6038 let mut financial_statements = Vec::new();
6039 let mut bank_reconciliations = Vec::new();
6040 let mut trial_balances = Vec::new();
6041 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6042 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6043 Vec::new();
6044 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6046 std::collections::HashMap::new();
6047 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6049 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6051
6052 if fs_enabled {
6060 let has_journal_entries = !journal_entries.is_empty();
6061
6062 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6065 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6067
6068 let elimination_entries: Vec<&JournalEntry> = journal_entries
6070 .iter()
6071 .filter(|je| je.header.is_elimination)
6072 .collect();
6073
6074 for period in 0..self.config.global.period_months {
6076 let period_start = start_date + chrono::Months::new(period);
6077 let period_end =
6078 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6079 let fiscal_year = period_end.year() as u16;
6080 let fiscal_period = period_end.month() as u8;
6081 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6082
6083 let mut entity_tb_map: std::collections::HashMap<
6086 String,
6087 std::collections::HashMap<String, rust_decimal::Decimal>,
6088 > = std::collections::HashMap::new();
6089
6090 for (company_idx, company) in self.config.companies.iter().enumerate() {
6092 let company_code = company.code.as_str();
6093 let currency = company.currency.as_str();
6094 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6097 let mut company_fs_gen =
6098 FinancialStatementGenerator::new(seed + company_seed_offset);
6099
6100 if has_journal_entries {
6101 let tb_entries = Self::build_cumulative_trial_balance(
6102 journal_entries,
6103 coa,
6104 company_code,
6105 start_date,
6106 period_end,
6107 fiscal_year,
6108 fiscal_period,
6109 );
6110
6111 let entity_cat_map =
6113 entity_tb_map.entry(company_code.to_string()).or_default();
6114 for tb_entry in &tb_entries {
6115 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6116 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6117 }
6118
6119 let stmts = company_fs_gen.generate(
6120 company_code,
6121 currency,
6122 &tb_entries,
6123 period_start,
6124 period_end,
6125 fiscal_year,
6126 fiscal_period,
6127 None,
6128 "SYS-AUTOCLOSE",
6129 );
6130
6131 let mut entity_stmts = Vec::new();
6132 for stmt in stmts {
6133 if stmt.statement_type == StatementType::CashFlowStatement {
6134 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6135 let cf_items = Self::build_cash_flow_from_trial_balances(
6136 &tb_entries,
6137 None,
6138 net_income,
6139 );
6140 entity_stmts.push(FinancialStatement {
6141 cash_flow_items: cf_items,
6142 ..stmt
6143 });
6144 } else {
6145 entity_stmts.push(stmt);
6146 }
6147 }
6148
6149 financial_statements.extend(entity_stmts.clone());
6151
6152 standalone_statements
6154 .entry(company_code.to_string())
6155 .or_default()
6156 .extend(entity_stmts);
6157
6158 if company_idx == 0 {
6161 trial_balances.push(PeriodTrialBalance {
6162 fiscal_year,
6163 fiscal_period,
6164 period_start,
6165 period_end,
6166 entries: tb_entries,
6167 });
6168 }
6169 } else {
6170 let tb_entries = Self::build_trial_balance_from_entries(
6172 journal_entries,
6173 coa,
6174 company_code,
6175 fiscal_year,
6176 fiscal_period,
6177 );
6178
6179 let stmts = company_fs_gen.generate(
6180 company_code,
6181 currency,
6182 &tb_entries,
6183 period_start,
6184 period_end,
6185 fiscal_year,
6186 fiscal_period,
6187 None,
6188 "SYS-AUTOCLOSE",
6189 );
6190 financial_statements.extend(stmts.clone());
6191 standalone_statements
6192 .entry(company_code.to_string())
6193 .or_default()
6194 .extend(stmts);
6195
6196 if company_idx == 0 && !tb_entries.is_empty() {
6197 trial_balances.push(PeriodTrialBalance {
6198 fiscal_year,
6199 fiscal_period,
6200 period_start,
6201 period_end,
6202 entries: tb_entries,
6203 });
6204 }
6205 }
6206 }
6207
6208 let group_currency = self
6211 .config
6212 .companies
6213 .first()
6214 .map(|c| c.currency.as_str())
6215 .unwrap_or("USD");
6216
6217 let period_eliminations: Vec<JournalEntry> = elimination_entries
6219 .iter()
6220 .filter(|je| {
6221 je.header.fiscal_year == fiscal_year
6222 && je.header.fiscal_period == fiscal_period
6223 })
6224 .map(|je| (*je).clone())
6225 .collect();
6226
6227 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6228 &entity_tb_map,
6229 &period_eliminations,
6230 &period_label,
6231 );
6232
6233 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6236 .line_items
6237 .iter()
6238 .map(|li| {
6239 let net = li.post_elimination_total;
6240 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6241 (net, rust_decimal::Decimal::ZERO)
6242 } else {
6243 (rust_decimal::Decimal::ZERO, -net)
6244 };
6245 datasynth_generators::TrialBalanceEntry {
6246 account_code: li.account_category.clone(),
6247 account_name: li.account_category.clone(),
6248 category: li.account_category.clone(),
6249 debit_balance: debit,
6250 credit_balance: credit,
6251 }
6252 })
6253 .collect();
6254
6255 let mut cons_stmts = cons_gen.generate(
6256 "GROUP",
6257 group_currency,
6258 &cons_tb,
6259 period_start,
6260 period_end,
6261 fiscal_year,
6262 fiscal_period,
6263 None,
6264 "SYS-AUTOCLOSE",
6265 );
6266
6267 let bs_categories: &[&str] = &[
6271 "CASH",
6272 "RECEIVABLES",
6273 "INVENTORY",
6274 "FIXEDASSETS",
6275 "PAYABLES",
6276 "ACCRUEDLIABILITIES",
6277 "LONGTERMDEBT",
6278 "EQUITY",
6279 ];
6280 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6281 cons_line_items.into_iter().partition(|li| {
6282 let upper = li.label.to_uppercase();
6283 bs_categories.iter().any(|c| upper == *c)
6284 });
6285
6286 for stmt in &mut cons_stmts {
6287 stmt.is_consolidated = true;
6288 match stmt.statement_type {
6289 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6290 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6291 _ => {} }
6293 }
6294
6295 consolidated_statements.extend(cons_stmts);
6296 consolidation_schedules.push(schedule);
6297 }
6298
6299 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6305 info!(
6306 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6307 stats.financial_statement_count,
6308 consolidated_statements.len(),
6309 has_journal_entries
6310 );
6311
6312 let entity_seeds: Vec<SegmentSeed> = self
6317 .config
6318 .companies
6319 .iter()
6320 .map(|c| SegmentSeed {
6321 code: c.code.clone(),
6322 name: c.name.clone(),
6323 currency: c.currency.clone(),
6324 })
6325 .collect();
6326
6327 let mut seg_gen = SegmentGenerator::new(seed + 30);
6328
6329 for period in 0..self.config.global.period_months {
6334 let period_end =
6335 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6336 let fiscal_year = period_end.year() as u16;
6337 let fiscal_period = period_end.month() as u8;
6338 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6339
6340 use datasynth_core::models::StatementType;
6341
6342 let cons_is = consolidated_statements.iter().find(|s| {
6344 s.fiscal_year == fiscal_year
6345 && s.fiscal_period == fiscal_period
6346 && s.statement_type == StatementType::IncomeStatement
6347 });
6348 let cons_bs = consolidated_statements.iter().find(|s| {
6349 s.fiscal_year == fiscal_year
6350 && s.fiscal_period == fiscal_period
6351 && s.statement_type == StatementType::BalanceSheet
6352 });
6353
6354 let is_stmt = cons_is.or_else(|| {
6356 financial_statements.iter().find(|s| {
6357 s.fiscal_year == fiscal_year
6358 && s.fiscal_period == fiscal_period
6359 && s.statement_type == StatementType::IncomeStatement
6360 })
6361 });
6362 let bs_stmt = cons_bs.or_else(|| {
6363 financial_statements.iter().find(|s| {
6364 s.fiscal_year == fiscal_year
6365 && s.fiscal_period == fiscal_period
6366 && s.statement_type == StatementType::BalanceSheet
6367 })
6368 });
6369
6370 let consolidated_revenue = is_stmt
6371 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6372 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6374
6375 let consolidated_profit = is_stmt
6376 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6377 .map(|li| li.amount)
6378 .unwrap_or(rust_decimal::Decimal::ZERO);
6379
6380 let consolidated_assets = bs_stmt
6381 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6382 .map(|li| li.amount)
6383 .unwrap_or(rust_decimal::Decimal::ZERO);
6384
6385 if consolidated_revenue == rust_decimal::Decimal::ZERO
6387 && consolidated_assets == rust_decimal::Decimal::ZERO
6388 {
6389 continue;
6390 }
6391
6392 let group_code = self
6393 .config
6394 .companies
6395 .first()
6396 .map(|c| c.code.as_str())
6397 .unwrap_or("GROUP");
6398
6399 let total_depr: rust_decimal::Decimal = journal_entries
6402 .iter()
6403 .filter(|je| je.header.document_type == "CL")
6404 .flat_map(|je| je.lines.iter())
6405 .filter(|l| l.gl_account.starts_with("6000"))
6406 .map(|l| l.debit_amount)
6407 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6408 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6409 Some(total_depr)
6410 } else {
6411 None
6412 };
6413
6414 let (segs, recon) = seg_gen.generate(
6415 group_code,
6416 &period_label,
6417 consolidated_revenue,
6418 consolidated_profit,
6419 consolidated_assets,
6420 &entity_seeds,
6421 depr_param,
6422 );
6423 segment_reports.extend(segs);
6424 segment_reconciliations.push(recon);
6425 }
6426
6427 info!(
6428 "Segment reports generated: {} segments, {} reconciliations",
6429 segment_reports.len(),
6430 segment_reconciliations.len()
6431 );
6432 }
6433
6434 if br_enabled && !document_flows.payments.is_empty() {
6436 let employee_ids: Vec<String> = self
6437 .master_data
6438 .employees
6439 .iter()
6440 .map(|e| e.employee_id.clone())
6441 .collect();
6442 let mut br_gen =
6443 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6444
6445 for company in &self.config.companies {
6447 let company_payments: Vec<PaymentReference> = document_flows
6448 .payments
6449 .iter()
6450 .filter(|p| p.header.company_code == company.code)
6451 .map(|p| PaymentReference {
6452 id: p.header.document_id.clone(),
6453 amount: if p.is_vendor { p.amount } else { -p.amount },
6454 date: p.header.document_date,
6455 reference: p
6456 .check_number
6457 .clone()
6458 .or_else(|| p.wire_reference.clone())
6459 .unwrap_or_else(|| p.header.document_id.clone()),
6460 })
6461 .collect();
6462
6463 if company_payments.is_empty() {
6464 continue;
6465 }
6466
6467 let bank_account_id = format!("{}-MAIN", company.code);
6468
6469 for period in 0..self.config.global.period_months {
6471 let period_start = start_date + chrono::Months::new(period);
6472 let period_end =
6473 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6474
6475 let period_payments: Vec<PaymentReference> = company_payments
6476 .iter()
6477 .filter(|p| p.date >= period_start && p.date <= period_end)
6478 .cloned()
6479 .collect();
6480
6481 let recon = br_gen.generate(
6482 &company.code,
6483 &bank_account_id,
6484 period_start,
6485 period_end,
6486 &company.currency,
6487 &period_payments,
6488 );
6489 bank_reconciliations.push(recon);
6490 }
6491 }
6492 info!(
6493 "Bank reconciliations generated: {} reconciliations",
6494 bank_reconciliations.len()
6495 );
6496 }
6497
6498 stats.bank_reconciliation_count = bank_reconciliations.len();
6499 self.check_resources_with_log("post-financial-reporting")?;
6500
6501 if !trial_balances.is_empty() {
6502 info!(
6503 "Period-close trial balances captured: {} periods",
6504 trial_balances.len()
6505 );
6506 }
6507
6508 let notes_to_financial_statements = Vec::new();
6512
6513 Ok(FinancialReportingSnapshot {
6514 financial_statements,
6515 standalone_statements,
6516 consolidated_statements,
6517 consolidation_schedules,
6518 bank_reconciliations,
6519 trial_balances,
6520 segment_reports,
6521 segment_reconciliations,
6522 notes_to_financial_statements,
6523 })
6524 }
6525
6526 fn generate_notes_to_financial_statements(
6533 &self,
6534 financial_reporting: &mut FinancialReportingSnapshot,
6535 accounting_standards: &AccountingStandardsSnapshot,
6536 tax: &TaxSnapshot,
6537 hr: &HrSnapshot,
6538 audit: &AuditSnapshot,
6539 treasury: &TreasurySnapshot,
6540 ) {
6541 use datasynth_config::schema::AccountingFrameworkConfig;
6542 use datasynth_core::models::StatementType;
6543 use datasynth_generators::period_close::notes_generator::{
6544 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6545 };
6546
6547 let seed = self.seed;
6548 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6549 {
6550 Ok(d) => d,
6551 Err(_) => return,
6552 };
6553
6554 let mut notes_gen = NotesGenerator::new(seed + 4235);
6555
6556 for company in &self.config.companies {
6557 let last_period_end = start_date
6558 + chrono::Months::new(self.config.global.period_months)
6559 - chrono::Days::new(1);
6560 let fiscal_year = last_period_end.year() as u16;
6561
6562 let entity_is = financial_reporting
6564 .standalone_statements
6565 .get(&company.code)
6566 .and_then(|stmts| {
6567 stmts.iter().find(|s| {
6568 s.fiscal_year == fiscal_year
6569 && s.statement_type == StatementType::IncomeStatement
6570 })
6571 });
6572 let entity_bs = financial_reporting
6573 .standalone_statements
6574 .get(&company.code)
6575 .and_then(|stmts| {
6576 stmts.iter().find(|s| {
6577 s.fiscal_year == fiscal_year
6578 && s.statement_type == StatementType::BalanceSheet
6579 })
6580 });
6581
6582 let revenue_amount = entity_is
6584 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6585 .map(|li| li.amount);
6586 let ppe_gross = entity_bs
6587 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6588 .map(|li| li.amount);
6589
6590 let framework = match self
6591 .config
6592 .accounting_standards
6593 .framework
6594 .unwrap_or_default()
6595 {
6596 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6597 "IFRS".to_string()
6598 }
6599 _ => "US GAAP".to_string(),
6600 };
6601
6602 let (entity_dta, entity_dtl) = {
6605 let mut dta = rust_decimal::Decimal::ZERO;
6606 let mut dtl = rust_decimal::Decimal::ZERO;
6607 for rf in &tax.deferred_tax.rollforwards {
6608 if rf.entity_code == company.code {
6609 dta += rf.closing_dta;
6610 dtl += rf.closing_dtl;
6611 }
6612 }
6613 (
6614 if dta > rust_decimal::Decimal::ZERO {
6615 Some(dta)
6616 } else {
6617 None
6618 },
6619 if dtl > rust_decimal::Decimal::ZERO {
6620 Some(dtl)
6621 } else {
6622 None
6623 },
6624 )
6625 };
6626
6627 let entity_provisions: Vec<_> = accounting_standards
6630 .provisions
6631 .iter()
6632 .filter(|p| p.entity_code == company.code)
6633 .collect();
6634 let provision_count = entity_provisions.len();
6635 let total_provisions = if provision_count > 0 {
6636 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6637 } else {
6638 None
6639 };
6640
6641 let entity_pension_plan_count = hr
6643 .pension_plans
6644 .iter()
6645 .filter(|p| p.entity_code == company.code)
6646 .count();
6647 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6648 let sum: rust_decimal::Decimal = hr
6649 .pension_disclosures
6650 .iter()
6651 .filter(|d| {
6652 hr.pension_plans
6653 .iter()
6654 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6655 })
6656 .map(|d| d.net_pension_liability)
6657 .sum();
6658 let plan_assets_sum: rust_decimal::Decimal = hr
6659 .pension_plan_assets
6660 .iter()
6661 .filter(|a| {
6662 hr.pension_plans
6663 .iter()
6664 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6665 })
6666 .map(|a| a.fair_value_closing)
6667 .sum();
6668 if entity_pension_plan_count > 0 {
6669 Some(sum + plan_assets_sum)
6670 } else {
6671 None
6672 }
6673 };
6674 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6675 let sum: rust_decimal::Decimal = hr
6676 .pension_plan_assets
6677 .iter()
6678 .filter(|a| {
6679 hr.pension_plans
6680 .iter()
6681 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6682 })
6683 .map(|a| a.fair_value_closing)
6684 .sum();
6685 if entity_pension_plan_count > 0 {
6686 Some(sum)
6687 } else {
6688 None
6689 }
6690 };
6691
6692 let rp_count = audit.related_party_transactions.len();
6695 let se_count = audit.subsequent_events.len();
6696 let adjusting_count = audit
6697 .subsequent_events
6698 .iter()
6699 .filter(|e| {
6700 matches!(
6701 e.classification,
6702 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6703 )
6704 })
6705 .count();
6706
6707 let ctx = NotesGeneratorContext {
6708 entity_code: company.code.clone(),
6709 framework,
6710 period: format!("FY{}", fiscal_year),
6711 period_end: last_period_end,
6712 currency: company.currency.clone(),
6713 revenue_amount,
6714 total_ppe_gross: ppe_gross,
6715 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6716 deferred_tax_asset: entity_dta,
6718 deferred_tax_liability: entity_dtl,
6719 provision_count,
6721 total_provisions,
6722 pension_plan_count: entity_pension_plan_count,
6724 total_dbo: entity_total_dbo,
6725 total_plan_assets: entity_total_plan_assets,
6726 related_party_transaction_count: rp_count,
6728 subsequent_event_count: se_count,
6729 adjusting_event_count: adjusting_count,
6730 ..NotesGeneratorContext::default()
6731 };
6732
6733 let entity_notes = notes_gen.generate(&ctx);
6734 let standard_note_count = entity_notes.len() as u32;
6735 info!(
6736 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6737 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6738 );
6739 financial_reporting
6740 .notes_to_financial_statements
6741 .extend(entity_notes);
6742
6743 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6745 .debt_instruments
6746 .iter()
6747 .filter(|d| d.entity_id == company.code)
6748 .map(|d| {
6749 (
6750 format!("{:?}", d.instrument_type),
6751 d.principal,
6752 d.maturity_date.to_string(),
6753 )
6754 })
6755 .collect();
6756
6757 let hedge_count = treasury.hedge_relationships.len();
6758 let effective_hedges = treasury
6759 .hedge_relationships
6760 .iter()
6761 .filter(|h| h.is_effective)
6762 .count();
6763 let total_notional: rust_decimal::Decimal = treasury
6764 .hedging_instruments
6765 .iter()
6766 .map(|h| h.notional_amount)
6767 .sum();
6768 let total_fair_value: rust_decimal::Decimal = treasury
6769 .hedging_instruments
6770 .iter()
6771 .map(|h| h.fair_value)
6772 .sum();
6773
6774 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6776 .provisions
6777 .iter()
6778 .filter(|p| p.entity_code == company.code)
6779 .map(|p| p.id.as_str())
6780 .collect();
6781 let provision_movements: Vec<(
6782 String,
6783 rust_decimal::Decimal,
6784 rust_decimal::Decimal,
6785 rust_decimal::Decimal,
6786 )> = accounting_standards
6787 .provision_movements
6788 .iter()
6789 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6790 .map(|m| {
6791 let prov_type = accounting_standards
6792 .provisions
6793 .iter()
6794 .find(|p| p.id == m.provision_id)
6795 .map(|p| format!("{:?}", p.provision_type))
6796 .unwrap_or_else(|| "Unknown".to_string());
6797 (prov_type, m.opening, m.additions, m.closing)
6798 })
6799 .collect();
6800
6801 let enhanced_ctx = EnhancedNotesContext {
6802 entity_code: company.code.clone(),
6803 period: format!("FY{}", fiscal_year),
6804 currency: company.currency.clone(),
6805 finished_goods_value: rust_decimal::Decimal::ZERO,
6807 wip_value: rust_decimal::Decimal::ZERO,
6808 raw_materials_value: rust_decimal::Decimal::ZERO,
6809 debt_instruments,
6810 hedge_count,
6811 effective_hedges,
6812 total_notional,
6813 total_fair_value,
6814 provision_movements,
6815 };
6816
6817 let enhanced_notes =
6818 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6819 if !enhanced_notes.is_empty() {
6820 info!(
6821 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6822 company.code,
6823 enhanced_notes.len(),
6824 enhanced_ctx.debt_instruments.len(),
6825 hedge_count,
6826 enhanced_ctx.provision_movements.len(),
6827 );
6828 financial_reporting
6829 .notes_to_financial_statements
6830 .extend(enhanced_notes);
6831 }
6832 }
6833 }
6834
6835 fn build_trial_balance_from_entries(
6841 journal_entries: &[JournalEntry],
6842 coa: &ChartOfAccounts,
6843 company_code: &str,
6844 fiscal_year: u16,
6845 fiscal_period: u8,
6846 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6847 use rust_decimal::Decimal;
6848
6849 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6851 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6852
6853 for je in journal_entries {
6854 if je.header.company_code != company_code
6856 || je.header.fiscal_year != fiscal_year
6857 || je.header.fiscal_period != fiscal_period
6858 {
6859 continue;
6860 }
6861
6862 for line in &je.lines {
6863 let acct = &line.gl_account;
6864 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6865 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6866 }
6867 }
6868
6869 let mut all_accounts: Vec<&String> = account_debits
6871 .keys()
6872 .chain(account_credits.keys())
6873 .collect::<std::collections::HashSet<_>>()
6874 .into_iter()
6875 .collect();
6876 all_accounts.sort();
6877
6878 let mut entries = Vec::new();
6879
6880 for acct_number in all_accounts {
6881 let debit = account_debits
6882 .get(acct_number)
6883 .copied()
6884 .unwrap_or(Decimal::ZERO);
6885 let credit = account_credits
6886 .get(acct_number)
6887 .copied()
6888 .unwrap_or(Decimal::ZERO);
6889
6890 if debit.is_zero() && credit.is_zero() {
6891 continue;
6892 }
6893
6894 let account_name = coa
6896 .get_account(acct_number)
6897 .map(|gl| gl.short_description.clone())
6898 .unwrap_or_else(|| format!("Account {acct_number}"));
6899
6900 let category = Self::category_from_account_code(acct_number);
6905
6906 entries.push(datasynth_generators::TrialBalanceEntry {
6907 account_code: acct_number.clone(),
6908 account_name,
6909 category,
6910 debit_balance: debit,
6911 credit_balance: credit,
6912 });
6913 }
6914
6915 entries
6916 }
6917
6918 fn build_cumulative_trial_balance(
6925 journal_entries: &[JournalEntry],
6926 coa: &ChartOfAccounts,
6927 company_code: &str,
6928 start_date: NaiveDate,
6929 period_end: NaiveDate,
6930 fiscal_year: u16,
6931 fiscal_period: u8,
6932 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6933 use rust_decimal::Decimal;
6934
6935 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6937 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6938
6939 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6941 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6942
6943 for je in journal_entries {
6944 if je.header.company_code != company_code {
6945 continue;
6946 }
6947
6948 for line in &je.lines {
6949 let acct = &line.gl_account;
6950 let category = Self::category_from_account_code(acct);
6951 let is_bs_account = matches!(
6952 category.as_str(),
6953 "Cash"
6954 | "Receivables"
6955 | "Inventory"
6956 | "FixedAssets"
6957 | "Payables"
6958 | "AccruedLiabilities"
6959 | "LongTermDebt"
6960 | "Equity"
6961 );
6962
6963 if is_bs_account {
6964 if je.header.document_date <= period_end
6966 && je.header.document_date >= start_date
6967 {
6968 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6969 line.debit_amount;
6970 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6971 line.credit_amount;
6972 }
6973 } else {
6974 if je.header.fiscal_year == fiscal_year
6976 && je.header.fiscal_period == fiscal_period
6977 {
6978 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6979 line.debit_amount;
6980 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6981 line.credit_amount;
6982 }
6983 }
6984 }
6985 }
6986
6987 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6989 all_accounts.extend(bs_debits.keys().cloned());
6990 all_accounts.extend(bs_credits.keys().cloned());
6991 all_accounts.extend(is_debits.keys().cloned());
6992 all_accounts.extend(is_credits.keys().cloned());
6993
6994 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6995 sorted_accounts.sort();
6996
6997 let mut entries = Vec::new();
6998
6999 for acct_number in &sorted_accounts {
7000 let category = Self::category_from_account_code(acct_number);
7001 let is_bs_account = matches!(
7002 category.as_str(),
7003 "Cash"
7004 | "Receivables"
7005 | "Inventory"
7006 | "FixedAssets"
7007 | "Payables"
7008 | "AccruedLiabilities"
7009 | "LongTermDebt"
7010 | "Equity"
7011 );
7012
7013 let (debit, credit) = if is_bs_account {
7014 (
7015 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7016 bs_credits
7017 .get(acct_number)
7018 .copied()
7019 .unwrap_or(Decimal::ZERO),
7020 )
7021 } else {
7022 (
7023 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7024 is_credits
7025 .get(acct_number)
7026 .copied()
7027 .unwrap_or(Decimal::ZERO),
7028 )
7029 };
7030
7031 if debit.is_zero() && credit.is_zero() {
7032 continue;
7033 }
7034
7035 let account_name = coa
7036 .get_account(acct_number)
7037 .map(|gl| gl.short_description.clone())
7038 .unwrap_or_else(|| format!("Account {acct_number}"));
7039
7040 entries.push(datasynth_generators::TrialBalanceEntry {
7041 account_code: acct_number.clone(),
7042 account_name,
7043 category,
7044 debit_balance: debit,
7045 credit_balance: credit,
7046 });
7047 }
7048
7049 entries
7050 }
7051
7052 fn build_cash_flow_from_trial_balances(
7057 current_tb: &[datasynth_generators::TrialBalanceEntry],
7058 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7059 net_income: rust_decimal::Decimal,
7060 ) -> Vec<CashFlowItem> {
7061 use rust_decimal::Decimal;
7062
7063 let aggregate =
7065 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7066 let mut map: HashMap<String, Decimal> = HashMap::new();
7067 for entry in tb {
7068 let net = entry.debit_balance - entry.credit_balance;
7069 *map.entry(entry.category.clone()).or_default() += net;
7070 }
7071 map
7072 };
7073
7074 let current = aggregate(current_tb);
7075 let prior = prior_tb.map(aggregate);
7076
7077 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7079 *map.get(key).unwrap_or(&Decimal::ZERO)
7080 };
7081
7082 let change = |key: &str| -> Decimal {
7084 let curr = get(¤t, key);
7085 match &prior {
7086 Some(p) => curr - get(p, key),
7087 None => curr,
7088 }
7089 };
7090
7091 let fixed_asset_change = change("FixedAssets");
7094 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7095 -fixed_asset_change
7096 } else {
7097 Decimal::ZERO
7098 };
7099
7100 let ar_change = change("Receivables");
7102 let inventory_change = change("Inventory");
7103 let ap_change = change("Payables");
7105 let accrued_change = change("AccruedLiabilities");
7106
7107 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7108 + (-ap_change)
7109 + (-accrued_change);
7110
7111 let capex = if fixed_asset_change > Decimal::ZERO {
7113 -fixed_asset_change
7114 } else {
7115 Decimal::ZERO
7116 };
7117 let investing_cf = capex;
7118
7119 let debt_change = -change("LongTermDebt");
7121 let equity_change = -change("Equity");
7122 let financing_cf = debt_change + equity_change;
7123
7124 let net_change = operating_cf + investing_cf + financing_cf;
7125
7126 vec![
7127 CashFlowItem {
7128 item_code: "CF-NI".to_string(),
7129 label: "Net Income".to_string(),
7130 category: CashFlowCategory::Operating,
7131 amount: net_income,
7132 amount_prior: None,
7133 sort_order: 1,
7134 is_total: false,
7135 },
7136 CashFlowItem {
7137 item_code: "CF-DEP".to_string(),
7138 label: "Depreciation & Amortization".to_string(),
7139 category: CashFlowCategory::Operating,
7140 amount: depreciation_addback,
7141 amount_prior: None,
7142 sort_order: 2,
7143 is_total: false,
7144 },
7145 CashFlowItem {
7146 item_code: "CF-AR".to_string(),
7147 label: "Change in Accounts Receivable".to_string(),
7148 category: CashFlowCategory::Operating,
7149 amount: -ar_change,
7150 amount_prior: None,
7151 sort_order: 3,
7152 is_total: false,
7153 },
7154 CashFlowItem {
7155 item_code: "CF-AP".to_string(),
7156 label: "Change in Accounts Payable".to_string(),
7157 category: CashFlowCategory::Operating,
7158 amount: -ap_change,
7159 amount_prior: None,
7160 sort_order: 4,
7161 is_total: false,
7162 },
7163 CashFlowItem {
7164 item_code: "CF-INV".to_string(),
7165 label: "Change in Inventory".to_string(),
7166 category: CashFlowCategory::Operating,
7167 amount: -inventory_change,
7168 amount_prior: None,
7169 sort_order: 5,
7170 is_total: false,
7171 },
7172 CashFlowItem {
7173 item_code: "CF-OP".to_string(),
7174 label: "Net Cash from Operating Activities".to_string(),
7175 category: CashFlowCategory::Operating,
7176 amount: operating_cf,
7177 amount_prior: None,
7178 sort_order: 6,
7179 is_total: true,
7180 },
7181 CashFlowItem {
7182 item_code: "CF-CAPEX".to_string(),
7183 label: "Capital Expenditures".to_string(),
7184 category: CashFlowCategory::Investing,
7185 amount: capex,
7186 amount_prior: None,
7187 sort_order: 7,
7188 is_total: false,
7189 },
7190 CashFlowItem {
7191 item_code: "CF-INV-T".to_string(),
7192 label: "Net Cash from Investing Activities".to_string(),
7193 category: CashFlowCategory::Investing,
7194 amount: investing_cf,
7195 amount_prior: None,
7196 sort_order: 8,
7197 is_total: true,
7198 },
7199 CashFlowItem {
7200 item_code: "CF-DEBT".to_string(),
7201 label: "Net Borrowings / (Repayments)".to_string(),
7202 category: CashFlowCategory::Financing,
7203 amount: debt_change,
7204 amount_prior: None,
7205 sort_order: 9,
7206 is_total: false,
7207 },
7208 CashFlowItem {
7209 item_code: "CF-EQ".to_string(),
7210 label: "Equity Changes".to_string(),
7211 category: CashFlowCategory::Financing,
7212 amount: equity_change,
7213 amount_prior: None,
7214 sort_order: 10,
7215 is_total: false,
7216 },
7217 CashFlowItem {
7218 item_code: "CF-FIN-T".to_string(),
7219 label: "Net Cash from Financing Activities".to_string(),
7220 category: CashFlowCategory::Financing,
7221 amount: financing_cf,
7222 amount_prior: None,
7223 sort_order: 11,
7224 is_total: true,
7225 },
7226 CashFlowItem {
7227 item_code: "CF-NET".to_string(),
7228 label: "Net Change in Cash".to_string(),
7229 category: CashFlowCategory::Operating,
7230 amount: net_change,
7231 amount_prior: None,
7232 sort_order: 12,
7233 is_total: true,
7234 },
7235 ]
7236 }
7237
7238 fn calculate_net_income_from_tb(
7242 tb: &[datasynth_generators::TrialBalanceEntry],
7243 ) -> rust_decimal::Decimal {
7244 use rust_decimal::Decimal;
7245
7246 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7247 for entry in tb {
7248 let net = entry.debit_balance - entry.credit_balance;
7249 *aggregated.entry(entry.category.clone()).or_default() += net;
7250 }
7251
7252 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7253 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7254 let opex = *aggregated
7255 .get("OperatingExpenses")
7256 .unwrap_or(&Decimal::ZERO);
7257 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7258 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7259
7260 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7263 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7265 operating_income - tax
7266 }
7267
/// Maps an account code to a reporting category name using its first two
/// characters.
///
/// Any code whose two-character prefix is not listed below (including codes
/// shorter than two characters) falls back to `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        // 1x: assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x: liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x: equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x: income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything else is treated as an operating expense.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
7296
7297 fn phase_hr_data(
7299 &mut self,
7300 stats: &mut EnhancedGenerationStatistics,
7301 ) -> SynthResult<HrSnapshot> {
7302 if !self.phase_config.generate_hr {
7303 debug!("Phase 16: Skipped (HR generation disabled)");
7304 return Ok(HrSnapshot::default());
7305 }
7306
7307 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7308
7309 let seed = self.seed;
7310 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7311 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7312 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7313 let company_code = self
7314 .config
7315 .companies
7316 .first()
7317 .map(|c| c.code.as_str())
7318 .unwrap_or("1000");
7319 let currency = self
7320 .config
7321 .companies
7322 .first()
7323 .map(|c| c.currency.as_str())
7324 .unwrap_or("USD");
7325
7326 let employee_ids: Vec<String> = self
7327 .master_data
7328 .employees
7329 .iter()
7330 .map(|e| e.employee_id.clone())
7331 .collect();
7332
7333 if employee_ids.is_empty() {
7334 debug!("Phase 16: Skipped (no employees available)");
7335 return Ok(HrSnapshot::default());
7336 }
7337
7338 let cost_center_ids: Vec<String> = self
7341 .master_data
7342 .employees
7343 .iter()
7344 .filter_map(|e| e.cost_center.clone())
7345 .collect::<std::collections::HashSet<_>>()
7346 .into_iter()
7347 .collect();
7348
7349 let mut snapshot = HrSnapshot::default();
7350
7351 if self.config.hr.payroll.enabled {
7353 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7354 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7355
7356 let payroll_pack = self.primary_pack();
7358
7359 payroll_gen.set_country_pack(payroll_pack.clone());
7362
7363 let employees_with_salary: Vec<(
7364 String,
7365 rust_decimal::Decimal,
7366 Option<String>,
7367 Option<String>,
7368 )> = self
7369 .master_data
7370 .employees
7371 .iter()
7372 .map(|e| {
7373 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7376 e.base_salary
7377 } else {
7378 rust_decimal::Decimal::from(60_000)
7379 };
7380 (
7381 e.employee_id.clone(),
7382 annual, e.cost_center.clone(),
7384 e.department_id.clone(),
7385 )
7386 })
7387 .collect();
7388
7389 let change_history = &self.master_data.employee_change_history;
7392 let has_changes = !change_history.is_empty();
7393 if has_changes {
7394 debug!(
7395 "Payroll will incorporate {} employee change events",
7396 change_history.len()
7397 );
7398 }
7399
7400 for month in 0..self.config.global.period_months {
7401 let period_start = start_date + chrono::Months::new(month);
7402 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7403 let (run, items) = if has_changes {
7404 payroll_gen.generate_with_changes(
7405 company_code,
7406 &employees_with_salary,
7407 period_start,
7408 period_end,
7409 currency,
7410 change_history,
7411 )
7412 } else {
7413 payroll_gen.generate(
7414 company_code,
7415 &employees_with_salary,
7416 period_start,
7417 period_end,
7418 currency,
7419 )
7420 };
7421 snapshot.payroll_runs.push(run);
7422 snapshot.payroll_run_count += 1;
7423 snapshot.payroll_line_item_count += items.len();
7424 snapshot.payroll_line_items.extend(items);
7425 }
7426 }
7427
7428 if self.config.hr.time_attendance.enabled {
7430 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7431 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7432 if let Some(ctx) = &self.temporal_context {
7436 time_gen.set_temporal_context(Arc::clone(ctx));
7437 }
7438 let entries = time_gen.generate(
7439 &employee_ids,
7440 start_date,
7441 end_date,
7442 &self.config.hr.time_attendance,
7443 );
7444 snapshot.time_entry_count = entries.len();
7445 snapshot.time_entries = entries;
7446 }
7447
7448 if self.config.hr.expenses.enabled {
7450 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7451 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7452 expense_gen.set_country_pack(self.primary_pack().clone());
7453 if let Some(ctx) = &self.temporal_context {
7456 expense_gen.set_temporal_context(Arc::clone(ctx));
7457 }
7458 let company_currency = self
7459 .config
7460 .companies
7461 .first()
7462 .map(|c| c.currency.as_str())
7463 .unwrap_or("USD");
7464 let reports = expense_gen.generate_with_currency(
7465 &employee_ids,
7466 start_date,
7467 end_date,
7468 &self.config.hr.expenses,
7469 company_currency,
7470 );
7471 snapshot.expense_report_count = reports.len();
7472 snapshot.expense_reports = reports;
7473 }
7474
7475 if self.config.hr.payroll.enabled {
7477 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7478 let employee_pairs: Vec<(String, String)> = self
7479 .master_data
7480 .employees
7481 .iter()
7482 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7483 .collect();
7484 let enrollments =
7485 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7486 snapshot.benefit_enrollment_count = enrollments.len();
7487 snapshot.benefit_enrollments = enrollments;
7488 }
7489
7490 if self.phase_config.generate_hr {
7492 let entity_name = self
7493 .config
7494 .companies
7495 .first()
7496 .map(|c| c.name.as_str())
7497 .unwrap_or("Entity");
7498 let period_months = self.config.global.period_months;
7499 let period_label = {
7500 let y = start_date.year();
7501 let m = start_date.month();
7502 if period_months >= 12 {
7503 format!("FY{y}")
7504 } else {
7505 format!("{y}-{m:02}")
7506 }
7507 };
7508 let reporting_date =
7509 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7510
7511 let avg_salary: Option<rust_decimal::Decimal> = {
7516 let employee_count = employee_ids.len();
7517 if self.config.hr.payroll.enabled
7518 && employee_count > 0
7519 && !snapshot.payroll_runs.is_empty()
7520 {
7521 let total_gross: rust_decimal::Decimal = snapshot
7523 .payroll_runs
7524 .iter()
7525 .filter(|r| r.company_code == company_code)
7526 .map(|r| r.total_gross)
7527 .sum();
7528 if total_gross > rust_decimal::Decimal::ZERO {
7529 let annual_total = if period_months > 0 && period_months < 12 {
7531 total_gross * rust_decimal::Decimal::from(12u32)
7532 / rust_decimal::Decimal::from(period_months)
7533 } else {
7534 total_gross
7535 };
7536 Some(
7537 (annual_total / rust_decimal::Decimal::from(employee_count))
7538 .round_dp(2),
7539 )
7540 } else {
7541 None
7542 }
7543 } else {
7544 None
7545 }
7546 };
7547
7548 let mut pension_gen =
7549 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7550 let pension_snap = pension_gen.generate(
7551 company_code,
7552 entity_name,
7553 &period_label,
7554 reporting_date,
7555 employee_ids.len(),
7556 currency,
7557 avg_salary,
7558 period_months,
7559 );
7560 snapshot.pension_plan_count = pension_snap.plans.len();
7561 snapshot.pension_plans = pension_snap.plans;
7562 snapshot.pension_obligations = pension_snap.obligations;
7563 snapshot.pension_plan_assets = pension_snap.plan_assets;
7564 snapshot.pension_disclosures = pension_snap.disclosures;
7565 snapshot.pension_journal_entries = pension_snap.journal_entries;
7570 }
7571
7572 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7574 let period_months = self.config.global.period_months;
7575 let period_label = {
7576 let y = start_date.year();
7577 let m = start_date.month();
7578 if period_months >= 12 {
7579 format!("FY{y}")
7580 } else {
7581 format!("{y}-{m:02}")
7582 }
7583 };
7584 let reporting_date =
7585 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7586
7587 let mut stock_comp_gen =
7588 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7589 let stock_snap = stock_comp_gen.generate(
7590 company_code,
7591 &employee_ids,
7592 start_date,
7593 &period_label,
7594 reporting_date,
7595 currency,
7596 );
7597 snapshot.stock_grant_count = stock_snap.grants.len();
7598 snapshot.stock_grants = stock_snap.grants;
7599 snapshot.stock_comp_expenses = stock_snap.expenses;
7600 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7601 }
7602
7603 stats.payroll_run_count = snapshot.payroll_run_count;
7604 stats.time_entry_count = snapshot.time_entry_count;
7605 stats.expense_report_count = snapshot.expense_report_count;
7606 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7607 stats.pension_plan_count = snapshot.pension_plan_count;
7608 stats.stock_grant_count = snapshot.stock_grant_count;
7609
7610 info!(
7611 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7612 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7613 snapshot.time_entry_count, snapshot.expense_report_count,
7614 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7615 snapshot.stock_grant_count
7616 );
7617 self.check_resources_with_log("post-hr")?;
7618
7619 Ok(snapshot)
7620 }
7621
7622 fn phase_accounting_standards(
7624 &mut self,
7625 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7626 journal_entries: &[JournalEntry],
7627 stats: &mut EnhancedGenerationStatistics,
7628 ) -> SynthResult<AccountingStandardsSnapshot> {
7629 if !self.phase_config.generate_accounting_standards {
7630 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7631 return Ok(AccountingStandardsSnapshot::default());
7632 }
7633 info!("Phase 17: Generating Accounting Standards Data");
7634
7635 let seed = self.seed;
7636 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7637 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7638 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7639 let company_code = self
7640 .config
7641 .companies
7642 .first()
7643 .map(|c| c.code.as_str())
7644 .unwrap_or("1000");
7645 let currency = self
7646 .config
7647 .companies
7648 .first()
7649 .map(|c| c.currency.as_str())
7650 .unwrap_or("USD");
7651
7652 let framework = match self.config.accounting_standards.framework {
7657 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7658 datasynth_standards::framework::AccountingFramework::UsGaap
7659 }
7660 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7661 datasynth_standards::framework::AccountingFramework::Ifrs
7662 }
7663 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7664 datasynth_standards::framework::AccountingFramework::DualReporting
7665 }
7666 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7667 datasynth_standards::framework::AccountingFramework::FrenchGaap
7668 }
7669 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7670 datasynth_standards::framework::AccountingFramework::GermanGaap
7671 }
7672 None => {
7673 let pack = self.primary_pack();
7675 let pack_fw = pack.accounting.framework.as_str();
7676 match pack_fw {
7677 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7678 "dual_reporting" => {
7679 datasynth_standards::framework::AccountingFramework::DualReporting
7680 }
7681 "french_gaap" => {
7682 datasynth_standards::framework::AccountingFramework::FrenchGaap
7683 }
7684 "german_gaap" | "hgb" => {
7685 datasynth_standards::framework::AccountingFramework::GermanGaap
7686 }
7687 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7689 }
7690 }
7691 };
7692
7693 let mut snapshot = AccountingStandardsSnapshot::default();
7694
7695 if self.config.accounting_standards.revenue_recognition.enabled {
7697 let customer_ids: Vec<String> = self
7698 .master_data
7699 .customers
7700 .iter()
7701 .map(|c| c.customer_id.clone())
7702 .collect();
7703
7704 if !customer_ids.is_empty() {
7705 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7706 let contracts = rev_gen.generate(
7707 company_code,
7708 &customer_ids,
7709 start_date,
7710 end_date,
7711 currency,
7712 &self.config.accounting_standards.revenue_recognition,
7713 framework,
7714 );
7715 snapshot.revenue_contract_count = contracts.len();
7716 snapshot.contracts = contracts;
7717 }
7718 }
7719
7720 if self.config.accounting_standards.impairment.enabled {
7722 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7723 .master_data
7724 .assets
7725 .iter()
7726 .map(|a| {
7727 (
7728 a.asset_id.clone(),
7729 a.description.clone(),
7730 a.acquisition_cost,
7731 )
7732 })
7733 .collect();
7734
7735 if !asset_data.is_empty() {
7736 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7737 let tests = imp_gen.generate(
7738 company_code,
7739 &asset_data,
7740 end_date,
7741 &self.config.accounting_standards.impairment,
7742 framework,
7743 );
7744 snapshot.impairment_test_count = tests.len();
7745 snapshot.impairment_tests = tests;
7746 }
7747 }
7748
7749 if self
7751 .config
7752 .accounting_standards
7753 .business_combinations
7754 .enabled
7755 {
7756 let bc_config = &self.config.accounting_standards.business_combinations;
7757 let framework_str = match framework {
7758 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7759 _ => "US_GAAP",
7760 };
7761 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7762 let bc_snap = bc_gen.generate(
7763 company_code,
7764 currency,
7765 start_date,
7766 end_date,
7767 bc_config.acquisition_count,
7768 framework_str,
7769 );
7770 snapshot.business_combination_count = bc_snap.combinations.len();
7771 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7772 snapshot.business_combinations = bc_snap.combinations;
7773 }
7774
7775 if self
7777 .config
7778 .accounting_standards
7779 .expected_credit_loss
7780 .enabled
7781 {
7782 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7783 let framework_str = match framework {
7784 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7785 _ => "ASC_326",
7786 };
7787
7788 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7791
7792 let mut ecl_gen = EclGenerator::new(seed + 43);
7793
7794 let bucket_exposures: Vec<(
7796 datasynth_core::models::subledger::ar::AgingBucket,
7797 rust_decimal::Decimal,
7798 )> = if ar_aging_reports.is_empty() {
7799 use datasynth_core::models::subledger::ar::AgingBucket;
7801 vec![
7802 (
7803 AgingBucket::Current,
7804 rust_decimal::Decimal::from(500_000_u32),
7805 ),
7806 (
7807 AgingBucket::Days1To30,
7808 rust_decimal::Decimal::from(120_000_u32),
7809 ),
7810 (
7811 AgingBucket::Days31To60,
7812 rust_decimal::Decimal::from(45_000_u32),
7813 ),
7814 (
7815 AgingBucket::Days61To90,
7816 rust_decimal::Decimal::from(15_000_u32),
7817 ),
7818 (
7819 AgingBucket::Over90Days,
7820 rust_decimal::Decimal::from(8_000_u32),
7821 ),
7822 ]
7823 } else {
7824 use datasynth_core::models::subledger::ar::AgingBucket;
7825 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7827 std::collections::HashMap::new();
7828 for report in ar_aging_reports {
7829 for (bucket, amount) in &report.bucket_totals {
7830 *totals.entry(*bucket).or_default() += amount;
7831 }
7832 }
7833 AgingBucket::all()
7834 .into_iter()
7835 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7836 .collect()
7837 };
7838
7839 let ecl_snap = ecl_gen.generate(
7840 company_code,
7841 end_date,
7842 &bucket_exposures,
7843 ecl_config,
7844 &period_label,
7845 framework_str,
7846 );
7847
7848 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7849 snapshot.ecl_models = ecl_snap.ecl_models;
7850 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7851 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7852 }
7853
7854 {
7856 let framework_str = match framework {
7857 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7858 _ => "US_GAAP",
7859 };
7860
7861 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7866 .max(rust_decimal::Decimal::from(100_000_u32));
7867
7868 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7869
7870 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7871 let prov_snap = prov_gen.generate(
7872 company_code,
7873 currency,
7874 revenue_proxy,
7875 end_date,
7876 &period_label,
7877 framework_str,
7878 None, );
7880
7881 snapshot.provision_count = prov_snap.provisions.len();
7882 snapshot.provisions = prov_snap.provisions;
7883 snapshot.provision_movements = prov_snap.movements;
7884 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7885 snapshot.provision_journal_entries = prov_snap.journal_entries;
7886 }
7887
7888 {
7892 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7893
7894 let presentation_currency = self
7895 .config
7896 .global
7897 .presentation_currency
7898 .clone()
7899 .unwrap_or_else(|| self.config.global.group_currency.clone());
7900
7901 let mut rate_table = FxRateTable::new(&presentation_currency);
7904
7905 let base_rates = base_rates_usd();
7909 for (ccy, rate) in &base_rates {
7910 rate_table.add_rate(FxRate::new(
7911 ccy,
7912 "USD",
7913 RateType::Closing,
7914 end_date,
7915 *rate,
7916 "SYNTHETIC",
7917 ));
7918 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7921 rate_table.add_rate(FxRate::new(
7922 ccy,
7923 "USD",
7924 RateType::Average,
7925 end_date,
7926 avg,
7927 "SYNTHETIC",
7928 ));
7929 }
7930
7931 let mut translation_results = Vec::new();
7932 for company in &self.config.companies {
7933 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7936 .max(rust_decimal::Decimal::from(100_000_u32));
7937
7938 let func_ccy = company
7939 .functional_currency
7940 .clone()
7941 .unwrap_or_else(|| company.currency.clone());
7942
7943 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7944 &company.code,
7945 &func_ccy,
7946 &presentation_currency,
7947 &ias21_period_label,
7948 end_date,
7949 company_revenue,
7950 &rate_table,
7951 );
7952 translation_results.push(result);
7953 }
7954
7955 snapshot.currency_translation_count = translation_results.len();
7956 snapshot.currency_translation_results = translation_results;
7957 }
7958
7959 stats.revenue_contract_count = snapshot.revenue_contract_count;
7960 stats.impairment_test_count = snapshot.impairment_test_count;
7961 stats.business_combination_count = snapshot.business_combination_count;
7962 stats.ecl_model_count = snapshot.ecl_model_count;
7963 stats.provision_count = snapshot.provision_count;
7964
7965 if self.config.accounting_standards.leases.enabled {
7969 use datasynth_generators::standards::LeaseGenerator;
7970 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7971 .unwrap_or_else(|_| {
7972 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7973 });
7974 let framework =
7975 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7976 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7977 for company in &self.config.companies {
7978 let leases = lease_gen.generate(
7979 &company.code,
7980 start_date,
7981 &self.config.accounting_standards.leases,
7982 framework,
7983 );
7984 snapshot.lease_count += leases.len();
7985 snapshot.leases.extend(leases);
7986 }
7987 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7988 }
7989
7990 if self.config.accounting_standards.fair_value.enabled {
7994 use datasynth_generators::standards::FairValueGenerator;
7995 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7996 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7997 + chrono::Months::new(self.config.global.period_months);
7998 let framework =
7999 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8000 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8001 for company in &self.config.companies {
8002 let measurements = fv_gen.generate(
8003 &company.code,
8004 end_date,
8005 &company.currency,
8006 &self.config.accounting_standards.fair_value,
8007 framework,
8008 );
8009 snapshot.fair_value_measurement_count += measurements.len();
8010 snapshot.fair_value_measurements.extend(measurements);
8011 }
8012 info!(
8013 "v3.3.1 fair value measurements: {}",
8014 snapshot.fair_value_measurement_count
8015 );
8016 }
8017
8018 if self.config.accounting_standards.generate_differences
8022 && matches!(
8023 self.config.accounting_standards.framework,
8024 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8025 )
8026 {
8027 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8028 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8029 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8030 + chrono::Months::new(self.config.global.period_months);
8031 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8032 for company in &self.config.companies {
8033 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8034 snapshot.framework_difference_count += records.len();
8035 snapshot.framework_differences.extend(records);
8036 snapshot.framework_reconciliations.push(reconciliation);
8037 }
8038 info!(
8039 "v3.3.1 framework reconciliation: {} differences across {} entities",
8040 snapshot.framework_difference_count,
8041 snapshot.framework_reconciliations.len()
8042 );
8043 }
8044
8045 info!(
8046 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8047 snapshot.revenue_contract_count,
8048 snapshot.impairment_test_count,
8049 snapshot.business_combination_count,
8050 snapshot.ecl_model_count,
8051 snapshot.provision_count,
8052 snapshot.currency_translation_count,
8053 snapshot.lease_count,
8054 snapshot.fair_value_measurement_count,
8055 snapshot.framework_difference_count,
8056 );
8057 self.check_resources_with_log("post-accounting-standards")?;
8058
8059 Ok(snapshot)
8060 }
8061
8062 fn resolve_accounting_framework(
8066 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8067 ) -> datasynth_standards::framework::AccountingFramework {
8068 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8069 use datasynth_standards::framework::AccountingFramework as Fw;
8070 match cfg {
8071 Some(Cfg::Ifrs) => Fw::Ifrs,
8072 Some(Cfg::DualReporting) => Fw::DualReporting,
8073 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8074 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8075 _ => Fw::UsGaap,
8076 }
8077 }
8078
8079 fn phase_manufacturing(
8081 &mut self,
8082 stats: &mut EnhancedGenerationStatistics,
8083 ) -> SynthResult<ManufacturingSnapshot> {
8084 if !self.phase_config.generate_manufacturing {
8085 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8086 return Ok(ManufacturingSnapshot::default());
8087 }
8088 info!("Phase 18: Generating Manufacturing Data");
8089
8090 let seed = self.seed;
8091 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8092 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8093 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8094 let company_code = self
8095 .config
8096 .companies
8097 .first()
8098 .map(|c| c.code.as_str())
8099 .unwrap_or("1000");
8100
8101 let material_data: Vec<(String, String)> = self
8102 .master_data
8103 .materials
8104 .iter()
8105 .map(|m| (m.material_id.clone(), m.description.clone()))
8106 .collect();
8107
8108 if material_data.is_empty() {
8109 debug!("Phase 18: Skipped (no materials available)");
8110 return Ok(ManufacturingSnapshot::default());
8111 }
8112
8113 let mut snapshot = ManufacturingSnapshot::default();
8114
8115 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8117 if let Some(ctx) = &self.temporal_context {
8119 prod_gen.set_temporal_context(Arc::clone(ctx));
8120 }
8121 let production_orders = prod_gen.generate(
8122 company_code,
8123 &material_data,
8124 start_date,
8125 end_date,
8126 &self.config.manufacturing.production_orders,
8127 &self.config.manufacturing.costing,
8128 &self.config.manufacturing.routing,
8129 );
8130 snapshot.production_order_count = production_orders.len();
8131
8132 let inspection_data: Vec<(String, String, String)> = production_orders
8134 .iter()
8135 .map(|po| {
8136 (
8137 po.order_id.clone(),
8138 po.material_id.clone(),
8139 po.material_description.clone(),
8140 )
8141 })
8142 .collect();
8143
8144 snapshot.production_orders = production_orders;
8145
8146 if !inspection_data.is_empty() {
8147 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8148 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8149 snapshot.quality_inspection_count = inspections.len();
8150 snapshot.quality_inspections = inspections;
8151 }
8152
8153 let storage_locations: Vec<(String, String)> = material_data
8155 .iter()
8156 .enumerate()
8157 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8158 .collect();
8159
8160 let employee_ids: Vec<String> = self
8161 .master_data
8162 .employees
8163 .iter()
8164 .map(|e| e.employee_id.clone())
8165 .collect();
8166 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8167 .with_employee_pool(employee_ids);
8168 let mut cycle_count_total = 0usize;
8169 for month in 0..self.config.global.period_months {
8170 let count_date = start_date + chrono::Months::new(month);
8171 let items_per_count = storage_locations.len().clamp(10, 50);
8172 let cc = cc_gen.generate(
8173 company_code,
8174 &storage_locations,
8175 count_date,
8176 items_per_count,
8177 );
8178 snapshot.cycle_counts.push(cc);
8179 cycle_count_total += 1;
8180 }
8181 snapshot.cycle_count_count = cycle_count_total;
8182
8183 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8185 let bom_components = bom_gen.generate(company_code, &material_data);
8186 snapshot.bom_component_count = bom_components.len();
8187 snapshot.bom_components = bom_components;
8188
8189 let currency = self
8191 .config
8192 .companies
8193 .first()
8194 .map(|c| c.currency.as_str())
8195 .unwrap_or("USD");
8196 let production_order_ids: Vec<String> = snapshot
8197 .production_orders
8198 .iter()
8199 .map(|po| po.order_id.clone())
8200 .collect();
8201 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8202 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8203 company_code,
8204 &material_data,
8205 start_date,
8206 end_date,
8207 2,
8208 currency,
8209 &production_order_ids,
8210 );
8211 snapshot.inventory_movement_count = inventory_movements.len();
8212 snapshot.inventory_movements = inventory_movements;
8213
8214 stats.production_order_count = snapshot.production_order_count;
8215 stats.quality_inspection_count = snapshot.quality_inspection_count;
8216 stats.cycle_count_count = snapshot.cycle_count_count;
8217 stats.bom_component_count = snapshot.bom_component_count;
8218 stats.inventory_movement_count = snapshot.inventory_movement_count;
8219
8220 info!(
8221 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8222 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8223 snapshot.bom_component_count, snapshot.inventory_movement_count
8224 );
8225 self.check_resources_with_log("post-manufacturing")?;
8226
8227 Ok(snapshot)
8228 }
8229
8230 fn phase_sales_kpi_budgets(
8232 &mut self,
8233 coa: &Arc<ChartOfAccounts>,
8234 financial_reporting: &FinancialReportingSnapshot,
8235 stats: &mut EnhancedGenerationStatistics,
8236 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8237 if !self.phase_config.generate_sales_kpi_budgets {
8238 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8239 return Ok(SalesKpiBudgetsSnapshot::default());
8240 }
8241 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8242
8243 let seed = self.seed;
8244 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8245 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8246 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8247 let company_code = self
8248 .config
8249 .companies
8250 .first()
8251 .map(|c| c.code.as_str())
8252 .unwrap_or("1000");
8253
8254 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8255
8256 if self.config.sales_quotes.enabled {
8258 let customer_data: Vec<(String, String)> = self
8259 .master_data
8260 .customers
8261 .iter()
8262 .map(|c| (c.customer_id.clone(), c.name.clone()))
8263 .collect();
8264 let material_data: Vec<(String, String)> = self
8265 .master_data
8266 .materials
8267 .iter()
8268 .map(|m| (m.material_id.clone(), m.description.clone()))
8269 .collect();
8270
8271 if !customer_data.is_empty() && !material_data.is_empty() {
8272 let employee_ids: Vec<String> = self
8273 .master_data
8274 .employees
8275 .iter()
8276 .map(|e| e.employee_id.clone())
8277 .collect();
8278 let customer_ids: Vec<String> = self
8279 .master_data
8280 .customers
8281 .iter()
8282 .map(|c| c.customer_id.clone())
8283 .collect();
8284 let company_currency = self
8285 .config
8286 .companies
8287 .first()
8288 .map(|c| c.currency.as_str())
8289 .unwrap_or("USD");
8290
8291 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8292 .with_pools(employee_ids, customer_ids);
8293 let quotes = quote_gen.generate_with_currency(
8294 company_code,
8295 &customer_data,
8296 &material_data,
8297 start_date,
8298 end_date,
8299 &self.config.sales_quotes,
8300 company_currency,
8301 );
8302 snapshot.sales_quote_count = quotes.len();
8303 snapshot.sales_quotes = quotes;
8304 }
8305 }
8306
8307 if self.config.financial_reporting.management_kpis.enabled {
8309 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8310 let mut kpis = kpi_gen.generate(
8311 company_code,
8312 start_date,
8313 end_date,
8314 &self.config.financial_reporting.management_kpis,
8315 );
8316
8317 {
8319 use rust_decimal::Decimal;
8320
8321 if let Some(income_stmt) =
8322 financial_reporting.financial_statements.iter().find(|fs| {
8323 fs.statement_type == StatementType::IncomeStatement
8324 && fs.company_code == company_code
8325 })
8326 {
8327 let total_revenue: Decimal = income_stmt
8329 .line_items
8330 .iter()
8331 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8332 .map(|li| li.amount)
8333 .sum();
8334 let total_cogs: Decimal = income_stmt
8335 .line_items
8336 .iter()
8337 .filter(|li| {
8338 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8339 && !li.is_total
8340 })
8341 .map(|li| li.amount.abs())
8342 .sum();
8343 let total_opex: Decimal = income_stmt
8344 .line_items
8345 .iter()
8346 .filter(|li| {
8347 li.section.contains("Expense")
8348 && !li.is_total
8349 && !li.section.contains("Cost")
8350 })
8351 .map(|li| li.amount.abs())
8352 .sum();
8353
8354 if total_revenue > Decimal::ZERO {
8355 let hundred = Decimal::from(100);
8356 let gross_margin_pct =
8357 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8358 let operating_income = total_revenue - total_cogs - total_opex;
8359 let op_margin_pct =
8360 (operating_income * hundred / total_revenue).round_dp(2);
8361
8362 for kpi in &mut kpis {
8364 if kpi.name == "Gross Margin" {
8365 kpi.value = gross_margin_pct;
8366 } else if kpi.name == "Operating Margin" {
8367 kpi.value = op_margin_pct;
8368 }
8369 }
8370 }
8371 }
8372
8373 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8375 fs.statement_type == StatementType::BalanceSheet
8376 && fs.company_code == company_code
8377 }) {
8378 let current_assets: Decimal = bs
8379 .line_items
8380 .iter()
8381 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8382 .map(|li| li.amount)
8383 .sum();
8384 let current_liabilities: Decimal = bs
8385 .line_items
8386 .iter()
8387 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8388 .map(|li| li.amount.abs())
8389 .sum();
8390
8391 if current_liabilities > Decimal::ZERO {
8392 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8393 for kpi in &mut kpis {
8394 if kpi.name == "Current Ratio" {
8395 kpi.value = current_ratio;
8396 }
8397 }
8398 }
8399 }
8400 }
8401
8402 snapshot.kpi_count = kpis.len();
8403 snapshot.kpis = kpis;
8404 }
8405
8406 if self.config.financial_reporting.budgets.enabled {
8408 let account_data: Vec<(String, String)> = coa
8409 .accounts
8410 .iter()
8411 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8412 .collect();
8413
8414 if !account_data.is_empty() {
8415 let fiscal_year = start_date.year() as u32;
8416 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8417 let budget = budget_gen.generate(
8418 company_code,
8419 fiscal_year,
8420 &account_data,
8421 &self.config.financial_reporting.budgets,
8422 );
8423 snapshot.budget_line_count = budget.line_items.len();
8424 snapshot.budgets.push(budget);
8425 }
8426 }
8427
8428 stats.sales_quote_count = snapshot.sales_quote_count;
8429 stats.kpi_count = snapshot.kpi_count;
8430 stats.budget_line_count = snapshot.budget_line_count;
8431
8432 info!(
8433 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8434 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8435 );
8436 self.check_resources_with_log("post-sales-kpi-budgets")?;
8437
8438 Ok(snapshot)
8439 }
8440
8441 fn compute_pre_tax_income(
8448 company_code: &str,
8449 journal_entries: &[JournalEntry],
8450 ) -> rust_decimal::Decimal {
8451 use datasynth_core::accounts::AccountCategory;
8452 use rust_decimal::Decimal;
8453
8454 let mut total_revenue = Decimal::ZERO;
8455 let mut total_expenses = Decimal::ZERO;
8456
8457 for je in journal_entries {
8458 if je.header.company_code != company_code {
8459 continue;
8460 }
8461 for line in &je.lines {
8462 let cat = AccountCategory::from_account(&line.gl_account);
8463 match cat {
8464 AccountCategory::Revenue => {
8465 total_revenue += line.credit_amount - line.debit_amount;
8466 }
8467 AccountCategory::Cogs
8468 | AccountCategory::OperatingExpense
8469 | AccountCategory::OtherIncomeExpense => {
8470 total_expenses += line.debit_amount - line.credit_amount;
8471 }
8472 _ => {}
8473 }
8474 }
8475 }
8476
8477 let pti = (total_revenue - total_expenses).round_dp(2);
8478 if pti == rust_decimal::Decimal::ZERO {
8479 rust_decimal::Decimal::from(1_000_000u32)
8482 } else {
8483 pti
8484 }
8485 }
8486
8487 fn phase_tax_generation(
8489 &mut self,
8490 document_flows: &DocumentFlowSnapshot,
8491 journal_entries: &[JournalEntry],
8492 stats: &mut EnhancedGenerationStatistics,
8493 ) -> SynthResult<TaxSnapshot> {
8494 if !self.phase_config.generate_tax {
8495 debug!("Phase 20: Skipped (tax generation disabled)");
8496 return Ok(TaxSnapshot::default());
8497 }
8498 info!("Phase 20: Generating Tax Data");
8499
8500 let seed = self.seed;
8501 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8502 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8503 let fiscal_year = start_date.year();
8504 let company_code = self
8505 .config
8506 .companies
8507 .first()
8508 .map(|c| c.code.as_str())
8509 .unwrap_or("1000");
8510
8511 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8512 seed + 370,
8513 self.config.tax.clone(),
8514 );
8515
8516 let pack = self.primary_pack().clone();
8517 let (jurisdictions, codes) =
8518 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8519
8520 let mut provisions = Vec::new();
8522 if self.config.tax.provisions.enabled {
8523 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8524 for company in &self.config.companies {
8525 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8526 let statutory_rate = rust_decimal::Decimal::new(
8527 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8528 2,
8529 );
8530 let provision = provision_gen.generate(
8531 &company.code,
8532 start_date,
8533 pre_tax_income,
8534 statutory_rate,
8535 );
8536 provisions.push(provision);
8537 }
8538 }
8539
8540 let mut tax_lines = Vec::new();
8542 if !codes.is_empty() {
8543 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8544 datasynth_generators::TaxLineGeneratorConfig::default(),
8545 codes.clone(),
8546 seed + 372,
8547 );
8548
8549 let buyer_country = self
8552 .config
8553 .companies
8554 .first()
8555 .map(|c| c.country.as_str())
8556 .unwrap_or("US");
8557 for vi in &document_flows.vendor_invoices {
8558 let lines = tax_line_gen.generate_for_document(
8559 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8560 &vi.header.document_id,
8561 buyer_country, buyer_country,
8563 vi.payable_amount,
8564 vi.header.document_date,
8565 None,
8566 );
8567 tax_lines.extend(lines);
8568 }
8569
8570 for ci in &document_flows.customer_invoices {
8572 let lines = tax_line_gen.generate_for_document(
8573 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8574 &ci.header.document_id,
8575 buyer_country, buyer_country,
8577 ci.total_gross_amount,
8578 ci.header.document_date,
8579 None,
8580 );
8581 tax_lines.extend(lines);
8582 }
8583 }
8584
8585 let deferred_tax = {
8587 let companies: Vec<(&str, &str)> = self
8588 .config
8589 .companies
8590 .iter()
8591 .map(|c| (c.code.as_str(), c.country.as_str()))
8592 .collect();
8593 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8594 deferred_gen.generate(&companies, start_date, journal_entries)
8595 };
8596
8597 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8600 std::collections::HashMap::new();
8601 for vi in &document_flows.vendor_invoices {
8602 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8603 }
8604 for ci in &document_flows.customer_invoices {
8605 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8606 }
8607
8608 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8611 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8612 &tax_lines,
8613 company_code,
8614 &doc_dates,
8615 end_date,
8616 );
8617 debug!("Generated {} tax posting JEs", jes.len());
8618 jes
8619 } else {
8620 Vec::new()
8621 };
8622
8623 let snapshot = TaxSnapshot {
8624 jurisdiction_count: jurisdictions.len(),
8625 code_count: codes.len(),
8626 jurisdictions,
8627 codes,
8628 tax_provisions: provisions,
8629 tax_lines,
8630 tax_returns: Vec::new(),
8631 withholding_records: Vec::new(),
8632 tax_anomaly_labels: Vec::new(),
8633 deferred_tax,
8634 tax_posting_journal_entries,
8635 };
8636
8637 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8638 stats.tax_code_count = snapshot.code_count;
8639 stats.tax_provision_count = snapshot.tax_provisions.len();
8640 stats.tax_line_count = snapshot.tax_lines.len();
8641
8642 info!(
8643 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8644 snapshot.jurisdiction_count,
8645 snapshot.code_count,
8646 snapshot.tax_provisions.len(),
8647 snapshot.deferred_tax.temporary_differences.len(),
8648 snapshot.deferred_tax.journal_entries.len(),
8649 snapshot.tax_posting_journal_entries.len(),
8650 );
8651 self.check_resources_with_log("post-tax")?;
8652
8653 Ok(snapshot)
8654 }
8655
8656 fn phase_esg_generation(
8658 &mut self,
8659 document_flows: &DocumentFlowSnapshot,
8660 manufacturing: &ManufacturingSnapshot,
8661 stats: &mut EnhancedGenerationStatistics,
8662 ) -> SynthResult<EsgSnapshot> {
8663 if !self.phase_config.generate_esg {
8664 debug!("Phase 21: Skipped (ESG generation disabled)");
8665 return Ok(EsgSnapshot::default());
8666 }
8667 let degradation = self.check_resources()?;
8668 if degradation >= DegradationLevel::Reduced {
8669 debug!(
8670 "Phase skipped due to resource pressure (degradation: {:?})",
8671 degradation
8672 );
8673 return Ok(EsgSnapshot::default());
8674 }
8675 info!("Phase 21: Generating ESG Data");
8676
8677 let seed = self.seed;
8678 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8679 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8680 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8681 let entity_id = self
8682 .config
8683 .companies
8684 .first()
8685 .map(|c| c.code.as_str())
8686 .unwrap_or("1000");
8687
8688 let esg_cfg = &self.config.esg;
8689 let mut snapshot = EsgSnapshot::default();
8690
8691 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8693 esg_cfg.environmental.energy.clone(),
8694 seed + 80,
8695 );
8696 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8697
8698 let facility_count = esg_cfg.environmental.energy.facility_count;
8700 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8701 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8702
8703 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8705 seed + 82,
8706 esg_cfg.environmental.waste.diversion_target,
8707 facility_count,
8708 );
8709 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8710
8711 let mut emission_gen =
8713 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8714
8715 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8717 .iter()
8718 .map(|e| datasynth_generators::EnergyInput {
8719 facility_id: e.facility_id.clone(),
8720 energy_type: match e.energy_source {
8721 EnergySourceType::NaturalGas => {
8722 datasynth_generators::EnergyInputType::NaturalGas
8723 }
8724 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8725 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8726 _ => datasynth_generators::EnergyInputType::Electricity,
8727 },
8728 consumption_kwh: e.consumption_kwh,
8729 period: e.period,
8730 })
8731 .collect();
8732
8733 if !manufacturing.production_orders.is_empty() {
8735 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8736 &manufacturing.production_orders,
8737 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8740 if !mfg_energy.is_empty() {
8741 info!(
8742 "ESG: {} energy inputs derived from {} production orders",
8743 mfg_energy.len(),
8744 manufacturing.production_orders.len(),
8745 );
8746 energy_inputs.extend(mfg_energy);
8747 }
8748 }
8749
8750 let mut emissions = Vec::new();
8751 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8752 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8753
8754 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8756 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8757 for payment in &document_flows.payments {
8758 if payment.is_vendor {
8759 *totals
8760 .entry(payment.business_partner_id.clone())
8761 .or_default() += payment.amount;
8762 }
8763 }
8764 totals
8765 };
8766 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8767 .master_data
8768 .vendors
8769 .iter()
8770 .map(|v| {
8771 let spend = vendor_payment_totals
8772 .get(&v.vendor_id)
8773 .copied()
8774 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8775 datasynth_generators::VendorSpendInput {
8776 vendor_id: v.vendor_id.clone(),
8777 category: format!("{:?}", v.vendor_type).to_lowercase(),
8778 spend,
8779 country: v.country.clone(),
8780 }
8781 })
8782 .collect();
8783 if !vendor_spend.is_empty() {
8784 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8785 entity_id,
8786 &vendor_spend,
8787 start_date,
8788 end_date,
8789 ));
8790 }
8791
8792 let headcount = self.master_data.employees.len() as u32;
8794 if headcount > 0 {
8795 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8796 emissions.extend(emission_gen.generate_scope3_business_travel(
8797 entity_id,
8798 travel_spend,
8799 start_date,
8800 ));
8801 emissions
8802 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8803 }
8804
8805 snapshot.emission_count = emissions.len();
8806 snapshot.emissions = emissions;
8807 snapshot.energy = energy_records;
8808
8809 let mut workforce_gen =
8811 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8812 let total_headcount = headcount.max(100);
8813 snapshot.diversity =
8814 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8815 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8816
8817 if !self.master_data.employees.is_empty() {
8819 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8820 entity_id,
8821 &self.master_data.employees,
8822 end_date,
8823 );
8824 if !hr_diversity.is_empty() {
8825 info!(
8826 "ESG: {} diversity metrics derived from {} actual employees",
8827 hr_diversity.len(),
8828 self.master_data.employees.len(),
8829 );
8830 snapshot.diversity.extend(hr_diversity);
8831 }
8832 }
8833
8834 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8835 entity_id,
8836 facility_count,
8837 start_date,
8838 end_date,
8839 );
8840
8841 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8844 entity_id,
8845 &snapshot.safety_incidents,
8846 total_hours,
8847 start_date,
8848 );
8849 snapshot.safety_metrics = vec![safety_metric];
8850
8851 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8853 seed + 85,
8854 esg_cfg.governance.board_size,
8855 esg_cfg.governance.independence_target,
8856 );
8857 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8858
8859 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8861 esg_cfg.supply_chain_esg.clone(),
8862 seed + 86,
8863 );
8864 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8865 .master_data
8866 .vendors
8867 .iter()
8868 .map(|v| datasynth_generators::VendorInput {
8869 vendor_id: v.vendor_id.clone(),
8870 country: v.country.clone(),
8871 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8872 quality_score: None,
8873 })
8874 .collect();
8875 snapshot.supplier_assessments =
8876 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8877
8878 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8880 seed + 87,
8881 esg_cfg.reporting.clone(),
8882 esg_cfg.climate_scenarios.clone(),
8883 );
8884 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8885 snapshot.disclosures = disclosure_gen.generate_disclosures(
8886 entity_id,
8887 &snapshot.materiality,
8888 start_date,
8889 end_date,
8890 );
8891 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8892 snapshot.disclosure_count = snapshot.disclosures.len();
8893
8894 if esg_cfg.anomaly_rate > 0.0 {
8896 let mut anomaly_injector =
8897 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8898 let mut labels = Vec::new();
8899 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8900 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8901 labels.extend(
8902 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8903 );
8904 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8905 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8906 snapshot.anomaly_labels = labels;
8907 }
8908
8909 stats.esg_emission_count = snapshot.emission_count;
8910 stats.esg_disclosure_count = snapshot.disclosure_count;
8911
8912 info!(
8913 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8914 snapshot.emission_count,
8915 snapshot.disclosure_count,
8916 snapshot.supplier_assessments.len()
8917 );
8918 self.check_resources_with_log("post-esg")?;
8919
8920 Ok(snapshot)
8921 }
8922
8923 fn phase_treasury_data(
8925 &mut self,
8926 document_flows: &DocumentFlowSnapshot,
8927 subledger: &SubledgerSnapshot,
8928 intercompany: &IntercompanySnapshot,
8929 stats: &mut EnhancedGenerationStatistics,
8930 ) -> SynthResult<TreasurySnapshot> {
8931 if !self.phase_config.generate_treasury {
8932 debug!("Phase 22: Skipped (treasury generation disabled)");
8933 return Ok(TreasurySnapshot::default());
8934 }
8935 let degradation = self.check_resources()?;
8936 if degradation >= DegradationLevel::Reduced {
8937 debug!(
8938 "Phase skipped due to resource pressure (degradation: {:?})",
8939 degradation
8940 );
8941 return Ok(TreasurySnapshot::default());
8942 }
8943 info!("Phase 22: Generating Treasury Data");
8944
8945 let seed = self.seed;
8946 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948 let currency = self
8949 .config
8950 .companies
8951 .first()
8952 .map(|c| c.currency.as_str())
8953 .unwrap_or("USD");
8954 let entity_id = self
8955 .config
8956 .companies
8957 .first()
8958 .map(|c| c.code.as_str())
8959 .unwrap_or("1000");
8960
8961 let mut snapshot = TreasurySnapshot::default();
8962
8963 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8965 self.config.treasury.debt.clone(),
8966 seed + 90,
8967 );
8968 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8969
8970 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8972 self.config.treasury.hedging.clone(),
8973 seed + 91,
8974 );
8975 for debt in &snapshot.debt_instruments {
8976 if debt.rate_type == InterestRateType::Variable {
8977 let swap = hedge_gen.generate_ir_swap(
8978 currency,
8979 debt.principal,
8980 debt.origination_date,
8981 debt.maturity_date,
8982 );
8983 snapshot.hedging_instruments.push(swap);
8984 }
8985 }
8986
8987 {
8990 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8991 for payment in &document_flows.payments {
8992 if payment.currency != currency {
8993 let entry = fx_map
8994 .entry(payment.currency.clone())
8995 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8996 entry.0 += payment.amount;
8997 if payment.header.document_date > entry.1 {
8999 entry.1 = payment.header.document_date;
9000 }
9001 }
9002 }
9003 if !fx_map.is_empty() {
9004 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9005 .into_iter()
9006 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9007 datasynth_generators::treasury::FxExposure {
9008 currency_pair: format!("{foreign_ccy}/{currency}"),
9009 foreign_currency: foreign_ccy,
9010 net_amount,
9011 settlement_date,
9012 description: "AP payment FX exposure".to_string(),
9013 }
9014 })
9015 .collect();
9016 let (fx_instruments, fx_relationships) =
9017 hedge_gen.generate(start_date, &fx_exposures);
9018 snapshot.hedging_instruments.extend(fx_instruments);
9019 snapshot.hedge_relationships.extend(fx_relationships);
9020 }
9021 }
9022
9023 if self.config.treasury.anomaly_rate > 0.0 {
9025 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9026 seed + 92,
9027 self.config.treasury.anomaly_rate,
9028 );
9029 let mut labels = Vec::new();
9030 labels.extend(
9031 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9032 );
9033 snapshot.treasury_anomaly_labels = labels;
9034 }
9035
9036 if self.config.treasury.cash_positioning.enabled {
9038 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9039
9040 for payment in &document_flows.payments {
9042 cash_flows.push(datasynth_generators::treasury::CashFlow {
9043 date: payment.header.document_date,
9044 account_id: format!("{entity_id}-MAIN"),
9045 amount: payment.amount,
9046 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9047 });
9048 }
9049
9050 for chain in &document_flows.o2c_chains {
9052 if let Some(ref receipt) = chain.customer_receipt {
9053 cash_flows.push(datasynth_generators::treasury::CashFlow {
9054 date: receipt.header.document_date,
9055 account_id: format!("{entity_id}-MAIN"),
9056 amount: receipt.amount,
9057 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9058 });
9059 }
9060 for receipt in &chain.remainder_receipts {
9062 cash_flows.push(datasynth_generators::treasury::CashFlow {
9063 date: receipt.header.document_date,
9064 account_id: format!("{entity_id}-MAIN"),
9065 amount: receipt.amount,
9066 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9067 });
9068 }
9069 }
9070
9071 if !cash_flows.is_empty() {
9072 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9073 self.config.treasury.cash_positioning.clone(),
9074 seed + 93,
9075 );
9076 let account_id = format!("{entity_id}-MAIN");
9077 snapshot.cash_positions = cash_gen.generate(
9078 entity_id,
9079 &account_id,
9080 currency,
9081 &cash_flows,
9082 start_date,
9083 start_date + chrono::Months::new(self.config.global.period_months),
9084 rust_decimal::Decimal::new(1_000_000, 0), );
9086 }
9087 }
9088
9089 if self.config.treasury.cash_forecasting.enabled {
9091 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9092
9093 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9095 .ar_invoices
9096 .iter()
9097 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9098 .map(|inv| {
9099 let days_past_due = if inv.due_date < end_date {
9100 (end_date - inv.due_date).num_days().max(0) as u32
9101 } else {
9102 0
9103 };
9104 datasynth_generators::treasury::ArAgingItem {
9105 expected_date: inv.due_date,
9106 amount: inv.amount_remaining,
9107 days_past_due,
9108 document_id: inv.invoice_number.clone(),
9109 }
9110 })
9111 .collect();
9112
9113 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9115 .ap_invoices
9116 .iter()
9117 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9118 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9119 payment_date: inv.due_date,
9120 amount: inv.amount_remaining,
9121 document_id: inv.invoice_number.clone(),
9122 })
9123 .collect();
9124
9125 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9126 self.config.treasury.cash_forecasting.clone(),
9127 seed + 94,
9128 );
9129 let forecast = forecast_gen.generate(
9130 entity_id,
9131 currency,
9132 end_date,
9133 &ar_items,
9134 &ap_items,
9135 &[], );
9137 snapshot.cash_forecasts.push(forecast);
9138 }
9139
9140 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9142 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9143 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9144 self.config.treasury.cash_pooling.clone(),
9145 seed + 95,
9146 );
9147
9148 let account_ids: Vec<String> = snapshot
9150 .cash_positions
9151 .iter()
9152 .map(|cp| cp.bank_account_id.clone())
9153 .collect::<std::collections::HashSet<_>>()
9154 .into_iter()
9155 .collect();
9156
9157 if let Some(pool) =
9158 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9159 {
9160 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9162 for cp in &snapshot.cash_positions {
9163 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9164 }
9165
9166 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9167 latest_balances
9168 .into_iter()
9169 .filter(|(id, _)| pool.participant_accounts.contains(id))
9170 .map(
9171 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9172 account_id: id,
9173 balance,
9174 },
9175 )
9176 .collect();
9177
9178 let sweeps =
9179 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9180 snapshot.cash_pool_sweeps = sweeps;
9181 snapshot.cash_pools.push(pool);
9182 }
9183 }
9184
9185 if self.config.treasury.bank_guarantees.enabled {
9187 let vendor_names: Vec<String> = self
9188 .master_data
9189 .vendors
9190 .iter()
9191 .map(|v| v.name.clone())
9192 .collect();
9193 if !vendor_names.is_empty() {
9194 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9195 self.config.treasury.bank_guarantees.clone(),
9196 seed + 96,
9197 );
9198 snapshot.bank_guarantees =
9199 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9200 }
9201 }
9202
9203 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9205 let entity_ids: Vec<String> = self
9206 .config
9207 .companies
9208 .iter()
9209 .map(|c| c.code.clone())
9210 .collect();
9211 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9212 .matched_pairs
9213 .iter()
9214 .map(|mp| {
9215 (
9216 mp.seller_company.clone(),
9217 mp.buyer_company.clone(),
9218 mp.amount,
9219 )
9220 })
9221 .collect();
9222 if entity_ids.len() >= 2 {
9223 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9224 self.config.treasury.netting.clone(),
9225 seed + 97,
9226 );
9227 snapshot.netting_runs = netting_gen.generate(
9228 &entity_ids,
9229 currency,
9230 start_date,
9231 self.config.global.period_months,
9232 &ic_amounts,
9233 );
9234 }
9235 }
9236
9237 {
9239 use datasynth_generators::treasury::TreasuryAccounting;
9240
9241 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9242 let mut treasury_jes = Vec::new();
9243
9244 if !snapshot.debt_instruments.is_empty() {
9246 let debt_jes =
9247 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9248 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9249 treasury_jes.extend(debt_jes);
9250 }
9251
9252 if !snapshot.hedging_instruments.is_empty() {
9254 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9255 &snapshot.hedging_instruments,
9256 &snapshot.hedge_relationships,
9257 end_date,
9258 entity_id,
9259 );
9260 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9261 treasury_jes.extend(hedge_jes);
9262 }
9263
9264 if !snapshot.cash_pool_sweeps.is_empty() {
9266 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9267 &snapshot.cash_pool_sweeps,
9268 entity_id,
9269 );
9270 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9271 treasury_jes.extend(sweep_jes);
9272 }
9273
9274 if !treasury_jes.is_empty() {
9275 debug!("Total treasury journal entries: {}", treasury_jes.len());
9276 }
9277 snapshot.journal_entries = treasury_jes;
9278 }
9279
9280 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9281 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9282 stats.cash_position_count = snapshot.cash_positions.len();
9283 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9284 stats.cash_pool_count = snapshot.cash_pools.len();
9285
9286 info!(
9287 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9288 snapshot.debt_instruments.len(),
9289 snapshot.hedging_instruments.len(),
9290 snapshot.cash_positions.len(),
9291 snapshot.cash_forecasts.len(),
9292 snapshot.cash_pools.len(),
9293 snapshot.bank_guarantees.len(),
9294 snapshot.netting_runs.len(),
9295 snapshot.journal_entries.len(),
9296 );
9297 self.check_resources_with_log("post-treasury")?;
9298
9299 Ok(snapshot)
9300 }
9301
9302 fn phase_project_accounting(
9304 &mut self,
9305 document_flows: &DocumentFlowSnapshot,
9306 hr: &HrSnapshot,
9307 stats: &mut EnhancedGenerationStatistics,
9308 ) -> SynthResult<ProjectAccountingSnapshot> {
9309 if !self.phase_config.generate_project_accounting {
9310 debug!("Phase 23: Skipped (project accounting disabled)");
9311 return Ok(ProjectAccountingSnapshot::default());
9312 }
9313 let degradation = self.check_resources()?;
9314 if degradation >= DegradationLevel::Reduced {
9315 debug!(
9316 "Phase skipped due to resource pressure (degradation: {:?})",
9317 degradation
9318 );
9319 return Ok(ProjectAccountingSnapshot::default());
9320 }
9321 info!("Phase 23: Generating Project Accounting Data");
9322
9323 let seed = self.seed;
9324 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9325 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9326 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9327 let company_code = self
9328 .config
9329 .companies
9330 .first()
9331 .map(|c| c.code.as_str())
9332 .unwrap_or("1000");
9333
9334 let mut snapshot = ProjectAccountingSnapshot::default();
9335
9336 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9338 self.config.project_accounting.clone(),
9339 seed + 95,
9340 );
9341 let pool = project_gen.generate(company_code, start_date, end_date);
9342 snapshot.projects = pool.projects.clone();
9343
9344 {
9346 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9347 Vec::new();
9348
9349 for te in &hr.time_entries {
9351 let total_hours = te.hours_regular + te.hours_overtime;
9352 if total_hours > 0.0 {
9353 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9354 id: te.entry_id.clone(),
9355 entity_id: company_code.to_string(),
9356 date: te.date,
9357 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9358 .unwrap_or(rust_decimal::Decimal::ZERO),
9359 source_type: CostSourceType::TimeEntry,
9360 hours: Some(
9361 rust_decimal::Decimal::from_f64_retain(total_hours)
9362 .unwrap_or(rust_decimal::Decimal::ZERO),
9363 ),
9364 });
9365 }
9366 }
9367
9368 for er in &hr.expense_reports {
9370 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9371 id: er.report_id.clone(),
9372 entity_id: company_code.to_string(),
9373 date: er.submission_date,
9374 amount: er.total_amount,
9375 source_type: CostSourceType::ExpenseReport,
9376 hours: None,
9377 });
9378 }
9379
9380 for po in &document_flows.purchase_orders {
9382 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9383 id: po.header.document_id.clone(),
9384 entity_id: company_code.to_string(),
9385 date: po.header.document_date,
9386 amount: po.total_net_amount,
9387 source_type: CostSourceType::PurchaseOrder,
9388 hours: None,
9389 });
9390 }
9391
9392 for vi in &document_flows.vendor_invoices {
9394 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9395 id: vi.header.document_id.clone(),
9396 entity_id: company_code.to_string(),
9397 date: vi.header.document_date,
9398 amount: vi.payable_amount,
9399 source_type: CostSourceType::VendorInvoice,
9400 hours: None,
9401 });
9402 }
9403
9404 if !source_docs.is_empty() && !pool.projects.is_empty() {
9405 let mut cost_gen =
9406 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9407 self.config.project_accounting.cost_allocation.clone(),
9408 seed + 99,
9409 );
9410 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9411 }
9412 }
9413
9414 if self.config.project_accounting.change_orders.enabled {
9416 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9417 self.config.project_accounting.change_orders.clone(),
9418 seed + 96,
9419 );
9420 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9421 }
9422
9423 if self.config.project_accounting.milestones.enabled {
9425 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9426 self.config.project_accounting.milestones.clone(),
9427 seed + 97,
9428 );
9429 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9430 }
9431
9432 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9434 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9435 self.config.project_accounting.earned_value.clone(),
9436 seed + 98,
9437 );
9438 snapshot.earned_value_metrics =
9439 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9440 }
9441
9442 if self.config.project_accounting.revenue_recognition.enabled
9444 && !snapshot.projects.is_empty()
9445 && !snapshot.cost_lines.is_empty()
9446 {
9447 use datasynth_generators::project_accounting::RevenueGenerator;
9448 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9449 let avg_contract_value =
9450 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9451 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9452
9453 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9456 snapshot
9457 .projects
9458 .iter()
9459 .filter(|p| {
9460 matches!(
9461 p.project_type,
9462 datasynth_core::models::ProjectType::Customer
9463 )
9464 })
9465 .map(|p| {
9466 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9467 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9468 } else {
9470 avg_contract_value
9471 };
9472 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9474 })
9475 .collect();
9476
9477 if !contract_values.is_empty() {
9478 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9479 snapshot.revenue_records = rev_gen.generate(
9480 &snapshot.projects,
9481 &snapshot.cost_lines,
9482 &contract_values,
9483 start_date,
9484 end_date,
9485 );
9486 debug!(
9487 "Generated {} revenue recognition records for {} customer projects",
9488 snapshot.revenue_records.len(),
9489 contract_values.len()
9490 );
9491 }
9492 }
9493
9494 stats.project_count = snapshot.projects.len();
9495 stats.project_change_order_count = snapshot.change_orders.len();
9496 stats.project_cost_line_count = snapshot.cost_lines.len();
9497
9498 info!(
9499 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9500 snapshot.projects.len(),
9501 snapshot.change_orders.len(),
9502 snapshot.milestones.len(),
9503 snapshot.earned_value_metrics.len()
9504 );
9505 self.check_resources_with_log("post-project-accounting")?;
9506
9507 Ok(snapshot)
9508 }
9509
9510 fn phase_evolution_events(
9512 &mut self,
9513 stats: &mut EnhancedGenerationStatistics,
9514 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9515 if !self.phase_config.generate_evolution_events {
9516 debug!("Phase 24: Skipped (evolution events disabled)");
9517 return Ok((Vec::new(), Vec::new()));
9518 }
9519 info!("Phase 24: Generating Process Evolution + Organizational Events");
9520
9521 let seed = self.seed;
9522 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9523 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9524 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9525
9526 let mut proc_gen =
9528 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9529 seed + 100,
9530 );
9531 let process_events = proc_gen.generate_events(start_date, end_date);
9532
9533 let company_codes: Vec<String> = self
9535 .config
9536 .companies
9537 .iter()
9538 .map(|c| c.code.clone())
9539 .collect();
9540 let mut org_gen =
9541 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9542 seed + 101,
9543 );
9544 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9545
9546 stats.process_evolution_event_count = process_events.len();
9547 stats.organizational_event_count = org_events.len();
9548
9549 info!(
9550 "Evolution events generated: {} process evolution, {} organizational",
9551 process_events.len(),
9552 org_events.len()
9553 );
9554 self.check_resources_with_log("post-evolution-events")?;
9555
9556 Ok((process_events, org_events))
9557 }
9558
9559 fn phase_disruption_events(
9562 &self,
9563 stats: &mut EnhancedGenerationStatistics,
9564 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9565 if !self.config.organizational_events.enabled {
9566 debug!("Phase 24b: Skipped (organizational events disabled)");
9567 return Ok(Vec::new());
9568 }
9569 info!("Phase 24b: Generating Disruption Events");
9570
9571 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9572 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9573 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9574
9575 let company_codes: Vec<String> = self
9576 .config
9577 .companies
9578 .iter()
9579 .map(|c| c.code.clone())
9580 .collect();
9581
9582 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9583 let events = gen.generate(start_date, end_date, &company_codes);
9584
9585 stats.disruption_event_count = events.len();
9586 info!("Disruption events generated: {} events", events.len());
9587 self.check_resources_with_log("post-disruption-events")?;
9588
9589 Ok(events)
9590 }
9591
9592 fn phase_counterfactuals(
9599 &self,
9600 journal_entries: &[JournalEntry],
9601 stats: &mut EnhancedGenerationStatistics,
9602 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9603 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9604 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9605 return Ok(Vec::new());
9606 }
9607 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9608
9609 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9610
9611 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9612
9613 let specs = [
9615 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9616 CounterfactualSpec::ShiftDate { days: -14 },
9617 CounterfactualSpec::SelfApprove,
9618 CounterfactualSpec::SplitTransaction { split_count: 3 },
9619 ];
9620
9621 let pairs: Vec<_> = journal_entries
9622 .iter()
9623 .enumerate()
9624 .map(|(i, je)| {
9625 let spec = &specs[i % specs.len()];
9626 gen.generate(je, spec)
9627 })
9628 .collect();
9629
9630 stats.counterfactual_pair_count = pairs.len();
9631 info!(
9632 "Counterfactual pairs generated: {} pairs from {} journal entries",
9633 pairs.len(),
9634 journal_entries.len()
9635 );
9636 self.check_resources_with_log("post-counterfactuals")?;
9637
9638 Ok(pairs)
9639 }
9640
9641 fn phase_red_flags(
9648 &self,
9649 anomaly_labels: &AnomalyLabels,
9650 document_flows: &DocumentFlowSnapshot,
9651 stats: &mut EnhancedGenerationStatistics,
9652 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9653 if !self.config.fraud.enabled {
9654 debug!("Phase 26: Skipped (fraud generation disabled)");
9655 return Ok(Vec::new());
9656 }
9657 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9658
9659 use datasynth_generators::fraud::RedFlagGenerator;
9660
9661 let generator = RedFlagGenerator::new();
9662 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9663
9664 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9666 .labels
9667 .iter()
9668 .filter(|label| label.anomaly_type.is_intentional())
9669 .map(|label| label.document_id.as_str())
9670 .collect();
9671
9672 let mut flags = Vec::new();
9673
9674 for chain in &document_flows.p2p_chains {
9676 let doc_id = &chain.purchase_order.header.document_id;
9677 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9678 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9679 }
9680
9681 for chain in &document_flows.o2c_chains {
9683 let doc_id = &chain.sales_order.header.document_id;
9684 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9685 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9686 }
9687
9688 stats.red_flag_count = flags.len();
9689 info!(
9690 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9691 flags.len(),
9692 document_flows.p2p_chains.len(),
9693 document_flows.o2c_chains.len(),
9694 fraud_doc_ids.len()
9695 );
9696 self.check_resources_with_log("post-red-flags")?;
9697
9698 Ok(flags)
9699 }
9700
9701 fn phase_collusion_rings(
9707 &mut self,
9708 stats: &mut EnhancedGenerationStatistics,
9709 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9710 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9711 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9712 return Ok(Vec::new());
9713 }
9714 info!("Phase 26b: Generating Collusion Rings");
9715
9716 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9717 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9718 let months = self.config.global.period_months;
9719
9720 let employee_ids: Vec<String> = self
9721 .master_data
9722 .employees
9723 .iter()
9724 .map(|e| e.employee_id.clone())
9725 .collect();
9726 let vendor_ids: Vec<String> = self
9727 .master_data
9728 .vendors
9729 .iter()
9730 .map(|v| v.vendor_id.clone())
9731 .collect();
9732
9733 let mut generator =
9734 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9735 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9736
9737 stats.collusion_ring_count = rings.len();
9738 info!(
9739 "Collusion rings generated: {} rings, total members: {}",
9740 rings.len(),
9741 rings
9742 .iter()
9743 .map(datasynth_generators::fraud::CollusionRing::size)
9744 .sum::<usize>()
9745 );
9746 self.check_resources_with_log("post-collusion-rings")?;
9747
9748 Ok(rings)
9749 }
9750
9751 fn phase_temporal_attributes(
9756 &mut self,
9757 stats: &mut EnhancedGenerationStatistics,
9758 ) -> SynthResult<
9759 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9760 > {
9761 if !self.config.temporal_attributes.enabled {
9762 debug!("Phase 27: Skipped (temporal attributes disabled)");
9763 return Ok(Vec::new());
9764 }
9765 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9766
9767 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9768 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9769
9770 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9774 || self.config.temporal_attributes.enabled;
9775 let temporal_config = {
9776 let ta = &self.config.temporal_attributes;
9777 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9778 .enabled(ta.enabled)
9779 .closed_probability(ta.valid_time.closed_probability)
9780 .avg_validity_days(ta.valid_time.avg_validity_days)
9781 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9782 .with_version_chains(if generate_version_chains {
9783 ta.avg_versions_per_entity
9784 } else {
9785 1.0
9786 })
9787 .build()
9788 };
9789 let temporal_config = if self
9791 .config
9792 .temporal_attributes
9793 .transaction_time
9794 .allow_backdating
9795 {
9796 let mut c = temporal_config;
9797 c.transaction_time.allow_backdating = true;
9798 c.transaction_time.backdating_probability = self
9799 .config
9800 .temporal_attributes
9801 .transaction_time
9802 .backdating_probability;
9803 c.transaction_time.max_backdate_days = self
9804 .config
9805 .temporal_attributes
9806 .transaction_time
9807 .max_backdate_days;
9808 c
9809 } else {
9810 temporal_config
9811 };
9812 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9813 temporal_config,
9814 self.seed + 130,
9815 start_date,
9816 );
9817
9818 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9819 self.seed + 130,
9820 datasynth_core::GeneratorType::Vendor,
9821 );
9822
9823 let chains: Vec<_> = self
9824 .master_data
9825 .vendors
9826 .iter()
9827 .map(|vendor| {
9828 let id = uuid_factory.next();
9829 gen.generate_version_chain(vendor.clone(), id)
9830 })
9831 .collect();
9832
9833 stats.temporal_version_chain_count = chains.len();
9834 info!("Temporal version chains generated: {} chains", chains.len());
9835 self.check_resources_with_log("post-temporal-attributes")?;
9836
9837 Ok(chains)
9838 }
9839
9840 fn phase_entity_relationships(
9850 &self,
9851 journal_entries: &[JournalEntry],
9852 document_flows: &DocumentFlowSnapshot,
9853 stats: &mut EnhancedGenerationStatistics,
9854 ) -> SynthResult<(
9855 Option<datasynth_core::models::EntityGraph>,
9856 Vec<datasynth_core::models::CrossProcessLink>,
9857 )> {
9858 use datasynth_generators::relationships::{
9859 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9860 TransactionSummary,
9861 };
9862
9863 let rs_enabled = self.config.relationship_strength.enabled;
9864 let cpl_enabled = self.config.cross_process_links.enabled
9865 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9866
9867 if !rs_enabled && !cpl_enabled {
9868 debug!(
9869 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9870 );
9871 return Ok((None, Vec::new()));
9872 }
9873
9874 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9875
9876 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9877 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9878
9879 let company_code = self
9880 .config
9881 .companies
9882 .first()
9883 .map(|c| c.code.as_str())
9884 .unwrap_or("1000");
9885
9886 let gen_config = EntityGraphConfig {
9888 enabled: rs_enabled,
9889 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9890 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9891 enable_return_flows: false,
9892 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9893 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9894 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9896 1.0
9897 } else {
9898 0.30
9899 },
9900 ..Default::default()
9901 },
9902 strength_config: datasynth_generators::relationships::StrengthConfig {
9903 transaction_volume_weight: self
9904 .config
9905 .relationship_strength
9906 .calculation
9907 .transaction_volume_weight,
9908 transaction_count_weight: self
9909 .config
9910 .relationship_strength
9911 .calculation
9912 .transaction_count_weight,
9913 duration_weight: self
9914 .config
9915 .relationship_strength
9916 .calculation
9917 .relationship_duration_weight,
9918 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9919 mutual_connections_weight: self
9920 .config
9921 .relationship_strength
9922 .calculation
9923 .mutual_connections_weight,
9924 recency_half_life_days: self
9925 .config
9926 .relationship_strength
9927 .calculation
9928 .recency_half_life_days,
9929 },
9930 ..Default::default()
9931 };
9932
9933 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9934
9935 let entity_graph = if rs_enabled {
9937 let vendor_summaries: Vec<EntitySummary> = self
9939 .master_data
9940 .vendors
9941 .iter()
9942 .map(|v| {
9943 EntitySummary::new(
9944 &v.vendor_id,
9945 &v.name,
9946 datasynth_core::models::GraphEntityType::Vendor,
9947 start_date,
9948 )
9949 })
9950 .collect();
9951
9952 let customer_summaries: Vec<EntitySummary> = self
9953 .master_data
9954 .customers
9955 .iter()
9956 .map(|c| {
9957 EntitySummary::new(
9958 &c.customer_id,
9959 &c.name,
9960 datasynth_core::models::GraphEntityType::Customer,
9961 start_date,
9962 )
9963 })
9964 .collect();
9965
9966 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9971 std::collections::HashMap::new();
9972
9973 for je in journal_entries {
9974 let cc = je.header.company_code.clone();
9975 let posting_date = je.header.posting_date;
9976 for line in &je.lines {
9977 if let Some(ref tp) = line.trading_partner {
9978 let amount = if line.debit_amount > line.credit_amount {
9979 line.debit_amount
9980 } else {
9981 line.credit_amount
9982 };
9983 let entry = txn_summaries
9984 .entry((cc.clone(), tp.clone()))
9985 .or_insert_with(|| TransactionSummary {
9986 total_volume: rust_decimal::Decimal::ZERO,
9987 transaction_count: 0,
9988 first_transaction_date: posting_date,
9989 last_transaction_date: posting_date,
9990 related_entities: std::collections::HashSet::new(),
9991 });
9992 entry.total_volume += amount;
9993 entry.transaction_count += 1;
9994 if posting_date < entry.first_transaction_date {
9995 entry.first_transaction_date = posting_date;
9996 }
9997 if posting_date > entry.last_transaction_date {
9998 entry.last_transaction_date = posting_date;
9999 }
10000 entry.related_entities.insert(cc.clone());
10001 }
10002 }
10003 }
10004
10005 for chain in &document_flows.p2p_chains {
10008 let cc = chain.purchase_order.header.company_code.clone();
10009 let vendor_id = chain.purchase_order.vendor_id.clone();
10010 let po_date = chain.purchase_order.header.document_date;
10011 let amount = chain.purchase_order.total_net_amount;
10012
10013 let entry = txn_summaries
10014 .entry((cc.clone(), vendor_id))
10015 .or_insert_with(|| TransactionSummary {
10016 total_volume: rust_decimal::Decimal::ZERO,
10017 transaction_count: 0,
10018 first_transaction_date: po_date,
10019 last_transaction_date: po_date,
10020 related_entities: std::collections::HashSet::new(),
10021 });
10022 entry.total_volume += amount;
10023 entry.transaction_count += 1;
10024 if po_date < entry.first_transaction_date {
10025 entry.first_transaction_date = po_date;
10026 }
10027 if po_date > entry.last_transaction_date {
10028 entry.last_transaction_date = po_date;
10029 }
10030 entry.related_entities.insert(cc);
10031 }
10032
10033 for chain in &document_flows.o2c_chains {
10035 let cc = chain.sales_order.header.company_code.clone();
10036 let customer_id = chain.sales_order.customer_id.clone();
10037 let so_date = chain.sales_order.header.document_date;
10038 let amount = chain.sales_order.total_net_amount;
10039
10040 let entry = txn_summaries
10041 .entry((cc.clone(), customer_id))
10042 .or_insert_with(|| TransactionSummary {
10043 total_volume: rust_decimal::Decimal::ZERO,
10044 transaction_count: 0,
10045 first_transaction_date: so_date,
10046 last_transaction_date: so_date,
10047 related_entities: std::collections::HashSet::new(),
10048 });
10049 entry.total_volume += amount;
10050 entry.transaction_count += 1;
10051 if so_date < entry.first_transaction_date {
10052 entry.first_transaction_date = so_date;
10053 }
10054 if so_date > entry.last_transaction_date {
10055 entry.last_transaction_date = so_date;
10056 }
10057 entry.related_entities.insert(cc);
10058 }
10059
10060 let as_of_date = journal_entries
10061 .last()
10062 .map(|je| je.header.posting_date)
10063 .unwrap_or(start_date);
10064
10065 let graph = gen.generate_entity_graph(
10066 company_code,
10067 as_of_date,
10068 &vendor_summaries,
10069 &customer_summaries,
10070 &txn_summaries,
10071 );
10072
10073 info!(
10074 "Entity relationship graph: {} nodes, {} edges",
10075 graph.nodes.len(),
10076 graph.edges.len()
10077 );
10078 stats.entity_relationship_node_count = graph.nodes.len();
10079 stats.entity_relationship_edge_count = graph.edges.len();
10080 Some(graph)
10081 } else {
10082 None
10083 };
10084
10085 let cross_process_links = if cpl_enabled {
10087 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10089 .p2p_chains
10090 .iter()
10091 .flat_map(|chain| {
10092 let vendor_id = chain.purchase_order.vendor_id.clone();
10093 let cc = chain.purchase_order.header.company_code.clone();
10094 chain.goods_receipts.iter().flat_map(move |gr| {
10095 gr.items.iter().filter_map({
10096 let doc_id = gr.header.document_id.clone();
10097 let v_id = vendor_id.clone();
10098 let company = cc.clone();
10099 let receipt_date = gr.header.document_date;
10100 move |item| {
10101 item.base
10102 .material_id
10103 .as_ref()
10104 .map(|mat_id| GoodsReceiptRef {
10105 document_id: doc_id.clone(),
10106 material_id: mat_id.clone(),
10107 quantity: item.base.quantity,
10108 receipt_date,
10109 vendor_id: v_id.clone(),
10110 company_code: company.clone(),
10111 })
10112 }
10113 })
10114 })
10115 })
10116 .collect();
10117
10118 let del_refs: Vec<DeliveryRef> = document_flows
10120 .o2c_chains
10121 .iter()
10122 .flat_map(|chain| {
10123 let customer_id = chain.sales_order.customer_id.clone();
10124 let cc = chain.sales_order.header.company_code.clone();
10125 chain.deliveries.iter().flat_map(move |del| {
10126 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10127 del.items.iter().filter_map({
10128 let doc_id = del.header.document_id.clone();
10129 let c_id = customer_id.clone();
10130 let company = cc.clone();
10131 move |item| {
10132 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10133 document_id: doc_id.clone(),
10134 material_id: mat_id.clone(),
10135 quantity: item.base.quantity,
10136 delivery_date,
10137 customer_id: c_id.clone(),
10138 company_code: company.clone(),
10139 })
10140 }
10141 })
10142 })
10143 })
10144 .collect();
10145
10146 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10147 info!("Cross-process links generated: {} links", links.len());
10148 stats.cross_process_link_count = links.len();
10149 links
10150 } else {
10151 Vec::new()
10152 };
10153
10154 self.check_resources_with_log("post-entity-relationships")?;
10155 Ok((entity_graph, cross_process_links))
10156 }
10157
10158 fn phase_industry_data(
10160 &self,
10161 stats: &mut EnhancedGenerationStatistics,
10162 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10163 if !self.config.industry_specific.enabled {
10164 return None;
10165 }
10166 info!("Phase 29: Generating industry-specific data");
10167 let output = datasynth_generators::industry::factory::generate_industry_output(
10168 self.config.global.industry,
10169 );
10170 stats.industry_gl_account_count = output.gl_accounts.len();
10171 info!(
10172 "Industry data generated: {} GL accounts for {:?}",
10173 output.gl_accounts.len(),
10174 self.config.global.industry
10175 );
10176 Some(output)
10177 }
10178
10179 fn phase_opening_balances(
10181 &mut self,
10182 coa: &Arc<ChartOfAccounts>,
10183 stats: &mut EnhancedGenerationStatistics,
10184 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10185 if !self.config.balance.generate_opening_balances {
10186 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10187 return Ok(Vec::new());
10188 }
10189 info!("Phase 3b: Generating Opening Balances");
10190
10191 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10192 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10193 let fiscal_year = start_date.year();
10194
10195 if let Some(ctx) = &self.shard_context {
10206 if !ctx.opening_balances.is_empty() {
10207 debug!(
10208 "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10209 ctx.opening_balances.len()
10210 );
10211 let mut results = Vec::new();
10212 for company in &self.config.companies {
10213 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10214 .opening_balances
10215 .iter()
10216 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10217 .collect();
10218 let total_assets = ctx
10219 .opening_balances
10220 .iter()
10221 .filter(|ob| {
10222 matches!(
10223 ob.account_type,
10224 AccountType::Asset | AccountType::ContraAsset
10225 )
10226 })
10227 .map(|ob| ob.net_balance())
10228 .sum::<rust_decimal::Decimal>();
10229 let total_liabilities = ctx
10230 .opening_balances
10231 .iter()
10232 .filter(|ob| {
10233 matches!(
10234 ob.account_type,
10235 AccountType::Liability | AccountType::ContraLiability
10236 )
10237 })
10238 .map(|ob| ob.net_balance())
10239 .sum::<rust_decimal::Decimal>();
10240 let total_equity = ctx
10241 .opening_balances
10242 .iter()
10243 .filter(|ob| {
10244 matches!(
10245 ob.account_type,
10246 AccountType::Equity | AccountType::ContraEquity
10247 )
10248 })
10249 .map(|ob| ob.net_balance())
10250 .sum::<rust_decimal::Decimal>();
10251 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10252 < rust_decimal::Decimal::ONE;
10253 results.push(GeneratedOpeningBalance {
10254 company_code: company.code.clone(),
10255 as_of_date: start_date,
10256 balances,
10257 total_assets,
10258 total_liabilities,
10259 total_equity,
10260 is_balanced,
10261 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10262 current_ratio: None,
10263 quick_ratio: None,
10264 debt_to_equity: None,
10265 working_capital: rust_decimal::Decimal::ZERO,
10266 },
10267 });
10268 }
10269 stats.opening_balance_count = results.len();
10270 info!(
10271 "Phase 3b: opening-balance carryover applied ({} companies)",
10272 results.len()
10273 );
10274 self.check_resources_with_log("post-opening-balances")?;
10275 return Ok(results);
10276 }
10277 }
10278
10279 let industry = match self.config.global.industry {
10280 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10281 IndustrySector::Retail => IndustryType::Retail,
10282 IndustrySector::FinancialServices => IndustryType::Financial,
10283 IndustrySector::Healthcare => IndustryType::Healthcare,
10284 IndustrySector::Technology => IndustryType::Technology,
10285 _ => IndustryType::Manufacturing,
10286 };
10287
10288 let config = datasynth_generators::OpeningBalanceConfig {
10289 industry,
10290 ..Default::default()
10291 };
10292 let mut gen =
10293 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10294
10295 let mut results = Vec::new();
10296 for company in &self.config.companies {
10297 let spec = OpeningBalanceSpec::new(
10298 company.code.clone(),
10299 start_date,
10300 fiscal_year,
10301 company.currency.clone(),
10302 rust_decimal::Decimal::new(10_000_000, 0),
10303 industry,
10304 );
10305 let ob = gen.generate(&spec, coa, start_date, &company.code);
10306 results.push(ob);
10307 }
10308
10309 stats.opening_balance_count = results.len();
10310 info!("Opening balances generated: {} companies", results.len());
10311 self.check_resources_with_log("post-opening-balances")?;
10312
10313 Ok(results)
10314 }
10315
10316 fn phase_subledger_reconciliation(
10318 &mut self,
10319 subledger: &SubledgerSnapshot,
10320 entries: &[JournalEntry],
10321 stats: &mut EnhancedGenerationStatistics,
10322 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10323 if !self.config.balance.reconcile_subledgers {
10324 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10325 return Ok(Vec::new());
10326 }
10327 info!("Phase 9b: Reconciling GL to subledger balances");
10328
10329 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10330 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10331 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10332
10333 let tracker_config = BalanceTrackerConfig {
10335 validate_on_each_entry: false,
10336 track_history: false,
10337 fail_on_validation_error: false,
10338 ..Default::default()
10339 };
10340 let recon_currency = self
10341 .config
10342 .companies
10343 .first()
10344 .map(|c| c.currency.clone())
10345 .unwrap_or_else(|| "USD".to_string());
10346 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10347 let validation_errors = tracker.apply_entries(entries);
10348 if !validation_errors.is_empty() {
10349 warn!(
10350 error_count = validation_errors.len(),
10351 "Balance tracker encountered validation errors during subledger reconciliation"
10352 );
10353 for err in &validation_errors {
10354 debug!("Balance validation error: {:?}", err);
10355 }
10356 }
10357
10358 let mut engine = datasynth_generators::ReconciliationEngine::new(
10359 datasynth_generators::ReconciliationConfig::default(),
10360 );
10361
10362 let mut results = Vec::new();
10363 let company_code = self
10364 .config
10365 .companies
10366 .first()
10367 .map(|c| c.code.as_str())
10368 .unwrap_or("1000");
10369
10370 if !subledger.ar_invoices.is_empty() {
10372 let gl_balance = tracker
10373 .get_account_balance(
10374 company_code,
10375 datasynth_core::accounts::control_accounts::AR_CONTROL,
10376 )
10377 .map(|b| b.closing_balance)
10378 .unwrap_or_default();
10379 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10380 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10381 }
10382
10383 if !subledger.ap_invoices.is_empty() {
10385 let gl_balance = tracker
10386 .get_account_balance(
10387 company_code,
10388 datasynth_core::accounts::control_accounts::AP_CONTROL,
10389 )
10390 .map(|b| b.closing_balance)
10391 .unwrap_or_default();
10392 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10393 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10394 }
10395
10396 if !subledger.fa_records.is_empty() {
10398 let gl_asset_balance = tracker
10399 .get_account_balance(
10400 company_code,
10401 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10402 )
10403 .map(|b| b.closing_balance)
10404 .unwrap_or_default();
10405 let gl_accum_depr_balance = tracker
10406 .get_account_balance(
10407 company_code,
10408 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10409 )
10410 .map(|b| b.closing_balance)
10411 .unwrap_or_default();
10412 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10413 subledger.fa_records.iter().collect();
10414 let (asset_recon, depr_recon) = engine.reconcile_fa(
10415 company_code,
10416 end_date,
10417 gl_asset_balance,
10418 gl_accum_depr_balance,
10419 &fa_refs,
10420 );
10421 results.push(asset_recon);
10422 results.push(depr_recon);
10423 }
10424
10425 if !subledger.inventory_positions.is_empty() {
10427 let gl_balance = tracker
10428 .get_account_balance(
10429 company_code,
10430 datasynth_core::accounts::control_accounts::INVENTORY,
10431 )
10432 .map(|b| b.closing_balance)
10433 .unwrap_or_default();
10434 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10435 subledger.inventory_positions.iter().collect();
10436 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10437 }
10438
10439 stats.subledger_reconciliation_count = results.len();
10440 let passed = results.iter().filter(|r| r.is_balanced()).count();
10441 let failed = results.len() - passed;
10442 info!(
10443 "Subledger reconciliation: {} checks, {} passed, {} failed",
10444 results.len(),
10445 passed,
10446 failed
10447 );
10448 self.check_resources_with_log("post-subledger-reconciliation")?;
10449
10450 Ok(results)
10451 }
10452
10453 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10455 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10456
10457 let coa_framework = self.resolve_coa_framework();
10458
10459 let mut gen = ChartOfAccountsGenerator::new(
10460 self.config.chart_of_accounts.complexity,
10461 self.config.global.industry,
10462 self.seed,
10463 )
10464 .with_coa_framework(coa_framework)
10465 .with_expand_industry_subaccounts(
10467 self.config.chart_of_accounts.expand_industry_subaccounts,
10468 );
10469
10470 let mut built = gen.generate();
10471 if self.config.accounting_standards.enabled {
10475 use datasynth_config::schema::AccountingFrameworkConfig;
10476 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10477 match f {
10478 AccountingFrameworkConfig::UsGaap => "us_gaap",
10479 AccountingFrameworkConfig::Ifrs => "ifrs",
10480 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10481 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10482 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10483 }
10484 .to_string()
10485 });
10486 }
10487 let coa = Arc::new(built);
10488 self.coa = Some(Arc::clone(&coa));
10489
10490 if let Some(pb) = pb {
10491 pb.finish_with_message("Chart of Accounts complete");
10492 }
10493
10494 Ok(coa)
10495 }
10496
10497 fn generate_master_data(&mut self) -> SynthResult<()> {
10499 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10500 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10501 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10502
10503 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10505
10506 let pack = self.primary_pack().clone();
10508
10509 let vendors_per_company = self.phase_config.vendors_per_company;
10511 let customers_per_company = self.phase_config.customers_per_company;
10512 let materials_per_company = self.phase_config.materials_per_company;
10513 let assets_per_company = self.phase_config.assets_per_company;
10514 let coa_framework = self.resolve_coa_framework();
10515
10516 let per_company_results: Vec<_> = self
10519 .config
10520 .companies
10521 .par_iter()
10522 .enumerate()
10523 .map(|(i, company)| {
10524 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10525 let pack = pack.clone();
10526
10527 let mut vendor_gen = VendorGenerator::new(company_seed);
10529 vendor_gen.set_country_pack(pack.clone());
10530 vendor_gen.set_coa_framework(coa_framework);
10531 vendor_gen.set_counter_offset(i * vendors_per_company);
10532 vendor_gen.set_template_provider(self.template_provider.clone());
10535 if self.config.vendor_network.enabled {
10537 let vn = &self.config.vendor_network;
10538 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10539 enabled: true,
10540 depth: vn.depth,
10541 tier1_count: datasynth_generators::TierCountConfig::new(
10542 vn.tier1.min,
10543 vn.tier1.max,
10544 ),
10545 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10546 vn.tier2_per_parent.min,
10547 vn.tier2_per_parent.max,
10548 ),
10549 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10550 vn.tier3_per_parent.min,
10551 vn.tier3_per_parent.max,
10552 ),
10553 cluster_distribution: datasynth_generators::ClusterDistribution {
10554 reliable_strategic: vn.clusters.reliable_strategic,
10555 standard_operational: vn.clusters.standard_operational,
10556 transactional: vn.clusters.transactional,
10557 problematic: vn.clusters.problematic,
10558 },
10559 concentration_limits: datasynth_generators::ConcentrationLimits {
10560 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10561 max_top5: vn.dependencies.top_5_concentration,
10562 },
10563 ..datasynth_generators::VendorNetworkConfig::default()
10564 });
10565 }
10566 let vendor_pool =
10567 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10568
10569 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10571 customer_gen.set_country_pack(pack.clone());
10572 customer_gen.set_coa_framework(coa_framework);
10573 customer_gen.set_counter_offset(i * customers_per_company);
10574 customer_gen.set_template_provider(self.template_provider.clone());
10576 if self.config.customer_segmentation.enabled {
10578 let cs = &self.config.customer_segmentation;
10579 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10580 enabled: true,
10581 segment_distribution: datasynth_generators::SegmentDistribution {
10582 enterprise: cs.value_segments.enterprise.customer_share,
10583 mid_market: cs.value_segments.mid_market.customer_share,
10584 smb: cs.value_segments.smb.customer_share,
10585 consumer: cs.value_segments.consumer.customer_share,
10586 },
10587 referral_config: datasynth_generators::ReferralConfig {
10588 enabled: cs.networks.referrals.enabled,
10589 referral_rate: cs.networks.referrals.referral_rate,
10590 ..Default::default()
10591 },
10592 hierarchy_config: datasynth_generators::HierarchyConfig {
10593 enabled: cs.networks.corporate_hierarchies.enabled,
10594 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10595 ..Default::default()
10596 },
10597 ..Default::default()
10598 };
10599 customer_gen.set_segmentation_config(seg_cfg);
10600 }
10601 let customer_pool = customer_gen.generate_customer_pool(
10602 customers_per_company,
10603 &company.code,
10604 start_date,
10605 );
10606
10607 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10609 material_gen.set_country_pack(pack.clone());
10610 material_gen.set_counter_offset(i * materials_per_company);
10611 material_gen.set_template_provider(self.template_provider.clone());
10613 let material_pool = material_gen.generate_material_pool(
10614 materials_per_company,
10615 &company.code,
10616 start_date,
10617 );
10618
10619 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10621 asset_gen.set_template_provider(self.template_provider.clone());
10623 let asset_pool = asset_gen.generate_asset_pool(
10624 assets_per_company,
10625 &company.code,
10626 (start_date, end_date),
10627 );
10628
10629 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10631 employee_gen.set_country_pack(pack);
10632 employee_gen.set_template_provider(self.template_provider.clone());
10634 let employee_pool =
10635 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10636
10637 let employee_change_history =
10639 employee_gen.generate_all_change_history(&employee_pool, end_date);
10640
10641 let employee_ids: Vec<String> = employee_pool
10643 .employees
10644 .iter()
10645 .map(|e| e.employee_id.clone())
10646 .collect();
10647 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10648 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10649
10650 let mut pc_gen =
10653 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10654 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10655
10656 (
10657 vendor_pool.vendors,
10658 customer_pool.customers,
10659 material_pool.materials,
10660 asset_pool.assets,
10661 employee_pool.employees,
10662 employee_change_history,
10663 cost_centers,
10664 profit_centers,
10665 )
10666 })
10667 .collect();
10668
10669 for (
10671 vendors,
10672 customers,
10673 materials,
10674 assets,
10675 employees,
10676 change_history,
10677 cost_centers,
10678 profit_centers,
10679 ) in per_company_results
10680 {
10681 self.master_data.vendors.extend(vendors);
10682 self.master_data.customers.extend(customers);
10683 self.master_data.materials.extend(materials);
10684 self.master_data.assets.extend(assets);
10685 self.master_data.employees.extend(employees);
10686 self.master_data.cost_centers.extend(cost_centers);
10687 self.master_data.profit_centers.extend(profit_centers);
10688 self.master_data
10689 .employee_change_history
10690 .extend(change_history);
10691 }
10692
10693 {
10697 use datasynth_core::models::IndustrySector;
10698 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10699 let industry = match self.config.global.industry {
10700 IndustrySector::Manufacturing => "manufacturing",
10701 IndustrySector::Retail => "retail",
10702 IndustrySector::FinancialServices => "financial_services",
10703 IndustrySector::Technology => "technology",
10704 IndustrySector::Healthcare => "healthcare",
10705 _ => "other",
10706 };
10707 for (i, company) in self.config.companies.iter().enumerate() {
10708 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10709 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10710 let profile = profile_gen.generate(&company.code, industry);
10711 self.master_data.organizational_profiles.push(profile);
10712 }
10713 }
10714
10715 if let Some(pb) = &pb {
10716 pb.inc(total);
10717 }
10718 if let Some(pb) = pb {
10719 pb.finish_with_message("Master data generation complete");
10720 }
10721
10722 Ok(())
10723 }
10724
10725 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10727 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10728 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10729
10730 let months = (self.config.global.period_months as usize).max(1);
10733 let p2p_count = self
10734 .phase_config
10735 .p2p_chains
10736 .min(self.master_data.vendors.len() * 2 * months);
10737 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10738
10739 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10741 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10742 p2p_gen.set_country_pack(self.primary_pack().clone());
10743 if let Some(ctx) = &self.temporal_context {
10747 p2p_gen.set_temporal_context(Arc::clone(ctx));
10748 }
10749
10750 for i in 0..p2p_count {
10751 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10752 let materials: Vec<&Material> = self
10753 .master_data
10754 .materials
10755 .iter()
10756 .skip(i % self.master_data.materials.len().max(1))
10757 .take(2.min(self.master_data.materials.len()))
10758 .collect();
10759
10760 if materials.is_empty() {
10761 continue;
10762 }
10763
10764 let company = &self.config.companies[i % self.config.companies.len()];
10765 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10766 let fiscal_period = po_date.month() as u8;
10767 let created_by = if self.master_data.employees.is_empty() {
10768 "SYSTEM"
10769 } else {
10770 self.master_data.employees[i % self.master_data.employees.len()]
10771 .user_id
10772 .as_str()
10773 };
10774
10775 let chain = p2p_gen.generate_chain(
10776 &company.code,
10777 vendor,
10778 &materials,
10779 po_date,
10780 start_date.year() as u16,
10781 fiscal_period,
10782 created_by,
10783 );
10784
10785 flows.purchase_orders.push(chain.purchase_order.clone());
10787 flows.goods_receipts.extend(chain.goods_receipts.clone());
10788 if let Some(vi) = &chain.vendor_invoice {
10789 flows.vendor_invoices.push(vi.clone());
10790 }
10791 if let Some(payment) = &chain.payment {
10792 flows.payments.push(payment.clone());
10793 }
10794 for remainder in &chain.remainder_payments {
10795 flows.payments.push(remainder.clone());
10796 }
10797 flows.p2p_chains.push(chain);
10798
10799 if let Some(pb) = &pb {
10800 pb.inc(1);
10801 }
10802 }
10803
10804 if let Some(pb) = pb {
10805 pb.finish_with_message("P2P document flows complete");
10806 }
10807
10808 let o2c_count = self
10811 .phase_config
10812 .o2c_chains
10813 .min(self.master_data.customers.len() * 2 * months);
10814 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10815
10816 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10818 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10819 o2c_gen.set_country_pack(self.primary_pack().clone());
10820 if let Some(ctx) = &self.temporal_context {
10822 o2c_gen.set_temporal_context(Arc::clone(ctx));
10823 }
10824
10825 for i in 0..o2c_count {
10826 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10827 let materials: Vec<&Material> = self
10828 .master_data
10829 .materials
10830 .iter()
10831 .skip(i % self.master_data.materials.len().max(1))
10832 .take(2.min(self.master_data.materials.len()))
10833 .collect();
10834
10835 if materials.is_empty() {
10836 continue;
10837 }
10838
10839 let company = &self.config.companies[i % self.config.companies.len()];
10840 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10841 let fiscal_period = so_date.month() as u8;
10842 let created_by = if self.master_data.employees.is_empty() {
10843 "SYSTEM"
10844 } else {
10845 self.master_data.employees[i % self.master_data.employees.len()]
10846 .user_id
10847 .as_str()
10848 };
10849
10850 let chain = o2c_gen.generate_chain(
10851 &company.code,
10852 customer,
10853 &materials,
10854 so_date,
10855 start_date.year() as u16,
10856 fiscal_period,
10857 created_by,
10858 );
10859
10860 flows.sales_orders.push(chain.sales_order.clone());
10862 flows.deliveries.extend(chain.deliveries.clone());
10863 if let Some(ci) = &chain.customer_invoice {
10864 flows.customer_invoices.push(ci.clone());
10865 }
10866 if let Some(receipt) = &chain.customer_receipt {
10867 flows.payments.push(receipt.clone());
10868 }
10869 for receipt in &chain.remainder_receipts {
10871 flows.payments.push(receipt.clone());
10872 }
10873 flows.o2c_chains.push(chain);
10874
10875 if let Some(pb) = &pb {
10876 pb.inc(1);
10877 }
10878 }
10879
10880 if let Some(pb) = pb {
10881 pb.finish_with_message("O2C document flows complete");
10882 }
10883
10884 {
10888 let mut refs = Vec::new();
10889 for doc in &flows.purchase_orders {
10890 refs.extend(doc.header.document_references.iter().cloned());
10891 }
10892 for doc in &flows.goods_receipts {
10893 refs.extend(doc.header.document_references.iter().cloned());
10894 }
10895 for doc in &flows.vendor_invoices {
10896 refs.extend(doc.header.document_references.iter().cloned());
10897 }
10898 for doc in &flows.sales_orders {
10899 refs.extend(doc.header.document_references.iter().cloned());
10900 }
10901 for doc in &flows.deliveries {
10902 refs.extend(doc.header.document_references.iter().cloned());
10903 }
10904 for doc in &flows.customer_invoices {
10905 refs.extend(doc.header.document_references.iter().cloned());
10906 }
10907 for doc in &flows.payments {
10908 refs.extend(doc.header.document_references.iter().cloned());
10909 }
10910 debug!(
10911 "Collected {} document cross-references from document headers",
10912 refs.len()
10913 );
10914 flows.document_references = refs;
10915 }
10916
10917 Ok(())
10918 }
10919
10920 fn generate_journal_entries(
10922 &mut self,
10923 coa: &Arc<ChartOfAccounts>,
10924 ) -> SynthResult<Vec<JournalEntry>> {
10925 use datasynth_core::traits::ParallelGenerator;
10926
10927 let total = self.calculate_total_transactions();
10928 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10929
10930 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10931 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10932 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10933
10934 let company_codes: Vec<String> = self
10935 .config
10936 .companies
10937 .iter()
10938 .map(|c| c.code.clone())
10939 .collect();
10940
10941 let mut generator = JournalEntryGenerator::new_with_params(
10942 self.config.transactions.clone(),
10943 Arc::clone(coa),
10944 company_codes,
10945 start_date,
10946 end_date,
10947 self.seed,
10948 );
10949 let bp = &self.config.business_processes;
10952 generator.set_business_process_weights(
10953 bp.o2c_weight,
10954 bp.p2p_weight,
10955 bp.r2r_weight,
10956 bp.h2r_weight,
10957 bp.a2r_weight,
10958 );
10959 generator
10964 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10965 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10966 let generator = generator;
10967
10968 let je_pack = self.primary_pack();
10972
10973 let mut generator = generator
10974 .with_master_data(
10975 &self.master_data.vendors,
10976 &self.master_data.customers,
10977 &self.master_data.materials,
10978 )
10979 .with_country_pack_names(je_pack)
10980 .with_country_pack_temporal(
10981 self.config.temporal_patterns.clone(),
10982 self.seed + 200,
10983 je_pack,
10984 )
10985 .with_persona_errors(true)
10986 .with_fraud_config(self.config.fraud.clone());
10987
10988 let temporal_enabled = self.config.temporal.enabled;
10993 let regimes_enabled = self.config.distributions.regime_changes.enabled;
10994 if temporal_enabled || regimes_enabled {
10995 let mut drift_config = if temporal_enabled {
10996 self.config.temporal.to_core_config()
10997 } else {
10998 datasynth_core::distributions::DriftConfig::default()
11001 };
11002 if regimes_enabled {
11003 self.config
11004 .distributions
11005 .regime_changes
11006 .apply_to(&mut drift_config, start_date);
11007 }
11008 generator = generator.with_drift_config(drift_config, self.seed + 100);
11009 }
11010
11011 self.check_memory_limit()?;
11013
11014 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11016
11017 let entries = if total >= 10_000 && num_threads > 1 {
11021 let sub_generators = generator.split(num_threads);
11024 let entries_per_thread = total as usize / num_threads;
11025 let remainder = total as usize % num_threads;
11026
11027 let batches: Vec<Vec<JournalEntry>> = sub_generators
11028 .into_par_iter()
11029 .enumerate()
11030 .map(|(i, mut gen)| {
11031 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11032 gen.generate_batch(count)
11033 })
11034 .collect();
11035
11036 let entries = JournalEntryGenerator::merge_results(batches);
11038
11039 if let Some(pb) = &pb {
11040 pb.inc(total);
11041 }
11042 entries
11043 } else {
11044 let mut entries = Vec::with_capacity(total as usize);
11046 for _ in 0..total {
11047 let entry = generator.generate();
11048 entries.push(entry);
11049 if let Some(pb) = &pb {
11050 pb.inc(1);
11051 }
11052 }
11053 entries
11054 };
11055
11056 if let Some(pb) = pb {
11057 pb.finish_with_message("Journal entries complete");
11058 }
11059
11060 Ok(entries)
11061 }
11062
11063 fn generate_jes_from_document_flows(
11068 &mut self,
11069 flows: &DocumentFlowSnapshot,
11070 ) -> SynthResult<Vec<JournalEntry>> {
11071 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11072 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11073
11074 let je_config = match self.resolve_coa_framework() {
11075 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11076 CoAFramework::GermanSkr04 => {
11077 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11078 DocumentFlowJeConfig::from(&fa)
11079 }
11080 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11081 };
11082
11083 let populate_fec = je_config.populate_fec_fields;
11084 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11085
11086 if populate_fec {
11090 let mut aux_lookup = std::collections::HashMap::new();
11091 for vendor in &self.master_data.vendors {
11092 if let Some(ref aux) = vendor.auxiliary_gl_account {
11093 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11094 }
11095 }
11096 for customer in &self.master_data.customers {
11097 if let Some(ref aux) = customer.auxiliary_gl_account {
11098 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11099 }
11100 }
11101 if !aux_lookup.is_empty() {
11102 generator.set_auxiliary_account_lookup(aux_lookup);
11103 }
11104 }
11105
11106 let mut entries = Vec::new();
11107
11108 for chain in &flows.p2p_chains {
11110 let chain_entries = generator.generate_from_p2p_chain(chain);
11111 entries.extend(chain_entries);
11112 if let Some(pb) = &pb {
11113 pb.inc(1);
11114 }
11115 }
11116
11117 for chain in &flows.o2c_chains {
11119 let chain_entries = generator.generate_from_o2c_chain(chain);
11120 entries.extend(chain_entries);
11121 if let Some(pb) = &pb {
11122 pb.inc(1);
11123 }
11124 }
11125
11126 if let Some(pb) = pb {
11127 pb.finish_with_message(format!(
11128 "Generated {} JEs from document flows",
11129 entries.len()
11130 ));
11131 }
11132
11133 Ok(entries)
11134 }
11135
11136 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11142 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11143
11144 let mut jes = Vec::with_capacity(payroll_runs.len());
11145
11146 for run in payroll_runs {
11147 let mut je = JournalEntry::new_simple(
11148 format!("JE-PAYROLL-{}", run.payroll_id),
11149 run.company_code.clone(),
11150 run.run_date,
11151 format!("Payroll {}", run.payroll_id),
11152 );
11153
11154 je.add_line(JournalEntryLine {
11156 line_number: 1,
11157 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11158 debit_amount: run.total_gross,
11159 reference: Some(run.payroll_id.clone()),
11160 text: Some(format!(
11161 "Payroll {} ({} employees)",
11162 run.payroll_id, run.employee_count
11163 )),
11164 ..Default::default()
11165 });
11166
11167 je.add_line(JournalEntryLine {
11169 line_number: 2,
11170 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11171 credit_amount: run.total_gross,
11172 reference: Some(run.payroll_id.clone()),
11173 ..Default::default()
11174 });
11175
11176 jes.push(je);
11177 }
11178
11179 jes
11180 }
11181
11182 fn link_document_flows_to_subledgers(
11187 &mut self,
11188 flows: &DocumentFlowSnapshot,
11189 ) -> SynthResult<SubledgerSnapshot> {
11190 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11191 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11192
11193 let vendor_names: std::collections::HashMap<String, String> = self
11195 .master_data
11196 .vendors
11197 .iter()
11198 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11199 .collect();
11200 let customer_names: std::collections::HashMap<String, String> = self
11201 .master_data
11202 .customers
11203 .iter()
11204 .map(|c| (c.customer_id.clone(), c.name.clone()))
11205 .collect();
11206
11207 let mut linker = DocumentFlowLinker::new()
11208 .with_vendor_names(vendor_names)
11209 .with_customer_names(customer_names);
11210
11211 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11213 if let Some(pb) = &pb {
11214 pb.inc(flows.vendor_invoices.len() as u64);
11215 }
11216
11217 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11219 if let Some(pb) = &pb {
11220 pb.inc(flows.customer_invoices.len() as u64);
11221 }
11222
11223 if let Some(pb) = pb {
11224 pb.finish_with_message(format!(
11225 "Linked {} AP and {} AR invoices",
11226 ap_invoices.len(),
11227 ar_invoices.len()
11228 ));
11229 }
11230
11231 Ok(SubledgerSnapshot {
11232 ap_invoices,
11233 ar_invoices,
11234 fa_records: Vec::new(),
11235 inventory_positions: Vec::new(),
11236 inventory_movements: Vec::new(),
11237 ar_aging_reports: Vec::new(),
11239 ap_aging_reports: Vec::new(),
11240 depreciation_runs: Vec::new(),
11242 inventory_valuations: Vec::new(),
11243 dunning_runs: Vec::new(),
11245 dunning_letters: Vec::new(),
11246 })
11247 }
11248
11249 #[allow(clippy::too_many_arguments)]
11254 fn generate_ocpm_events(
11255 &mut self,
11256 flows: &DocumentFlowSnapshot,
11257 sourcing: &SourcingSnapshot,
11258 hr: &HrSnapshot,
11259 manufacturing: &ManufacturingSnapshot,
11260 banking: &BankingSnapshot,
11261 audit: &AuditSnapshot,
11262 financial_reporting: &FinancialReportingSnapshot,
11263 ) -> SynthResult<OcpmSnapshot> {
11264 let total_chains = flows.p2p_chains.len()
11265 + flows.o2c_chains.len()
11266 + sourcing.sourcing_projects.len()
11267 + hr.payroll_runs.len()
11268 + manufacturing.production_orders.len()
11269 + banking.customers.len()
11270 + audit.engagements.len()
11271 + financial_reporting.bank_reconciliations.len();
11272 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11273
11274 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11276 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11277
11278 let ocpm_config = OcpmGeneratorConfig {
11280 generate_p2p: true,
11281 generate_o2c: true,
11282 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11283 generate_h2r: !hr.payroll_runs.is_empty(),
11284 generate_mfg: !manufacturing.production_orders.is_empty(),
11285 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11286 generate_bank: !banking.customers.is_empty(),
11287 generate_audit: !audit.engagements.is_empty(),
11288 happy_path_rate: 0.75,
11289 exception_path_rate: 0.20,
11290 error_path_rate: 0.05,
11291 add_duration_variability: true,
11292 duration_std_dev_factor: 0.3,
11293 };
11294 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11295 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11296
11297 let available_users: Vec<String> = self
11299 .master_data
11300 .employees
11301 .iter()
11302 .take(20)
11303 .map(|e| e.user_id.clone())
11304 .collect();
11305
11306 let fallback_date =
11308 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11309 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11310 .unwrap_or(fallback_date);
11311 let base_midnight = base_date
11312 .and_hms_opt(0, 0, 0)
11313 .expect("midnight is always valid");
11314 let base_datetime =
11315 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11316
11317 let add_result = |event_log: &mut OcpmEventLog,
11319 result: datasynth_ocpm::CaseGenerationResult| {
11320 for event in result.events {
11321 event_log.add_event(event);
11322 }
11323 for object in result.objects {
11324 event_log.add_object(object);
11325 }
11326 for relationship in result.relationships {
11327 event_log.add_relationship(relationship);
11328 }
11329 for corr in result.correlation_events {
11330 event_log.add_correlation_event(corr);
11331 }
11332 event_log.add_case(result.case_trace);
11333 };
11334
11335 for chain in &flows.p2p_chains {
11337 let po = &chain.purchase_order;
11338 let documents = P2pDocuments::new(
11339 &po.header.document_id,
11340 &po.vendor_id,
11341 &po.header.company_code,
11342 po.total_net_amount,
11343 &po.header.currency,
11344 &ocpm_uuid_factory,
11345 )
11346 .with_goods_receipt(
11347 chain
11348 .goods_receipts
11349 .first()
11350 .map(|gr| gr.header.document_id.as_str())
11351 .unwrap_or(""),
11352 &ocpm_uuid_factory,
11353 )
11354 .with_invoice(
11355 chain
11356 .vendor_invoice
11357 .as_ref()
11358 .map(|vi| vi.header.document_id.as_str())
11359 .unwrap_or(""),
11360 &ocpm_uuid_factory,
11361 )
11362 .with_payment(
11363 chain
11364 .payment
11365 .as_ref()
11366 .map(|p| p.header.document_id.as_str())
11367 .unwrap_or(""),
11368 &ocpm_uuid_factory,
11369 );
11370
11371 let start_time =
11372 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11373 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11374 add_result(&mut event_log, result);
11375
11376 if let Some(pb) = &pb {
11377 pb.inc(1);
11378 }
11379 }
11380
11381 for chain in &flows.o2c_chains {
11383 let so = &chain.sales_order;
11384 let documents = O2cDocuments::new(
11385 &so.header.document_id,
11386 &so.customer_id,
11387 &so.header.company_code,
11388 so.total_net_amount,
11389 &so.header.currency,
11390 &ocpm_uuid_factory,
11391 )
11392 .with_delivery(
11393 chain
11394 .deliveries
11395 .first()
11396 .map(|d| d.header.document_id.as_str())
11397 .unwrap_or(""),
11398 &ocpm_uuid_factory,
11399 )
11400 .with_invoice(
11401 chain
11402 .customer_invoice
11403 .as_ref()
11404 .map(|ci| ci.header.document_id.as_str())
11405 .unwrap_or(""),
11406 &ocpm_uuid_factory,
11407 )
11408 .with_receipt(
11409 chain
11410 .customer_receipt
11411 .as_ref()
11412 .map(|r| r.header.document_id.as_str())
11413 .unwrap_or(""),
11414 &ocpm_uuid_factory,
11415 );
11416
11417 let start_time =
11418 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11419 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11420 add_result(&mut event_log, result);
11421
11422 if let Some(pb) = &pb {
11423 pb.inc(1);
11424 }
11425 }
11426
11427 for project in &sourcing.sourcing_projects {
11429 let vendor_id = sourcing
11431 .contracts
11432 .iter()
11433 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11434 .map(|c| c.vendor_id.clone())
11435 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11436 .or_else(|| {
11437 self.master_data
11438 .vendors
11439 .first()
11440 .map(|v| v.vendor_id.clone())
11441 })
11442 .unwrap_or_else(|| "V000".to_string());
11443 let mut docs = S2cDocuments::new(
11444 &project.project_id,
11445 &vendor_id,
11446 &project.company_code,
11447 project.estimated_annual_spend,
11448 &ocpm_uuid_factory,
11449 );
11450 if let Some(rfx) = sourcing
11452 .rfx_events
11453 .iter()
11454 .find(|r| r.sourcing_project_id == project.project_id)
11455 {
11456 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11457 if let Some(bid) = sourcing.bids.iter().find(|b| {
11459 b.rfx_id == rfx.rfx_id
11460 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11461 }) {
11462 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11463 }
11464 }
11465 if let Some(contract) = sourcing
11467 .contracts
11468 .iter()
11469 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11470 {
11471 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11472 }
11473 let start_time = base_datetime - chrono::Duration::days(90);
11474 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11475 add_result(&mut event_log, result);
11476
11477 if let Some(pb) = &pb {
11478 pb.inc(1);
11479 }
11480 }
11481
11482 for run in &hr.payroll_runs {
11484 let employee_id = hr
11486 .payroll_line_items
11487 .iter()
11488 .find(|li| li.payroll_id == run.payroll_id)
11489 .map(|li| li.employee_id.as_str())
11490 .unwrap_or("EMP000");
11491 let docs = H2rDocuments::new(
11492 &run.payroll_id,
11493 employee_id,
11494 &run.company_code,
11495 run.total_gross,
11496 &ocpm_uuid_factory,
11497 )
11498 .with_time_entries(
11499 hr.time_entries
11500 .iter()
11501 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11502 .take(5)
11503 .map(|t| t.entry_id.as_str())
11504 .collect(),
11505 );
11506 let start_time = base_datetime - chrono::Duration::days(30);
11507 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11508 add_result(&mut event_log, result);
11509
11510 if let Some(pb) = &pb {
11511 pb.inc(1);
11512 }
11513 }
11514
11515 for order in &manufacturing.production_orders {
11517 let mut docs = MfgDocuments::new(
11518 &order.order_id,
11519 &order.material_id,
11520 &order.company_code,
11521 order.planned_quantity,
11522 &ocpm_uuid_factory,
11523 )
11524 .with_operations(
11525 order
11526 .operations
11527 .iter()
11528 .map(|o| format!("OP-{:04}", o.operation_number))
11529 .collect::<Vec<_>>()
11530 .iter()
11531 .map(std::string::String::as_str)
11532 .collect(),
11533 );
11534 if let Some(insp) = manufacturing
11536 .quality_inspections
11537 .iter()
11538 .find(|i| i.reference_id == order.order_id)
11539 {
11540 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11541 }
11542 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11544 cc.items
11545 .iter()
11546 .any(|item| item.material_id == order.material_id)
11547 }) {
11548 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11549 }
11550 let start_time = base_datetime - chrono::Duration::days(60);
11551 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11552 add_result(&mut event_log, result);
11553
11554 if let Some(pb) = &pb {
11555 pb.inc(1);
11556 }
11557 }
11558
11559 for customer in &banking.customers {
11561 let customer_id_str = customer.customer_id.to_string();
11562 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11563 if let Some(account) = banking
11565 .accounts
11566 .iter()
11567 .find(|a| a.primary_owner_id == customer.customer_id)
11568 {
11569 let account_id_str = account.account_id.to_string();
11570 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11571 let txn_strs: Vec<String> = banking
11573 .transactions
11574 .iter()
11575 .filter(|t| t.account_id == account.account_id)
11576 .take(10)
11577 .map(|t| t.transaction_id.to_string())
11578 .collect();
11579 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11580 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11581 .transactions
11582 .iter()
11583 .filter(|t| t.account_id == account.account_id)
11584 .take(10)
11585 .map(|t| t.amount)
11586 .collect();
11587 if !txn_ids.is_empty() {
11588 docs = docs.with_transactions(txn_ids, txn_amounts);
11589 }
11590 }
11591 let start_time = base_datetime - chrono::Duration::days(180);
11592 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11593 add_result(&mut event_log, result);
11594
11595 if let Some(pb) = &pb {
11596 pb.inc(1);
11597 }
11598 }
11599
11600 for engagement in &audit.engagements {
11602 let engagement_id_str = engagement.engagement_id.to_string();
11603 let docs = AuditDocuments::new(
11604 &engagement_id_str,
11605 &engagement.client_entity_id,
11606 &ocpm_uuid_factory,
11607 )
11608 .with_workpapers(
11609 audit
11610 .workpapers
11611 .iter()
11612 .filter(|w| w.engagement_id == engagement.engagement_id)
11613 .take(10)
11614 .map(|w| w.workpaper_id.to_string())
11615 .collect::<Vec<_>>()
11616 .iter()
11617 .map(std::string::String::as_str)
11618 .collect(),
11619 )
11620 .with_evidence(
11621 audit
11622 .evidence
11623 .iter()
11624 .filter(|e| e.engagement_id == engagement.engagement_id)
11625 .take(10)
11626 .map(|e| e.evidence_id.to_string())
11627 .collect::<Vec<_>>()
11628 .iter()
11629 .map(std::string::String::as_str)
11630 .collect(),
11631 )
11632 .with_risks(
11633 audit
11634 .risk_assessments
11635 .iter()
11636 .filter(|r| r.engagement_id == engagement.engagement_id)
11637 .take(5)
11638 .map(|r| r.risk_id.to_string())
11639 .collect::<Vec<_>>()
11640 .iter()
11641 .map(std::string::String::as_str)
11642 .collect(),
11643 )
11644 .with_findings(
11645 audit
11646 .findings
11647 .iter()
11648 .filter(|f| f.engagement_id == engagement.engagement_id)
11649 .take(5)
11650 .map(|f| f.finding_id.to_string())
11651 .collect::<Vec<_>>()
11652 .iter()
11653 .map(std::string::String::as_str)
11654 .collect(),
11655 )
11656 .with_judgments(
11657 audit
11658 .judgments
11659 .iter()
11660 .filter(|j| j.engagement_id == engagement.engagement_id)
11661 .take(5)
11662 .map(|j| j.judgment_id.to_string())
11663 .collect::<Vec<_>>()
11664 .iter()
11665 .map(std::string::String::as_str)
11666 .collect(),
11667 );
11668 let start_time = base_datetime - chrono::Duration::days(120);
11669 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11670 add_result(&mut event_log, result);
11671
11672 if let Some(pb) = &pb {
11673 pb.inc(1);
11674 }
11675 }
11676
11677 for recon in &financial_reporting.bank_reconciliations {
11679 let docs = BankReconDocuments::new(
11680 &recon.reconciliation_id,
11681 &recon.bank_account_id,
11682 &recon.company_code,
11683 recon.bank_ending_balance,
11684 &ocpm_uuid_factory,
11685 )
11686 .with_statement_lines(
11687 recon
11688 .statement_lines
11689 .iter()
11690 .take(20)
11691 .map(|l| l.line_id.as_str())
11692 .collect(),
11693 )
11694 .with_reconciling_items(
11695 recon
11696 .reconciling_items
11697 .iter()
11698 .take(10)
11699 .map(|i| i.item_id.as_str())
11700 .collect(),
11701 );
11702 let start_time = base_datetime - chrono::Duration::days(30);
11703 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11704 add_result(&mut event_log, result);
11705
11706 if let Some(pb) = &pb {
11707 pb.inc(1);
11708 }
11709 }
11710
11711 event_log.compute_variants();
11713
11714 let summary = event_log.summary();
11715
11716 if let Some(pb) = pb {
11717 pb.finish_with_message(format!(
11718 "Generated {} OCPM events, {} objects",
11719 summary.event_count, summary.object_count
11720 ));
11721 }
11722
11723 Ok(OcpmSnapshot {
11724 event_count: summary.event_count,
11725 object_count: summary.object_count,
11726 case_count: summary.case_count,
11727 event_log: Some(event_log),
11728 })
11729 }
11730
11731 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11733 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11734
11735 let total_rate = if self.config.anomaly_injection.enabled {
11738 self.config.anomaly_injection.rates.total_rate
11739 } else if self.config.fraud.enabled {
11740 self.config.fraud.fraud_rate
11741 } else {
11742 0.02
11743 };
11744
11745 let fraud_rate = if self.config.anomaly_injection.enabled {
11746 self.config.anomaly_injection.rates.fraud_rate
11747 } else {
11748 AnomalyRateConfig::default().fraud_rate
11749 };
11750
11751 let error_rate = if self.config.anomaly_injection.enabled {
11752 self.config.anomaly_injection.rates.error_rate
11753 } else {
11754 AnomalyRateConfig::default().error_rate
11755 };
11756
11757 let process_issue_rate = if self.config.anomaly_injection.enabled {
11758 self.config.anomaly_injection.rates.process_rate
11759 } else {
11760 AnomalyRateConfig::default().process_issue_rate
11761 };
11762
11763 let anomaly_config = AnomalyInjectorConfig {
11764 rates: AnomalyRateConfig {
11765 total_rate,
11766 fraud_rate,
11767 error_rate,
11768 process_issue_rate,
11769 ..Default::default()
11770 },
11771 seed: self.seed + 5000,
11772 ..Default::default()
11773 };
11774
11775 let mut injector = AnomalyInjector::new(anomaly_config);
11776 let result = injector.process_entries(entries);
11777
11778 if let Some(pb) = &pb {
11779 pb.inc(entries.len() as u64);
11780 pb.finish_with_message("Anomaly injection complete");
11781 }
11782
11783 let mut by_type = HashMap::new();
11784 for label in &result.labels {
11785 *by_type
11786 .entry(format!("{:?}", label.anomaly_type))
11787 .or_insert(0) += 1;
11788 }
11789
11790 Ok(AnomalyLabels {
11791 labels: result.labels,
11792 summary: Some(result.summary),
11793 by_type,
11794 })
11795 }
11796
11797 fn validate_journal_entries(
11806 &mut self,
11807 entries: &[JournalEntry],
11808 ) -> SynthResult<BalanceValidationResult> {
11809 let clean_entries: Vec<&JournalEntry> = entries
11811 .iter()
11812 .filter(|e| {
11813 e.header
11814 .header_text
11815 .as_ref()
11816 .map(|t| !t.contains("[HUMAN_ERROR:"))
11817 .unwrap_or(true)
11818 })
11819 .collect();
11820
11821 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11822
11823 let config = BalanceTrackerConfig {
11825 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11829 };
11830 let validation_currency = self
11831 .config
11832 .companies
11833 .first()
11834 .map(|c| c.currency.clone())
11835 .unwrap_or_else(|| "USD".to_string());
11836
11837 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11838
11839 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11841 let errors = tracker.apply_entries(&clean_refs);
11842
11843 if let Some(pb) = &pb {
11844 pb.inc(entries.len() as u64);
11845 }
11846
11847 let has_unbalanced = tracker
11850 .get_validation_errors()
11851 .iter()
11852 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11853
11854 let mut all_errors = errors;
11857 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11858 let company_codes: Vec<String> = self
11859 .config
11860 .companies
11861 .iter()
11862 .map(|c| c.code.clone())
11863 .collect();
11864
11865 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11866 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11867 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11868
11869 for company_code in &company_codes {
11870 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11871 all_errors.push(e);
11872 }
11873 }
11874
11875 let stats = tracker.get_statistics();
11877
11878 let is_balanced = all_errors.is_empty();
11880
11881 if let Some(pb) = pb {
11882 let msg = if is_balanced {
11883 "Balance validation passed"
11884 } else {
11885 "Balance validation completed with errors"
11886 };
11887 pb.finish_with_message(msg);
11888 }
11889
11890 Ok(BalanceValidationResult {
11891 validated: true,
11892 is_balanced,
11893 entries_processed: stats.entries_processed,
11894 total_debits: stats.total_debits,
11895 total_credits: stats.total_credits,
11896 accounts_tracked: stats.accounts_tracked,
11897 companies_tracked: stats.companies_tracked,
11898 validation_errors: all_errors,
11899 has_unbalanced_entries: has_unbalanced,
11900 })
11901 }
11902
11903 fn inject_data_quality(
11908 &mut self,
11909 entries: &mut [JournalEntry],
11910 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11911 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11912
11913 let config = if self.config.data_quality.enabled {
11916 let dq = &self.config.data_quality;
11917 let field_rates = dq.missing_values.field_rates.clone();
11921 let mut required_fields: std::collections::HashSet<String> =
11922 dq.missing_values.protected_fields.iter().cloned().collect();
11923 for f in [
11926 "document_id",
11927 "company_code",
11928 "posting_date",
11929 "fiscal_year",
11930 "fiscal_period",
11931 "gl_account",
11932 "line_number",
11933 "transaction_id",
11934 ] {
11935 required_fields.insert(f.to_string());
11936 }
11937 DataQualityConfig {
11938 enable_missing_values: dq.missing_values.enabled,
11939 missing_values: datasynth_generators::MissingValueConfig {
11940 global_rate: dq.effective_missing_rate(),
11941 field_rates,
11942 required_fields,
11943 ..Default::default()
11944 },
11945 enable_format_variations: dq.format_variations.enabled,
11946 format_variations: datasynth_generators::FormatVariationConfig {
11947 date_variation_rate: dq.format_variations.dates.rate,
11948 amount_variation_rate: dq.format_variations.amounts.rate,
11949 identifier_variation_rate: dq.format_variations.identifiers.rate,
11950 ..Default::default()
11951 },
11952 enable_duplicates: dq.duplicates.enabled,
11953 duplicates: datasynth_generators::DuplicateConfig {
11954 duplicate_rate: dq.effective_duplicate_rate(),
11955 ..Default::default()
11956 },
11957 enable_typos: dq.typos.enabled,
11958 typos: datasynth_generators::TypoConfig {
11959 char_error_rate: dq.effective_typo_rate(),
11960 ..Default::default()
11961 },
11962 enable_encoding_issues: dq.encoding_issues.enabled,
11963 encoding_issue_rate: dq.encoding_issues.rate,
11964 seed: self.seed.wrapping_add(77), track_statistics: true,
11966 }
11967 } else {
11968 DataQualityConfig::minimal()
11969 };
11970 let mut injector = DataQualityInjector::new(config);
11971
11972 injector.set_country_pack(self.primary_pack().clone());
11974
11975 let context = HashMap::new();
11977
11978 for entry in entries.iter_mut() {
11979 if let Some(text) = &entry.header.header_text {
11981 let processed = injector.process_text_field(
11982 "header_text",
11983 text,
11984 &entry.header.document_id.to_string(),
11985 &context,
11986 );
11987 match processed {
11988 Some(new_text) if new_text != *text => {
11989 entry.header.header_text = Some(new_text);
11990 }
11991 None => {
11992 entry.header.header_text = None; }
11994 _ => {}
11995 }
11996 }
11997
11998 if let Some(ref_text) = &entry.header.reference {
12000 let processed = injector.process_text_field(
12001 "reference",
12002 ref_text,
12003 &entry.header.document_id.to_string(),
12004 &context,
12005 );
12006 match processed {
12007 Some(new_text) if new_text != *ref_text => {
12008 entry.header.reference = Some(new_text);
12009 }
12010 None => {
12011 entry.header.reference = None;
12012 }
12013 _ => {}
12014 }
12015 }
12016
12017 let user_persona = entry.header.user_persona.clone();
12019 if let Some(processed) = injector.process_text_field(
12020 "user_persona",
12021 &user_persona,
12022 &entry.header.document_id.to_string(),
12023 &context,
12024 ) {
12025 if processed != user_persona {
12026 entry.header.user_persona = processed;
12027 }
12028 }
12029
12030 for line in &mut entry.lines {
12032 if let Some(ref text) = line.line_text {
12034 let processed = injector.process_text_field(
12035 "line_text",
12036 text,
12037 &entry.header.document_id.to_string(),
12038 &context,
12039 );
12040 match processed {
12041 Some(new_text) if new_text != *text => {
12042 line.line_text = Some(new_text);
12043 }
12044 None => {
12045 line.line_text = None;
12046 }
12047 _ => {}
12048 }
12049 }
12050
12051 if let Some(cc) = &line.cost_center {
12053 let processed = injector.process_text_field(
12054 "cost_center",
12055 cc,
12056 &entry.header.document_id.to_string(),
12057 &context,
12058 );
12059 match processed {
12060 Some(new_cc) if new_cc != *cc => {
12061 line.cost_center = Some(new_cc);
12062 }
12063 None => {
12064 line.cost_center = None;
12065 }
12066 _ => {}
12067 }
12068 }
12069
12070 macro_rules! process_opt_field {
12078 ($field_name:expr, $opt:expr) => {
12079 if let Some(val) = $opt.as_ref() {
12080 match injector.process_text_field(
12081 $field_name,
12082 val,
12083 &entry.header.document_id.to_string(),
12084 &context,
12085 ) {
12086 Some(new_val) if new_val != *val => {
12087 *$opt = Some(new_val);
12088 }
12089 None => {
12090 *$opt = None;
12091 }
12092 _ => {}
12093 }
12094 }
12095 };
12096 }
12097
12098 process_opt_field!("profit_center", &mut line.profit_center);
12099 process_opt_field!("assignment", &mut line.assignment);
12100 process_opt_field!("tax_code", &mut line.tax_code);
12101 process_opt_field!("account_description", &mut line.account_description);
12102 process_opt_field!(
12103 "auxiliary_account_number",
12104 &mut line.auxiliary_account_number
12105 );
12106 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12107 process_opt_field!("lettrage", &mut line.lettrage);
12108 }
12109
12110 if let Some(pb) = &pb {
12111 pb.inc(1);
12112 }
12113 }
12114
12115 if let Some(pb) = pb {
12116 pb.finish_with_message("Data quality injection complete");
12117 }
12118
12119 let quality_issues = injector.issues().to_vec();
12120 Ok((injector.stats().clone(), quality_issues))
12121 }
12122
12123 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12134 let use_fsm = self
12136 .config
12137 .audit
12138 .fsm
12139 .as_ref()
12140 .map(|f| f.enabled)
12141 .unwrap_or(false);
12142
12143 if use_fsm {
12144 return self.generate_audit_data_with_fsm(entries);
12145 }
12146
12147 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12149 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12150 let fiscal_year = start_date.year() as u16;
12151 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12152
12153 let total_revenue: rust_decimal::Decimal = entries
12155 .iter()
12156 .flat_map(|e| e.lines.iter())
12157 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12158 .map(|l| l.credit_amount)
12159 .sum();
12160
12161 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12163
12164 let mut snapshot = AuditSnapshot::default();
12165
12166 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12168 engagement_gen.set_team_config(&self.config.audit.team);
12171
12172 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12173 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12177 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12178 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12179 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12180 finding_gen.set_template_provider(self.template_provider.clone());
12182 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12183 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12184 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12185 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12186 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12187 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12188 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12189
12190 let accounts: Vec<String> = self
12192 .coa
12193 .as_ref()
12194 .map(|coa| {
12195 coa.get_postable_accounts()
12196 .iter()
12197 .map(|acc| acc.account_code().to_string())
12198 .collect()
12199 })
12200 .unwrap_or_default();
12201
12202 for (i, company) in self.config.companies.iter().enumerate() {
12204 let company_revenue = total_revenue
12206 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12207
12208 let engagements_for_company =
12210 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12211 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12212 1
12213 } else {
12214 0
12215 };
12216
12217 for _eng_idx in 0..(engagements_for_company + extra) {
12218 let eng_type =
12223 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12224
12225 let mut engagement = engagement_gen.generate_engagement(
12227 &company.code,
12228 &company.name,
12229 fiscal_year,
12230 period_end,
12231 company_revenue,
12232 Some(eng_type),
12233 );
12234
12235 if !self.master_data.employees.is_empty() {
12237 let emp_count = self.master_data.employees.len();
12238 let base = (i * 10 + _eng_idx) % emp_count;
12240 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12241 .employee_id
12242 .clone();
12243 engagement.engagement_manager_id = self.master_data.employees
12244 [(base + 1) % emp_count]
12245 .employee_id
12246 .clone();
12247 let real_team: Vec<String> = engagement
12248 .team_member_ids
12249 .iter()
12250 .enumerate()
12251 .map(|(j, _)| {
12252 self.master_data.employees[(base + 2 + j) % emp_count]
12253 .employee_id
12254 .clone()
12255 })
12256 .collect();
12257 engagement.team_member_ids = real_team;
12258 }
12259
12260 if let Some(pb) = &pb {
12261 pb.inc(1);
12262 }
12263
12264 let team_members: Vec<String> = engagement.team_member_ids.clone();
12266
12267 let workpapers = if self.config.audit.generate_workpapers {
12273 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12274 } else {
12275 Vec::new()
12276 };
12277
12278 for wp in &workpapers {
12279 if let Some(pb) = &pb {
12280 pb.inc(1);
12281 }
12282
12283 let evidence = evidence_gen.generate_evidence_for_workpaper(
12285 wp,
12286 &team_members,
12287 wp.preparer_date,
12288 );
12289
12290 for _ in &evidence {
12291 if let Some(pb) = &pb {
12292 pb.inc(1);
12293 }
12294 }
12295
12296 snapshot.evidence.extend(evidence);
12297 }
12298
12299 let risks =
12301 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12302
12303 for _ in &risks {
12304 if let Some(pb) = &pb {
12305 pb.inc(1);
12306 }
12307 }
12308 snapshot.risk_assessments.extend(risks);
12309
12310 let findings = finding_gen.generate_findings_for_engagement(
12312 &engagement,
12313 &workpapers,
12314 &team_members,
12315 );
12316
12317 for _ in &findings {
12318 if let Some(pb) = &pb {
12319 pb.inc(1);
12320 }
12321 }
12322 snapshot.findings.extend(findings);
12323
12324 let judgments =
12326 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12327
12328 for _ in &judgments {
12329 if let Some(pb) = &pb {
12330 pb.inc(1);
12331 }
12332 }
12333 snapshot.judgments.extend(judgments);
12334
12335 let (confs, resps) =
12337 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12338 snapshot.confirmations.extend(confs);
12339 snapshot.confirmation_responses.extend(resps);
12340
12341 let team_pairs: Vec<(String, String)> = team_members
12343 .iter()
12344 .map(|id| {
12345 let name = self
12346 .master_data
12347 .employees
12348 .iter()
12349 .find(|e| e.employee_id == *id)
12350 .map(|e| e.display_name.clone())
12351 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12352 (id.clone(), name)
12353 })
12354 .collect();
12355 for wp in &workpapers {
12356 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12357 snapshot.procedure_steps.extend(steps);
12358 }
12359
12360 for wp in &workpapers {
12362 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12363 snapshot.samples.push(sample);
12364 }
12365 }
12366
12367 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12369 snapshot.analytical_results.extend(analytical);
12370
12371 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12373 snapshot.ia_functions.push(ia_func);
12374 snapshot.ia_reports.extend(ia_reports);
12375
12376 let vendor_names: Vec<String> = self
12378 .master_data
12379 .vendors
12380 .iter()
12381 .map(|v| v.name.clone())
12382 .collect();
12383 let customer_names: Vec<String> = self
12384 .master_data
12385 .customers
12386 .iter()
12387 .map(|c| c.name.clone())
12388 .collect();
12389 let (parties, rp_txns) =
12390 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12391 snapshot.related_parties.extend(parties);
12392 snapshot.related_party_transactions.extend(rp_txns);
12393
12394 snapshot.workpapers.extend(workpapers);
12396
12397 {
12399 let scope_id = format!(
12400 "SCOPE-{}-{}",
12401 engagement.engagement_id.simple(),
12402 &engagement.client_entity_id
12403 );
12404 let scope = datasynth_core::models::audit::AuditScope::new(
12405 scope_id.clone(),
12406 engagement.engagement_id.to_string(),
12407 engagement.client_entity_id.clone(),
12408 engagement.materiality,
12409 );
12410 let mut eng = engagement;
12412 eng.scope_id = Some(scope_id);
12413 snapshot.audit_scopes.push(scope);
12414 snapshot.engagements.push(eng);
12415 }
12416 }
12417 }
12418
12419 if self.config.companies.len() > 1 {
12423 let group_materiality = snapshot
12426 .engagements
12427 .first()
12428 .map(|e| e.materiality)
12429 .unwrap_or_else(|| {
12430 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12431 total_revenue * pct
12432 });
12433
12434 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12435 let group_engagement_id = snapshot
12436 .engagements
12437 .first()
12438 .map(|e| e.engagement_id.to_string())
12439 .unwrap_or_else(|| "GROUP-ENG".to_string());
12440
12441 let component_snapshot = component_gen.generate(
12442 &self.config.companies,
12443 group_materiality,
12444 &group_engagement_id,
12445 period_end,
12446 );
12447
12448 snapshot.component_auditors = component_snapshot.component_auditors;
12449 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12450 snapshot.component_instructions = component_snapshot.component_instructions;
12451 snapshot.component_reports = component_snapshot.component_reports;
12452
12453 info!(
12454 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12455 snapshot.component_auditors.len(),
12456 snapshot.component_instructions.len(),
12457 snapshot.component_reports.len(),
12458 );
12459 }
12460
12461 {
12465 let applicable_framework = self
12466 .config
12467 .accounting_standards
12468 .framework
12469 .as_ref()
12470 .map(|f| format!("{f:?}"))
12471 .unwrap_or_else(|| "IFRS".to_string());
12472
12473 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12474 let entity_count = self.config.companies.len();
12475
12476 for engagement in &snapshot.engagements {
12477 let company = self
12478 .config
12479 .companies
12480 .iter()
12481 .find(|c| c.code == engagement.client_entity_id);
12482 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12483 let letter_date = engagement.planning_start;
12484 let letter = letter_gen.generate(
12485 &engagement.engagement_id.to_string(),
12486 &engagement.client_name,
12487 entity_count,
12488 engagement.period_end_date,
12489 currency,
12490 &applicable_framework,
12491 letter_date,
12492 );
12493 snapshot.engagement_letters.push(letter);
12494 }
12495
12496 info!(
12497 "ISA 210 engagement letters: {} generated",
12498 snapshot.engagement_letters.len()
12499 );
12500 }
12501
12502 if self.phase_config.generate_legal_documents {
12506 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12507 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12508 for engagement in &snapshot.engagements {
12509 let employee_names: Vec<String> = self
12513 .master_data
12514 .employees
12515 .iter()
12516 .filter(|e| e.company_code == engagement.client_entity_id)
12517 .map(|e| e.display_name.clone())
12518 .collect();
12519 let names_to_use = if !employee_names.is_empty() {
12520 employee_names
12521 } else {
12522 self.master_data
12523 .employees
12524 .iter()
12525 .take(10)
12526 .map(|e| e.display_name.clone())
12527 .collect()
12528 };
12529 let docs = legal_gen.generate(
12530 &engagement.client_entity_id,
12531 engagement.fiscal_year as i32,
12532 &names_to_use,
12533 );
12534 snapshot.legal_documents.extend(docs);
12535 }
12536 info!(
12537 "v3.3.0 legal documents: {} emitted across {} engagements",
12538 snapshot.legal_documents.len(),
12539 snapshot.engagements.len()
12540 );
12541 }
12542
12543 if self.phase_config.generate_it_controls {
12553 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12554 use std::collections::HashMap;
12555 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12556
12557 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12560 HashMap::new();
12561 for engagement in &snapshot.engagements {
12562 let entry = by_company
12563 .entry(engagement.client_entity_id.clone())
12564 .or_insert((engagement.planning_start, engagement.period_end_date));
12565 if engagement.planning_start < entry.0 {
12566 entry.0 = engagement.planning_start;
12567 }
12568 if engagement.period_end_date > entry.1 {
12569 entry.1 = engagement.period_end_date;
12570 }
12571 }
12572
12573 let systems: Vec<String> = vec![
12577 "SAP ECC",
12578 "SAP S/4 HANA",
12579 "Oracle EBS",
12580 "Workday",
12581 "NetSuite",
12582 "Active Directory",
12583 "SharePoint",
12584 "Salesforce",
12585 "ServiceNow",
12586 "Jira",
12587 "GitHub Enterprise",
12588 "AWS Console",
12589 "Okta",
12590 ]
12591 .into_iter()
12592 .map(String::from)
12593 .collect();
12594
12595 for (company_code, (start, end)) in by_company {
12596 let emps: Vec<(String, String)> = self
12597 .master_data
12598 .employees
12599 .iter()
12600 .filter(|e| e.company_code == company_code)
12601 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12602 .collect();
12603 if emps.is_empty() {
12604 continue;
12605 }
12606 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12609 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12610 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12611 snapshot.it_controls_access_logs.extend(access_logs);
12612 snapshot.it_controls_change_records.extend(change_records);
12613 }
12614
12615 info!(
12616 "v3.3.0 IT controls: {} access logs, {} change records",
12617 snapshot.it_controls_access_logs.len(),
12618 snapshot.it_controls_change_records.len()
12619 );
12620 }
12621
12622 {
12626 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12627 let entity_codes: Vec<String> = self
12628 .config
12629 .companies
12630 .iter()
12631 .map(|c| c.code.clone())
12632 .collect();
12633 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12634 info!(
12635 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12636 subsequent.len(),
12637 subsequent
12638 .iter()
12639 .filter(|e| matches!(
12640 e.classification,
12641 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12642 ))
12643 .count(),
12644 subsequent
12645 .iter()
12646 .filter(|e| matches!(
12647 e.classification,
12648 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12649 ))
12650 .count(),
12651 );
12652 snapshot.subsequent_events = subsequent;
12653 }
12654
12655 {
12659 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12660 let entity_codes: Vec<String> = self
12661 .config
12662 .companies
12663 .iter()
12664 .map(|c| c.code.clone())
12665 .collect();
12666 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12667 info!(
12668 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12669 soc_snapshot.service_organizations.len(),
12670 soc_snapshot.soc_reports.len(),
12671 soc_snapshot.user_entity_controls.len(),
12672 );
12673 snapshot.service_organizations = soc_snapshot.service_organizations;
12674 snapshot.soc_reports = soc_snapshot.soc_reports;
12675 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12676 }
12677
12678 {
12682 use datasynth_generators::audit::going_concern_generator::{
12683 GoingConcernGenerator, GoingConcernInput,
12684 };
12685 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12686 let entity_codes: Vec<String> = self
12687 .config
12688 .companies
12689 .iter()
12690 .map(|c| c.code.clone())
12691 .collect();
12692 let assessment_date = period_end + chrono::Duration::days(75);
12694 let period_label = format!("FY{}", period_end.year());
12695
12696 let gc_inputs: Vec<GoingConcernInput> = self
12707 .config
12708 .companies
12709 .iter()
12710 .map(|company| {
12711 let code = &company.code;
12712 let mut revenue = rust_decimal::Decimal::ZERO;
12713 let mut expenses = rust_decimal::Decimal::ZERO;
12714 let mut current_assets = rust_decimal::Decimal::ZERO;
12715 let mut current_liabs = rust_decimal::Decimal::ZERO;
12716 let mut total_debt = rust_decimal::Decimal::ZERO;
12717
12718 for je in entries.iter().filter(|je| &je.header.company_code == code) {
12719 for line in &je.lines {
12720 let acct = line.gl_account.as_str();
12721 let net = line.debit_amount - line.credit_amount;
12722 if acct.starts_with('4') {
12723 revenue -= net;
12725 } else if acct.starts_with('6') {
12726 expenses += net;
12728 }
12729 if acct.starts_with('1') {
12731 if let Ok(n) = acct.parse::<u32>() {
12733 if (1000..=1499).contains(&n) {
12734 current_assets += net;
12735 }
12736 }
12737 } else if acct.starts_with('2') {
12738 if let Ok(n) = acct.parse::<u32>() {
12739 if (2000..=2499).contains(&n) {
12740 current_liabs -= net; } else if (2500..=2999).contains(&n) {
12743 total_debt -= net;
12745 }
12746 }
12747 }
12748 }
12749 }
12750
12751 let net_income = revenue - expenses;
12752 let working_capital = current_assets - current_liabs;
12753 let operating_cash_flow = net_income;
12756
12757 GoingConcernInput {
12758 entity_code: code.clone(),
12759 net_income,
12760 working_capital,
12761 operating_cash_flow,
12762 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12763 assessment_date,
12764 }
12765 })
12766 .collect();
12767
12768 let assessments = if gc_inputs.is_empty() {
12769 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12770 } else {
12771 gc_gen.generate_for_entities_with_inputs(
12772 &entity_codes,
12773 &gc_inputs,
12774 assessment_date,
12775 &period_label,
12776 )
12777 };
12778 info!(
12779 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12780 assessments.len(),
12781 assessments.iter().filter(|a| matches!(
12782 a.auditor_conclusion,
12783 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12784 )).count(),
12785 assessments.iter().filter(|a| matches!(
12786 a.auditor_conclusion,
12787 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12788 )).count(),
12789 assessments.iter().filter(|a| matches!(
12790 a.auditor_conclusion,
12791 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12792 )).count(),
12793 );
12794 snapshot.going_concern_assessments = assessments;
12795 }
12796
12797 {
12801 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12802 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12803 let entity_codes: Vec<String> = self
12804 .config
12805 .companies
12806 .iter()
12807 .map(|c| c.code.clone())
12808 .collect();
12809 let estimates = est_gen.generate_for_entities(&entity_codes);
12810 info!(
12811 "ISA 540 accounting estimates: {} estimates across {} entities \
12812 ({} with retrospective reviews, {} with auditor point estimates)",
12813 estimates.len(),
12814 entity_codes.len(),
12815 estimates
12816 .iter()
12817 .filter(|e| e.retrospective_review.is_some())
12818 .count(),
12819 estimates
12820 .iter()
12821 .filter(|e| e.auditor_point_estimate.is_some())
12822 .count(),
12823 );
12824 snapshot.accounting_estimates = estimates;
12825 }
12826
12827 {
12831 use datasynth_generators::audit::audit_opinion_generator::{
12832 AuditOpinionGenerator, AuditOpinionInput,
12833 };
12834
12835 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12836
12837 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12839 .engagements
12840 .iter()
12841 .map(|eng| {
12842 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12844 .findings
12845 .iter()
12846 .filter(|f| f.engagement_id == eng.engagement_id)
12847 .cloned()
12848 .collect();
12849
12850 let gc = snapshot
12852 .going_concern_assessments
12853 .iter()
12854 .find(|g| g.entity_code == eng.client_entity_id)
12855 .cloned();
12856
12857 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12859 snapshot.component_reports.clone();
12860
12861 let auditor = self
12862 .master_data
12863 .employees
12864 .first()
12865 .map(|e| e.display_name.clone())
12866 .unwrap_or_else(|| "Global Audit LLP".into());
12867
12868 let partner = self
12869 .master_data
12870 .employees
12871 .get(1)
12872 .map(|e| e.display_name.clone())
12873 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12874
12875 AuditOpinionInput {
12876 entity_code: eng.client_entity_id.clone(),
12877 entity_name: eng.client_name.clone(),
12878 engagement_id: eng.engagement_id,
12879 period_end: eng.period_end_date,
12880 findings: eng_findings,
12881 going_concern: gc,
12882 component_reports: comp_reports,
12883 is_us_listed: {
12885 let fw = &self.config.audit_standards.isa_compliance.framework;
12886 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12887 },
12888 auditor_name: auditor,
12889 engagement_partner: partner,
12890 }
12891 })
12892 .collect();
12893
12894 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12895
12896 for go in &generated_opinions {
12897 snapshot
12898 .key_audit_matters
12899 .extend(go.key_audit_matters.clone());
12900 }
12901 snapshot.audit_opinions = generated_opinions
12902 .into_iter()
12903 .map(|go| go.opinion)
12904 .collect();
12905
12906 info!(
12907 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12908 snapshot.audit_opinions.len(),
12909 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12910 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12911 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12912 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12913 );
12914 }
12915
12916 {
12920 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12921
12922 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12923
12924 for (i, company) in self.config.companies.iter().enumerate() {
12925 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12927 .engagements
12928 .iter()
12929 .filter(|e| e.client_entity_id == company.code)
12930 .map(|e| e.engagement_id)
12931 .collect();
12932
12933 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12934 .findings
12935 .iter()
12936 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12937 .cloned()
12938 .collect();
12939
12940 let emp_count = self.master_data.employees.len();
12942 let ceo_name = if emp_count > 0 {
12943 self.master_data.employees[i % emp_count]
12944 .display_name
12945 .clone()
12946 } else {
12947 format!("CEO of {}", company.name)
12948 };
12949 let cfo_name = if emp_count > 1 {
12950 self.master_data.employees[(i + 1) % emp_count]
12951 .display_name
12952 .clone()
12953 } else {
12954 format!("CFO of {}", company.name)
12955 };
12956
12957 let materiality = snapshot
12959 .engagements
12960 .iter()
12961 .find(|e| e.client_entity_id == company.code)
12962 .map(|e| e.materiality)
12963 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12964
12965 let input = SoxGeneratorInput {
12966 company_code: company.code.clone(),
12967 company_name: company.name.clone(),
12968 fiscal_year,
12969 period_end,
12970 findings: company_findings,
12971 ceo_name,
12972 cfo_name,
12973 materiality_threshold: materiality,
12974 revenue_percent: rust_decimal::Decimal::from(100),
12975 assets_percent: rust_decimal::Decimal::from(100),
12976 significant_accounts: vec![
12977 "Revenue".into(),
12978 "Accounts Receivable".into(),
12979 "Inventory".into(),
12980 "Fixed Assets".into(),
12981 "Accounts Payable".into(),
12982 ],
12983 };
12984
12985 let (certs, assessment) = sox_gen.generate(&input);
12986 snapshot.sox_302_certifications.extend(certs);
12987 snapshot.sox_404_assessments.push(assessment);
12988 }
12989
12990 info!(
12991 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12992 snapshot.sox_302_certifications.len(),
12993 snapshot.sox_404_assessments.len(),
12994 snapshot
12995 .sox_404_assessments
12996 .iter()
12997 .filter(|a| a.icfr_effective)
12998 .count(),
12999 snapshot
13000 .sox_404_assessments
13001 .iter()
13002 .filter(|a| !a.icfr_effective)
13003 .count(),
13004 );
13005 }
13006
13007 {
13011 use datasynth_generators::audit::materiality_generator::{
13012 MaterialityGenerator, MaterialityInput,
13013 };
13014
13015 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13016
13017 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13021
13022 for company in &self.config.companies {
13023 let company_code = company.code.clone();
13024
13025 let company_revenue: rust_decimal::Decimal = entries
13027 .iter()
13028 .filter(|e| e.company_code() == company_code)
13029 .flat_map(|e| e.lines.iter())
13030 .filter(|l| l.account_code.starts_with('4'))
13031 .map(|l| l.credit_amount)
13032 .sum();
13033
13034 let total_assets: rust_decimal::Decimal = entries
13036 .iter()
13037 .filter(|e| e.company_code() == company_code)
13038 .flat_map(|e| e.lines.iter())
13039 .filter(|l| l.account_code.starts_with('1'))
13040 .map(|l| l.debit_amount)
13041 .sum();
13042
13043 let total_expenses: rust_decimal::Decimal = entries
13045 .iter()
13046 .filter(|e| e.company_code() == company_code)
13047 .flat_map(|e| e.lines.iter())
13048 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13049 .map(|l| l.debit_amount)
13050 .sum();
13051
13052 let equity: rust_decimal::Decimal = entries
13054 .iter()
13055 .filter(|e| e.company_code() == company_code)
13056 .flat_map(|e| e.lines.iter())
13057 .filter(|l| l.account_code.starts_with('3'))
13058 .map(|l| l.credit_amount)
13059 .sum();
13060
13061 let pretax_income = company_revenue - total_expenses;
13062
13063 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13065 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13066 .unwrap_or(rust_decimal::Decimal::ONE);
13067 (
13068 total_revenue * w,
13069 total_revenue * w * rust_decimal::Decimal::from(3),
13070 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13071 total_revenue * w * rust_decimal::Decimal::from(2),
13072 )
13073 } else {
13074 (company_revenue, total_assets, pretax_income, equity)
13075 };
13076
13077 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13080 entity_code: company_code,
13081 period: format!("FY{}", fiscal_year),
13082 revenue: rev,
13083 pretax_income: pti,
13084 total_assets: assets,
13085 equity: eq,
13086 gross_profit,
13087 });
13088 }
13089
13090 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13091
13092 info!(
13093 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13094 {} total assets, {} equity benchmarks)",
13095 snapshot.materiality_calculations.len(),
13096 snapshot
13097 .materiality_calculations
13098 .iter()
13099 .filter(|m| matches!(
13100 m.benchmark,
13101 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13102 ))
13103 .count(),
13104 snapshot
13105 .materiality_calculations
13106 .iter()
13107 .filter(|m| matches!(
13108 m.benchmark,
13109 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13110 ))
13111 .count(),
13112 snapshot
13113 .materiality_calculations
13114 .iter()
13115 .filter(|m| matches!(
13116 m.benchmark,
13117 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13118 ))
13119 .count(),
13120 snapshot
13121 .materiality_calculations
13122 .iter()
13123 .filter(|m| matches!(
13124 m.benchmark,
13125 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13126 ))
13127 .count(),
13128 );
13129 }
13130
13131 {
13135 use datasynth_generators::audit::cra_generator::CraGenerator;
13136
13137 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13138
13139 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13141 .audit_scopes
13142 .iter()
13143 .map(|s| (s.entity_code.clone(), s.id.clone()))
13144 .collect();
13145
13146 for company in &self.config.companies {
13147 let cras = cra_gen.generate_for_entity(&company.code, None);
13148 let scope_id = entity_scope_map.get(&company.code).cloned();
13149 let cras_with_scope: Vec<_> = cras
13150 .into_iter()
13151 .map(|mut cra| {
13152 cra.scope_id = scope_id.clone();
13153 cra
13154 })
13155 .collect();
13156 snapshot.combined_risk_assessments.extend(cras_with_scope);
13157 }
13158
13159 let significant_count = snapshot
13160 .combined_risk_assessments
13161 .iter()
13162 .filter(|c| c.significant_risk)
13163 .count();
13164 let high_cra_count = snapshot
13165 .combined_risk_assessments
13166 .iter()
13167 .filter(|c| {
13168 matches!(
13169 c.combined_risk,
13170 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13171 )
13172 })
13173 .count();
13174
13175 info!(
13176 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13177 snapshot.combined_risk_assessments.len(),
13178 significant_count,
13179 high_cra_count,
13180 );
13181 }
13182
13183 {
13187 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13188
13189 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13190
13191 for company in &self.config.companies {
13193 let entity_code = company.code.clone();
13194
13195 let tolerable_error = snapshot
13197 .materiality_calculations
13198 .iter()
13199 .find(|m| m.entity_code == entity_code)
13200 .map(|m| m.tolerable_error);
13201
13202 let entity_cras: Vec<_> = snapshot
13204 .combined_risk_assessments
13205 .iter()
13206 .filter(|c| c.entity_code == entity_code)
13207 .cloned()
13208 .collect();
13209
13210 if !entity_cras.is_empty() {
13211 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13212 snapshot.sampling_plans.extend(plans);
13213 snapshot.sampled_items.extend(items);
13214 }
13215 }
13216
13217 let misstatement_count = snapshot
13218 .sampled_items
13219 .iter()
13220 .filter(|i| i.misstatement_found)
13221 .count();
13222
13223 info!(
13224 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13225 snapshot.sampling_plans.len(),
13226 snapshot.sampled_items.len(),
13227 misstatement_count,
13228 );
13229 }
13230
13231 {
13235 use datasynth_generators::audit::scots_generator::{
13236 ScotsGenerator, ScotsGeneratorConfig,
13237 };
13238
13239 let ic_enabled = self.config.intercompany.enabled;
13240
13241 let config = ScotsGeneratorConfig {
13242 intercompany_enabled: ic_enabled,
13243 ..ScotsGeneratorConfig::default()
13244 };
13245 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13246
13247 for company in &self.config.companies {
13248 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13249 snapshot
13250 .significant_transaction_classes
13251 .extend(entity_scots);
13252 }
13253
13254 let estimation_count = snapshot
13255 .significant_transaction_classes
13256 .iter()
13257 .filter(|s| {
13258 matches!(
13259 s.transaction_type,
13260 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13261 )
13262 })
13263 .count();
13264
13265 info!(
13266 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13267 snapshot.significant_transaction_classes.len(),
13268 estimation_count,
13269 );
13270 }
13271
13272 {
13276 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13277
13278 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13279 let entity_codes: Vec<String> = self
13280 .config
13281 .companies
13282 .iter()
13283 .map(|c| c.code.clone())
13284 .collect();
13285 let unusual_flags =
13286 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13287 info!(
13288 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13289 unusual_flags.len(),
13290 unusual_flags
13291 .iter()
13292 .filter(|f| matches!(
13293 f.severity,
13294 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13295 ))
13296 .count(),
13297 unusual_flags
13298 .iter()
13299 .filter(|f| matches!(
13300 f.severity,
13301 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13302 ))
13303 .count(),
13304 unusual_flags
13305 .iter()
13306 .filter(|f| matches!(
13307 f.severity,
13308 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13309 ))
13310 .count(),
13311 );
13312 snapshot.unusual_items = unusual_flags;
13313 }
13314
13315 {
13319 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13320
13321 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13322 let entity_codes: Vec<String> = self
13323 .config
13324 .companies
13325 .iter()
13326 .map(|c| c.code.clone())
13327 .collect();
13328 let current_period_label = format!("FY{fiscal_year}");
13329 let prior_period_label = format!("FY{}", fiscal_year - 1);
13330 let analytical_rels = ar_gen.generate_for_entities(
13331 &entity_codes,
13332 entries,
13333 ¤t_period_label,
13334 &prior_period_label,
13335 );
13336 let out_of_range = analytical_rels
13337 .iter()
13338 .filter(|r| !r.within_expected_range)
13339 .count();
13340 info!(
13341 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13342 analytical_rels.len(),
13343 out_of_range,
13344 );
13345 snapshot.analytical_relationships = analytical_rels;
13346 }
13347
13348 if let Some(pb) = pb {
13349 pb.finish_with_message(format!(
13350 "Audit data: {} engagements, {} workpapers, {} evidence, \
13351 {} confirmations, {} procedure steps, {} samples, \
13352 {} analytical, {} IA funcs, {} related parties, \
13353 {} component auditors, {} letters, {} subsequent events, \
13354 {} service orgs, {} going concern, {} accounting estimates, \
13355 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13356 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13357 {} unusual items, {} analytical relationships",
13358 snapshot.engagements.len(),
13359 snapshot.workpapers.len(),
13360 snapshot.evidence.len(),
13361 snapshot.confirmations.len(),
13362 snapshot.procedure_steps.len(),
13363 snapshot.samples.len(),
13364 snapshot.analytical_results.len(),
13365 snapshot.ia_functions.len(),
13366 snapshot.related_parties.len(),
13367 snapshot.component_auditors.len(),
13368 snapshot.engagement_letters.len(),
13369 snapshot.subsequent_events.len(),
13370 snapshot.service_organizations.len(),
13371 snapshot.going_concern_assessments.len(),
13372 snapshot.accounting_estimates.len(),
13373 snapshot.audit_opinions.len(),
13374 snapshot.key_audit_matters.len(),
13375 snapshot.sox_302_certifications.len(),
13376 snapshot.sox_404_assessments.len(),
13377 snapshot.materiality_calculations.len(),
13378 snapshot.combined_risk_assessments.len(),
13379 snapshot.sampling_plans.len(),
13380 snapshot.significant_transaction_classes.len(),
13381 snapshot.unusual_items.len(),
13382 snapshot.analytical_relationships.len(),
13383 ));
13384 }
13385
13386 {
13393 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13394 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13395 debug!(
13396 "PCAOB-ISA mappings generated: {} mappings",
13397 snapshot.isa_pcaob_mappings.len()
13398 );
13399 }
13400
13401 {
13408 use datasynth_standards::audit::isa_reference::IsaStandard;
13409 snapshot.isa_mappings = IsaStandard::standard_entries();
13410 debug!(
13411 "ISA standard entries generated: {} standards",
13412 snapshot.isa_mappings.len()
13413 );
13414 }
13415
13416 {
13419 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13420 .engagements
13421 .iter()
13422 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13423 .collect();
13424
13425 for rpt in &mut snapshot.related_party_transactions {
13426 if rpt.journal_entry_id.is_some() {
13427 continue; }
13429 let entity = engagement_by_id
13430 .get(&rpt.engagement_id.to_string())
13431 .copied()
13432 .unwrap_or("");
13433
13434 let best_je = entries
13436 .iter()
13437 .filter(|je| je.header.company_code == entity)
13438 .min_by_key(|je| {
13439 (je.header.posting_date - rpt.transaction_date)
13440 .num_days()
13441 .abs()
13442 });
13443
13444 if let Some(je) = best_je {
13445 rpt.journal_entry_id = Some(je.header.document_id.to_string());
13446 }
13447 }
13448
13449 let linked = snapshot
13450 .related_party_transactions
13451 .iter()
13452 .filter(|t| t.journal_entry_id.is_some())
13453 .count();
13454 debug!(
13455 "Linked {}/{} related party transactions to journal entries",
13456 linked,
13457 snapshot.related_party_transactions.len()
13458 );
13459 }
13460
13461 if !snapshot.engagements.is_empty() {
13467 use datasynth_generators::audit_opinion_generator::{
13468 AuditOpinionGenerator, AuditOpinionInput,
13469 };
13470
13471 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13472 let inputs: Vec<AuditOpinionInput> = snapshot
13473 .engagements
13474 .iter()
13475 .map(|eng| {
13476 let findings = snapshot
13477 .findings
13478 .iter()
13479 .filter(|f| f.engagement_id == eng.engagement_id)
13480 .cloned()
13481 .collect();
13482 let going_concern = snapshot
13483 .going_concern_assessments
13484 .iter()
13485 .find(|gc| gc.entity_code == eng.client_entity_id)
13486 .cloned();
13487 let component_reports = snapshot
13490 .component_reports
13491 .iter()
13492 .filter(|r| r.entity_code == eng.client_entity_id)
13493 .cloned()
13494 .collect();
13495
13496 AuditOpinionInput {
13497 entity_code: eng.client_entity_id.clone(),
13498 entity_name: eng.client_name.clone(),
13499 engagement_id: eng.engagement_id,
13500 period_end: eng.period_end_date,
13501 findings,
13502 going_concern,
13503 component_reports,
13504 is_us_listed: matches!(
13505 eng.engagement_type,
13506 datasynth_core::audit::EngagementType::IntegratedAudit
13507 | datasynth_core::audit::EngagementType::Sox404
13508 ),
13509 auditor_name: "DataSynth Audit LLP".to_string(),
13510 engagement_partner: "Engagement Partner".to_string(),
13511 }
13512 })
13513 .collect();
13514
13515 let generated = opinion_gen.generate_batch(&inputs);
13516 for g in generated {
13517 snapshot.key_audit_matters.extend(g.key_audit_matters);
13518 snapshot.audit_opinions.push(g.opinion);
13519 }
13520 debug!(
13521 "Generated {} audit opinions with {} key audit matters",
13522 snapshot.audit_opinions.len(),
13523 snapshot.key_audit_matters.len()
13524 );
13525 }
13526
13527 Ok(snapshot)
13528 }
13529
13530 fn generate_audit_data_with_fsm(
13537 &mut self,
13538 entries: &[JournalEntry],
13539 ) -> SynthResult<AuditSnapshot> {
13540 use datasynth_audit_fsm::{
13541 context::EngagementContext,
13542 engine::AuditFsmEngine,
13543 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13544 };
13545 use rand::SeedableRng;
13546 use rand_chacha::ChaCha8Rng;
13547
13548 info!("Audit FSM: generating audit data via FSM engine");
13549
13550 let fsm_config = self
13551 .config
13552 .audit
13553 .fsm
13554 .as_ref()
13555 .expect("FSM config must be present when FSM is enabled");
13556
13557 let bwp = match fsm_config.blueprint.as_str() {
13559 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13560 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13561 _ => {
13562 warn!(
13563 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13564 fsm_config.blueprint
13565 );
13566 BlueprintWithPreconditions::load_builtin_fsa()
13567 }
13568 }
13569 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13570
13571 let overlay = match fsm_config.overlay.as_str() {
13573 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13574 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13575 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13576 _ => {
13577 warn!(
13578 "Unknown FSM overlay '{}', falling back to builtin:default",
13579 fsm_config.overlay
13580 );
13581 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13582 }
13583 }
13584 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13585
13586 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13588 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13589 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13590
13591 let company = self.config.companies.first();
13593 let company_code = company
13594 .map(|c| c.code.clone())
13595 .unwrap_or_else(|| "UNKNOWN".to_string());
13596 let company_name = company
13597 .map(|c| c.name.clone())
13598 .unwrap_or_else(|| "Unknown Company".to_string());
13599 let currency = company
13600 .map(|c| c.currency.clone())
13601 .unwrap_or_else(|| "USD".to_string());
13602
13603 let entity_entries: Vec<_> = entries
13605 .iter()
13606 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13607 .cloned()
13608 .collect();
13609 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
13613 .iter()
13614 .flat_map(|e| e.lines.iter())
13615 .filter(|l| l.account_code.starts_with('4'))
13616 .map(|l| l.credit_amount - l.debit_amount)
13617 .sum();
13618
13619 let total_assets: rust_decimal::Decimal = entries
13620 .iter()
13621 .flat_map(|e| e.lines.iter())
13622 .filter(|l| l.account_code.starts_with('1'))
13623 .map(|l| l.debit_amount - l.credit_amount)
13624 .sum();
13625
13626 let total_expenses: rust_decimal::Decimal = entries
13627 .iter()
13628 .flat_map(|e| e.lines.iter())
13629 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13630 .map(|l| l.debit_amount)
13631 .sum();
13632
13633 let equity: rust_decimal::Decimal = entries
13634 .iter()
13635 .flat_map(|e| e.lines.iter())
13636 .filter(|l| l.account_code.starts_with('3'))
13637 .map(|l| l.credit_amount - l.debit_amount)
13638 .sum();
13639
13640 let total_debt: rust_decimal::Decimal = entries
13641 .iter()
13642 .flat_map(|e| e.lines.iter())
13643 .filter(|l| l.account_code.starts_with('2'))
13644 .map(|l| l.credit_amount - l.debit_amount)
13645 .sum();
13646
13647 let pretax_income = total_revenue - total_expenses;
13648
13649 let cogs: rust_decimal::Decimal = entries
13650 .iter()
13651 .flat_map(|e| e.lines.iter())
13652 .filter(|l| l.account_code.starts_with('5'))
13653 .map(|l| l.debit_amount)
13654 .sum();
13655 let gross_profit = total_revenue - cogs;
13656
13657 let current_assets: rust_decimal::Decimal = entries
13658 .iter()
13659 .flat_map(|e| e.lines.iter())
13660 .filter(|l| {
13661 l.account_code.starts_with("10")
13662 || l.account_code.starts_with("11")
13663 || l.account_code.starts_with("12")
13664 || l.account_code.starts_with("13")
13665 })
13666 .map(|l| l.debit_amount - l.credit_amount)
13667 .sum();
13668 let current_liabilities: rust_decimal::Decimal = entries
13669 .iter()
13670 .flat_map(|e| e.lines.iter())
13671 .filter(|l| {
13672 l.account_code.starts_with("20")
13673 || l.account_code.starts_with("21")
13674 || l.account_code.starts_with("22")
13675 })
13676 .map(|l| l.credit_amount - l.debit_amount)
13677 .sum();
13678 let working_capital = current_assets - current_liabilities;
13679
13680 let depreciation: rust_decimal::Decimal = entries
13681 .iter()
13682 .flat_map(|e| e.lines.iter())
13683 .filter(|l| l.account_code.starts_with("60"))
13684 .map(|l| l.debit_amount)
13685 .sum();
13686 let operating_cash_flow = pretax_income + depreciation;
13687
13688 let accounts: Vec<String> = self
13690 .coa
13691 .as_ref()
13692 .map(|coa| {
13693 coa.get_postable_accounts()
13694 .iter()
13695 .map(|acc| acc.account_code().to_string())
13696 .collect()
13697 })
13698 .unwrap_or_default();
13699
13700 let team_member_ids: Vec<String> = self
13702 .master_data
13703 .employees
13704 .iter()
13705 .take(8) .map(|e| e.employee_id.clone())
13707 .collect();
13708 let team_member_pairs: Vec<(String, String)> = self
13709 .master_data
13710 .employees
13711 .iter()
13712 .take(8)
13713 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13714 .collect();
13715
13716 let vendor_names: Vec<String> = self
13717 .master_data
13718 .vendors
13719 .iter()
13720 .map(|v| v.name.clone())
13721 .collect();
13722 let customer_names: Vec<String> = self
13723 .master_data
13724 .customers
13725 .iter()
13726 .map(|c| c.name.clone())
13727 .collect();
13728
13729 let entity_codes: Vec<String> = self
13730 .config
13731 .companies
13732 .iter()
13733 .map(|c| c.code.clone())
13734 .collect();
13735
13736 let journal_entry_ids: Vec<String> = entries
13738 .iter()
13739 .take(50)
13740 .map(|e| e.header.document_id.to_string())
13741 .collect();
13742
13743 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13745 for entry in entries {
13746 for line in &entry.lines {
13747 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13748 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13749 *account_balances
13750 .entry(line.account_code.clone())
13751 .or_insert(0.0) += debit_f64 - credit_f64;
13752 }
13753 }
13754
13755 let control_ids: Vec<String> = Vec::new();
13760 let anomaly_refs: Vec<String> = Vec::new();
13761
13762 let mut context = EngagementContext {
13763 company_code,
13764 company_name,
13765 fiscal_year: start_date.year(),
13766 currency,
13767 total_revenue,
13768 total_assets,
13769 engagement_start: start_date,
13770 report_date: period_end,
13771 pretax_income,
13772 equity,
13773 gross_profit,
13774 working_capital,
13775 operating_cash_flow,
13776 total_debt,
13777 team_member_ids,
13778 team_member_pairs,
13779 accounts,
13780 vendor_names,
13781 customer_names,
13782 journal_entry_ids,
13783 account_balances,
13784 control_ids,
13785 anomaly_refs,
13786 journal_entries: entries.to_vec(),
13787 is_us_listed: false,
13788 entity_codes,
13789 auditor_firm_name: "DataSynth Audit LLP".into(),
13790 accounting_framework: self
13791 .config
13792 .accounting_standards
13793 .framework
13794 .map(|f| match f {
13795 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13796 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13797 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13798 "French GAAP"
13799 }
13800 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13801 "German GAAP"
13802 }
13803 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13804 "Dual Reporting"
13805 }
13806 })
13807 .unwrap_or("IFRS")
13808 .into(),
13809 };
13810
13811 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13813 let rng = ChaCha8Rng::seed_from_u64(seed);
13814 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13815
13816 let mut result = engine
13817 .run_engagement(&context)
13818 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13819
13820 info!(
13821 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13822 {} phases completed, duration {:.1}h",
13823 result.event_log.len(),
13824 result.artifacts.total_artifacts(),
13825 result.anomalies.len(),
13826 result.phases_completed.len(),
13827 result.total_duration_hours,
13828 );
13829
13830 let tb_entity = context.company_code.clone();
13832 let tb_fy = context.fiscal_year;
13833 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13834 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13835 entries,
13836 &tb_entity,
13837 tb_fy,
13838 self.coa.as_ref().map(|c| c.as_ref()),
13839 );
13840
13841 let bag = result.artifacts;
13843 let mut snapshot = AuditSnapshot {
13844 engagements: bag.engagements,
13845 engagement_letters: bag.engagement_letters,
13846 materiality_calculations: bag.materiality_calculations,
13847 risk_assessments: bag.risk_assessments,
13848 combined_risk_assessments: bag.combined_risk_assessments,
13849 workpapers: bag.workpapers,
13850 evidence: bag.evidence,
13851 findings: bag.findings,
13852 judgments: bag.judgments,
13853 sampling_plans: bag.sampling_plans,
13854 sampled_items: bag.sampled_items,
13855 analytical_results: bag.analytical_results,
13856 going_concern_assessments: bag.going_concern_assessments,
13857 subsequent_events: bag.subsequent_events,
13858 audit_opinions: bag.audit_opinions,
13859 key_audit_matters: bag.key_audit_matters,
13860 procedure_steps: bag.procedure_steps,
13861 samples: bag.samples,
13862 confirmations: bag.confirmations,
13863 confirmation_responses: bag.confirmation_responses,
13864 fsm_event_trail: Some(result.event_log),
13866 ..Default::default()
13868 };
13869
13870 {
13872 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13873 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13874 }
13875 {
13876 use datasynth_standards::audit::isa_reference::IsaStandard;
13877 snapshot.isa_mappings = IsaStandard::standard_entries();
13878 }
13879
13880 info!(
13881 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13882 {} risk assessments, {} findings, {} materiality calcs",
13883 snapshot.engagements.len(),
13884 snapshot.workpapers.len(),
13885 snapshot.evidence.len(),
13886 snapshot.risk_assessments.len(),
13887 snapshot.findings.len(),
13888 snapshot.materiality_calculations.len(),
13889 );
13890
13891 Ok(snapshot)
13892 }
13893
13894 fn export_graphs(
13901 &mut self,
13902 entries: &[JournalEntry],
13903 _coa: &Arc<ChartOfAccounts>,
13904 stats: &mut EnhancedGenerationStatistics,
13905 ) -> SynthResult<GraphExportSnapshot> {
13906 let pb = self.create_progress_bar(100, "Exporting Graphs");
13907
13908 let mut snapshot = GraphExportSnapshot::default();
13909
13910 let output_dir = self
13912 .output_path
13913 .clone()
13914 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13915 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13916
13917 for graph_type in &self.config.graph_export.graph_types {
13919 if let Some(pb) = &pb {
13920 pb.inc(10);
13921 }
13922
13923 let graph_config = TransactionGraphConfig {
13925 include_vendors: false,
13926 include_customers: false,
13927 create_debit_credit_edges: true,
13928 include_document_nodes: graph_type.include_document_nodes,
13929 min_edge_weight: graph_type.min_edge_weight,
13930 aggregate_parallel_edges: graph_type.aggregate_edges,
13931 framework: None,
13932 };
13933
13934 let mut builder = TransactionGraphBuilder::new(graph_config);
13935 builder.add_journal_entries(entries);
13936 let graph = builder.build();
13937
13938 stats.graph_node_count += graph.node_count();
13940 stats.graph_edge_count += graph.edge_count();
13941
13942 if let Some(pb) = &pb {
13943 pb.inc(40);
13944 }
13945
13946 for format in &self.config.graph_export.formats {
13948 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13949
13950 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13952 warn!("Failed to create graph output directory: {}", e);
13953 continue;
13954 }
13955
13956 match format {
13957 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13958 let pyg_config = PyGExportConfig {
13959 common: datasynth_graph::CommonExportConfig {
13960 export_node_features: true,
13961 export_edge_features: true,
13962 export_node_labels: true,
13963 export_edge_labels: true,
13964 export_masks: true,
13965 train_ratio: self.config.graph_export.train_ratio,
13966 val_ratio: self.config.graph_export.validation_ratio,
13967 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13968 },
13969 one_hot_categoricals: false,
13970 };
13971
13972 let exporter = PyGExporter::new(pyg_config);
13973 match exporter.export(&graph, &format_dir) {
13974 Ok(metadata) => {
13975 snapshot.exports.insert(
13976 format!("{}_{}", graph_type.name, "pytorch_geometric"),
13977 GraphExportInfo {
13978 name: graph_type.name.clone(),
13979 format: "pytorch_geometric".to_string(),
13980 output_path: format_dir.clone(),
13981 node_count: metadata.num_nodes,
13982 edge_count: metadata.num_edges,
13983 },
13984 );
13985 snapshot.graph_count += 1;
13986 }
13987 Err(e) => {
13988 warn!("Failed to export PyTorch Geometric graph: {}", e);
13989 }
13990 }
13991 }
13992 datasynth_config::schema::GraphExportFormat::Neo4j => {
13993 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13994
13995 let neo4j_config = Neo4jExportConfig {
13996 export_node_properties: true,
13997 export_edge_properties: true,
13998 export_features: true,
13999 generate_cypher: true,
14000 generate_admin_import: true,
14001 database_name: "synth".to_string(),
14002 cypher_batch_size: 1000,
14003 };
14004
14005 let exporter = Neo4jExporter::new(neo4j_config);
14006 match exporter.export(&graph, &format_dir) {
14007 Ok(metadata) => {
14008 snapshot.exports.insert(
14009 format!("{}_{}", graph_type.name, "neo4j"),
14010 GraphExportInfo {
14011 name: graph_type.name.clone(),
14012 format: "neo4j".to_string(),
14013 output_path: format_dir.clone(),
14014 node_count: metadata.num_nodes,
14015 edge_count: metadata.num_edges,
14016 },
14017 );
14018 snapshot.graph_count += 1;
14019 }
14020 Err(e) => {
14021 warn!("Failed to export Neo4j graph: {}", e);
14022 }
14023 }
14024 }
14025 datasynth_config::schema::GraphExportFormat::Dgl => {
14026 use datasynth_graph::{DGLExportConfig, DGLExporter};
14027
14028 let dgl_config = DGLExportConfig {
14029 common: datasynth_graph::CommonExportConfig {
14030 export_node_features: true,
14031 export_edge_features: true,
14032 export_node_labels: true,
14033 export_edge_labels: true,
14034 export_masks: true,
14035 train_ratio: self.config.graph_export.train_ratio,
14036 val_ratio: self.config.graph_export.validation_ratio,
14037 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14038 },
14039 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14040 include_pickle_script: true, };
14042
14043 let exporter = DGLExporter::new(dgl_config);
14044 match exporter.export(&graph, &format_dir) {
14045 Ok(metadata) => {
14046 snapshot.exports.insert(
14047 format!("{}_{}", graph_type.name, "dgl"),
14048 GraphExportInfo {
14049 name: graph_type.name.clone(),
14050 format: "dgl".to_string(),
14051 output_path: format_dir.clone(),
14052 node_count: metadata.common.num_nodes,
14053 edge_count: metadata.common.num_edges,
14054 },
14055 );
14056 snapshot.graph_count += 1;
14057 }
14058 Err(e) => {
14059 warn!("Failed to export DGL graph: {}", e);
14060 }
14061 }
14062 }
14063 datasynth_config::schema::GraphExportFormat::RustGraph => {
14064 use datasynth_graph::{
14065 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14066 };
14067
14068 let rustgraph_config = RustGraphExportConfig {
14069 include_features: true,
14070 include_temporal: true,
14071 include_labels: true,
14072 source_name: "datasynth".to_string(),
14073 batch_id: None,
14074 output_format: RustGraphOutputFormat::JsonLines,
14075 export_node_properties: true,
14076 export_edge_properties: true,
14077 pretty_print: false,
14078 };
14079
14080 let exporter = RustGraphExporter::new(rustgraph_config);
14081 match exporter.export(&graph, &format_dir) {
14082 Ok(metadata) => {
14083 snapshot.exports.insert(
14084 format!("{}_{}", graph_type.name, "rustgraph"),
14085 GraphExportInfo {
14086 name: graph_type.name.clone(),
14087 format: "rustgraph".to_string(),
14088 output_path: format_dir.clone(),
14089 node_count: metadata.num_nodes,
14090 edge_count: metadata.num_edges,
14091 },
14092 );
14093 snapshot.graph_count += 1;
14094 }
14095 Err(e) => {
14096 warn!("Failed to export RustGraph: {}", e);
14097 }
14098 }
14099 }
14100 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14101 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14103 }
14104 }
14105 }
14106
14107 if let Some(pb) = &pb {
14108 pb.inc(40);
14109 }
14110 }
14111
14112 stats.graph_export_count = snapshot.graph_count;
14113 snapshot.exported = snapshot.graph_count > 0;
14114
14115 if let Some(pb) = pb {
14116 pb.finish_with_message(format!(
14117 "Graphs exported: {} graphs ({} nodes, {} edges)",
14118 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14119 ));
14120 }
14121
14122 Ok(snapshot)
14123 }
14124
14125 fn build_additional_graphs(
14130 &self,
14131 banking: &BankingSnapshot,
14132 intercompany: &IntercompanySnapshot,
14133 entries: &[JournalEntry],
14134 stats: &mut EnhancedGenerationStatistics,
14135 ) {
14136 let output_dir = self
14137 .output_path
14138 .clone()
14139 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14140 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14141
14142 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14144 info!("Phase 10c: Building banking network graph");
14145 let config = BankingGraphConfig::default();
14146 let mut builder = BankingGraphBuilder::new(config);
14147 builder.add_customers(&banking.customers);
14148 builder.add_accounts(&banking.accounts, &banking.customers);
14149 builder.add_transactions(&banking.transactions);
14150 let graph = builder.build();
14151
14152 let node_count = graph.node_count();
14153 let edge_count = graph.edge_count();
14154 stats.graph_node_count += node_count;
14155 stats.graph_edge_count += edge_count;
14156
14157 for format in &self.config.graph_export.formats {
14159 if matches!(
14160 format,
14161 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14162 ) {
14163 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14164 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14165 warn!("Failed to create banking graph output dir: {}", e);
14166 continue;
14167 }
14168 let pyg_config = PyGExportConfig::default();
14169 let exporter = PyGExporter::new(pyg_config);
14170 if let Err(e) = exporter.export(&graph, &format_dir) {
14171 warn!("Failed to export banking graph as PyG: {}", e);
14172 } else {
14173 info!(
14174 "Banking network graph exported: {} nodes, {} edges",
14175 node_count, edge_count
14176 );
14177 }
14178 }
14179 }
14180 }
14181
14182 let approval_entries: Vec<_> = entries
14184 .iter()
14185 .filter(|je| je.header.approval_workflow.is_some())
14186 .collect();
14187
14188 if !approval_entries.is_empty() {
14189 info!(
14190 "Phase 10c: Building approval network graph ({} entries with approvals)",
14191 approval_entries.len()
14192 );
14193 let config = ApprovalGraphConfig::default();
14194 let mut builder = ApprovalGraphBuilder::new(config);
14195
14196 for je in &approval_entries {
14197 if let Some(ref wf) = je.header.approval_workflow {
14198 for action in &wf.actions {
14199 let record = datasynth_core::models::ApprovalRecord {
14200 approval_id: format!(
14201 "APR-{}-{}",
14202 je.header.document_id, action.approval_level
14203 ),
14204 document_number: je.header.document_id.to_string(),
14205 document_type: "JE".to_string(),
14206 company_code: je.company_code().to_string(),
14207 requester_id: wf.preparer_id.clone(),
14208 requester_name: Some(wf.preparer_name.clone()),
14209 approver_id: action.actor_id.clone(),
14210 approver_name: action.actor_name.clone(),
14211 approval_date: je.posting_date(),
14212 action: format!("{:?}", action.action),
14213 amount: wf.amount,
14214 approval_limit: None,
14215 comments: action.comments.clone(),
14216 delegation_from: None,
14217 is_auto_approved: false,
14218 };
14219 builder.add_approval(&record);
14220 }
14221 }
14222 }
14223
14224 let graph = builder.build();
14225 let node_count = graph.node_count();
14226 let edge_count = graph.edge_count();
14227 stats.graph_node_count += node_count;
14228 stats.graph_edge_count += edge_count;
14229
14230 for format in &self.config.graph_export.formats {
14232 if matches!(
14233 format,
14234 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14235 ) {
14236 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14237 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14238 warn!("Failed to create approval graph output dir: {}", e);
14239 continue;
14240 }
14241 let pyg_config = PyGExportConfig::default();
14242 let exporter = PyGExporter::new(pyg_config);
14243 if let Err(e) = exporter.export(&graph, &format_dir) {
14244 warn!("Failed to export approval graph as PyG: {}", e);
14245 } else {
14246 info!(
14247 "Approval network graph exported: {} nodes, {} edges",
14248 node_count, edge_count
14249 );
14250 }
14251 }
14252 }
14253 }
14254
14255 if self.config.companies.len() >= 2 {
14257 info!(
14258 "Phase 10c: Building entity relationship graph ({} companies)",
14259 self.config.companies.len()
14260 );
14261
14262 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14263 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14264
14265 let parent_code = &self.config.companies[0].code;
14267 let mut companies: Vec<datasynth_core::models::Company> =
14268 Vec::with_capacity(self.config.companies.len());
14269
14270 let first = &self.config.companies[0];
14272 companies.push(datasynth_core::models::Company::parent(
14273 &first.code,
14274 &first.name,
14275 &first.country,
14276 &first.currency,
14277 ));
14278
14279 for cc in self.config.companies.iter().skip(1) {
14281 companies.push(datasynth_core::models::Company::subsidiary(
14282 &cc.code,
14283 &cc.name,
14284 &cc.country,
14285 &cc.currency,
14286 parent_code,
14287 rust_decimal::Decimal::from(100),
14288 ));
14289 }
14290
14291 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14293 self.config
14294 .companies
14295 .iter()
14296 .skip(1)
14297 .enumerate()
14298 .map(|(i, cc)| {
14299 let mut rel =
14300 datasynth_core::models::intercompany::IntercompanyRelationship::new(
14301 format!("REL{:03}", i + 1),
14302 parent_code.clone(),
14303 cc.code.clone(),
14304 rust_decimal::Decimal::from(100),
14305 start_date,
14306 );
14307 rel.functional_currency = cc.currency.clone();
14308 rel
14309 })
14310 .collect();
14311
14312 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14313 builder.add_companies(&companies);
14314 builder.add_ownership_relationships(&relationships);
14315
14316 for pair in &intercompany.matched_pairs {
14318 builder.add_intercompany_edge(
14319 &pair.seller_company,
14320 &pair.buyer_company,
14321 pair.amount,
14322 &format!("{:?}", pair.transaction_type),
14323 );
14324 }
14325
14326 let graph = builder.build();
14327 let node_count = graph.node_count();
14328 let edge_count = graph.edge_count();
14329 stats.graph_node_count += node_count;
14330 stats.graph_edge_count += edge_count;
14331
14332 for format in &self.config.graph_export.formats {
14334 if matches!(
14335 format,
14336 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14337 ) {
14338 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14339 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14340 warn!("Failed to create entity graph output dir: {}", e);
14341 continue;
14342 }
14343 let pyg_config = PyGExportConfig::default();
14344 let exporter = PyGExporter::new(pyg_config);
14345 if let Err(e) = exporter.export(&graph, &format_dir) {
14346 warn!("Failed to export entity graph as PyG: {}", e);
14347 } else {
14348 info!(
14349 "Entity relationship graph exported: {} nodes, {} edges",
14350 node_count, edge_count
14351 );
14352 }
14353 }
14354 }
14355 } else {
14356 debug!(
14357 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14358 self.config.companies.len()
14359 );
14360 }
14361 }
14362
14363 #[allow(clippy::too_many_arguments)]
14370 fn export_hypergraph(
14371 &self,
14372 coa: &Arc<ChartOfAccounts>,
14373 entries: &[JournalEntry],
14374 document_flows: &DocumentFlowSnapshot,
14375 sourcing: &SourcingSnapshot,
14376 hr: &HrSnapshot,
14377 manufacturing: &ManufacturingSnapshot,
14378 banking: &BankingSnapshot,
14379 audit: &AuditSnapshot,
14380 financial_reporting: &FinancialReportingSnapshot,
14381 ocpm: &OcpmSnapshot,
14382 compliance: &ComplianceRegulationsSnapshot,
14383 stats: &mut EnhancedGenerationStatistics,
14384 ) -> SynthResult<HypergraphExportInfo> {
14385 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14386 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14387 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14388 use datasynth_graph::models::hypergraph::AggregationStrategy;
14389
14390 let hg_settings = &self.config.graph_export.hypergraph;
14391
14392 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14394 "truncate" => AggregationStrategy::Truncate,
14395 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14396 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14397 "importance_sample" => AggregationStrategy::ImportanceSample,
14398 _ => AggregationStrategy::PoolByCounterparty,
14399 };
14400
14401 let builder_config = HypergraphConfig {
14402 max_nodes: hg_settings.max_nodes,
14403 aggregation_strategy,
14404 include_coso: hg_settings.governance_layer.include_coso,
14405 include_controls: hg_settings.governance_layer.include_controls,
14406 include_sox: hg_settings.governance_layer.include_sox,
14407 include_vendors: hg_settings.governance_layer.include_vendors,
14408 include_customers: hg_settings.governance_layer.include_customers,
14409 include_employees: hg_settings.governance_layer.include_employees,
14410 include_p2p: hg_settings.process_layer.include_p2p,
14411 include_o2c: hg_settings.process_layer.include_o2c,
14412 include_s2c: hg_settings.process_layer.include_s2c,
14413 include_h2r: hg_settings.process_layer.include_h2r,
14414 include_mfg: hg_settings.process_layer.include_mfg,
14415 include_bank: hg_settings.process_layer.include_bank,
14416 include_audit: hg_settings.process_layer.include_audit,
14417 include_r2r: hg_settings.process_layer.include_r2r,
14418 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14419 docs_per_counterparty_threshold: hg_settings
14420 .process_layer
14421 .docs_per_counterparty_threshold,
14422 include_accounts: hg_settings.accounting_layer.include_accounts,
14423 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14424 include_cross_layer_edges: hg_settings.cross_layer.enabled,
14425 include_compliance: self.config.compliance_regulations.enabled,
14426 include_tax: true,
14427 include_treasury: true,
14428 include_esg: true,
14429 include_project: true,
14430 include_intercompany: true,
14431 include_temporal_events: true,
14432 };
14433
14434 let mut builder = HypergraphBuilder::new(builder_config);
14435
14436 builder.add_coso_framework();
14438
14439 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14442 let controls = InternalControl::standard_controls();
14443 builder.add_controls(&controls);
14444 }
14445
14446 builder.add_vendors(&self.master_data.vendors);
14448 builder.add_customers(&self.master_data.customers);
14449 builder.add_employees(&self.master_data.employees);
14450
14451 builder.add_p2p_documents(
14453 &document_flows.purchase_orders,
14454 &document_flows.goods_receipts,
14455 &document_flows.vendor_invoices,
14456 &document_flows.payments,
14457 );
14458 builder.add_o2c_documents(
14459 &document_flows.sales_orders,
14460 &document_flows.deliveries,
14461 &document_flows.customer_invoices,
14462 );
14463 builder.add_s2c_documents(
14464 &sourcing.sourcing_projects,
14465 &sourcing.qualifications,
14466 &sourcing.rfx_events,
14467 &sourcing.bids,
14468 &sourcing.bid_evaluations,
14469 &sourcing.contracts,
14470 );
14471 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14472 builder.add_mfg_documents(
14473 &manufacturing.production_orders,
14474 &manufacturing.quality_inspections,
14475 &manufacturing.cycle_counts,
14476 );
14477 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14478 builder.add_audit_documents(
14479 &audit.engagements,
14480 &audit.workpapers,
14481 &audit.findings,
14482 &audit.evidence,
14483 &audit.risk_assessments,
14484 &audit.judgments,
14485 &audit.materiality_calculations,
14486 &audit.audit_opinions,
14487 &audit.going_concern_assessments,
14488 );
14489 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14490
14491 if let Some(ref event_log) = ocpm.event_log {
14493 builder.add_ocpm_events(event_log);
14494 }
14495
14496 if self.config.compliance_regulations.enabled
14498 && hg_settings.governance_layer.include_controls
14499 {
14500 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14502 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14503 .standard_records
14504 .iter()
14505 .filter_map(|r| {
14506 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14507 registry.get(&sid).cloned()
14508 })
14509 .collect();
14510
14511 builder.add_compliance_regulations(
14512 &standards,
14513 &compliance.findings,
14514 &compliance.filings,
14515 );
14516 }
14517
14518 builder.add_accounts(coa);
14520 builder.add_journal_entries_as_hyperedges(entries);
14521
14522 let hypergraph = builder.build();
14524
14525 let output_dir = self
14527 .output_path
14528 .clone()
14529 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14530 let hg_dir = output_dir
14531 .join(&self.config.graph_export.output_subdirectory)
14532 .join(&hg_settings.output_subdirectory);
14533
14534 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14536 "unified" => {
14537 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14538 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14539 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14540 })?;
14541 (
14542 metadata.num_nodes,
14543 metadata.num_edges,
14544 metadata.num_hyperedges,
14545 )
14546 }
14547 _ => {
14548 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14550 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14551 SynthError::generation(format!("Hypergraph export failed: {e}"))
14552 })?;
14553 (
14554 metadata.num_nodes,
14555 metadata.num_edges,
14556 metadata.num_hyperedges,
14557 )
14558 }
14559 };
14560
14561 #[cfg(feature = "streaming")]
14563 if let Some(ref target_url) = hg_settings.stream_target {
14564 use crate::stream_client::{StreamClient, StreamConfig};
14565 use std::io::Write as _;
14566
14567 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14568 let stream_config = StreamConfig {
14569 target_url: target_url.clone(),
14570 batch_size: hg_settings.stream_batch_size,
14571 api_key,
14572 ..StreamConfig::default()
14573 };
14574
14575 match StreamClient::new(stream_config) {
14576 Ok(mut client) => {
14577 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14578 match exporter.export_to_writer(&hypergraph, &mut client) {
14579 Ok(_) => {
14580 if let Err(e) = client.flush() {
14581 warn!("Failed to flush stream client: {}", e);
14582 } else {
14583 info!("Streamed {} records to {}", client.total_sent(), target_url);
14584 }
14585 }
14586 Err(e) => {
14587 warn!("Streaming export failed: {}", e);
14588 }
14589 }
14590 }
14591 Err(e) => {
14592 warn!("Failed to create stream client: {}", e);
14593 }
14594 }
14595 }
14596
14597 stats.graph_node_count += num_nodes;
14599 stats.graph_edge_count += num_edges;
14600 stats.graph_export_count += 1;
14601
14602 Ok(HypergraphExportInfo {
14603 node_count: num_nodes,
14604 edge_count: num_edges,
14605 hyperedge_count: num_hyperedges,
14606 output_path: hg_dir,
14607 })
14608 }
14609
14610 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14615 let pb = self.create_progress_bar(100, "Generating Banking Data");
14616
14617 let orchestrator = BankingOrchestratorBuilder::new()
14619 .config(self.config.banking.clone())
14620 .seed(self.seed + 9000)
14621 .country_pack(self.primary_pack().clone())
14622 .build();
14623
14624 if let Some(pb) = &pb {
14625 pb.inc(10);
14626 }
14627
14628 let result = orchestrator.generate();
14630
14631 if let Some(pb) = &pb {
14632 pb.inc(90);
14633 pb.finish_with_message(format!(
14634 "Banking: {} customers, {} transactions",
14635 result.customers.len(),
14636 result.transactions.len()
14637 ));
14638 }
14639
14640 let mut banking_customers = result.customers;
14645 let core_customers = &self.master_data.customers;
14646 if !core_customers.is_empty() {
14647 for (i, bc) in banking_customers.iter_mut().enumerate() {
14648 let core = &core_customers[i % core_customers.len()];
14649 bc.name = CustomerName::business(&core.name);
14650 bc.residence_country = core.country.clone();
14651 bc.enterprise_customer_id = Some(core.customer_id.clone());
14652 }
14653 debug!(
14654 "Cross-referenced {} banking customers with {} core customers",
14655 banking_customers.len(),
14656 core_customers.len()
14657 );
14658 }
14659
14660 Ok(BankingSnapshot {
14661 customers: banking_customers,
14662 accounts: result.accounts,
14663 transactions: result.transactions,
14664 transaction_labels: result.transaction_labels,
14665 customer_labels: result.customer_labels,
14666 account_labels: result.account_labels,
14667 relationship_labels: result.relationship_labels,
14668 narratives: result.narratives,
14669 suspicious_count: result.stats.suspicious_count,
14670 scenario_count: result.scenarios.len(),
14671 })
14672 }
14673
14674 fn calculate_total_transactions(&self) -> u64 {
14676 let months = self.config.global.period_months as f64;
14677 self.config
14678 .companies
14679 .iter()
14680 .map(|c| {
14681 let annual = c.annual_transaction_volume.count() as f64;
14682 let weighted = annual * c.volume_weight;
14683 (weighted * months / 12.0) as u64
14684 })
14685 .sum()
14686 }
14687
14688 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14690 if !self.phase_config.show_progress {
14691 return None;
14692 }
14693
14694 let pb = if let Some(mp) = &self.multi_progress {
14695 mp.add(ProgressBar::new(total))
14696 } else {
14697 ProgressBar::new(total)
14698 };
14699
14700 pb.set_style(
14701 ProgressStyle::default_bar()
14702 .template(&format!(
14703 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14704 ))
14705 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14706 .progress_chars("#>-"),
14707 );
14708
14709 Some(pb)
14710 }
14711
14712 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14714 self.coa.clone()
14715 }
14716
14717 pub fn get_master_data(&self) -> &MasterDataSnapshot {
14719 &self.master_data
14720 }
14721
14722 fn phase_compliance_regulations(
14724 &mut self,
14725 _stats: &mut EnhancedGenerationStatistics,
14726 ) -> SynthResult<ComplianceRegulationsSnapshot> {
14727 if !self.phase_config.generate_compliance_regulations {
14728 return Ok(ComplianceRegulationsSnapshot::default());
14729 }
14730
14731 info!("Phase: Generating Compliance Regulations Data");
14732
14733 let cr_config = &self.config.compliance_regulations;
14734
14735 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14737 self.config
14738 .companies
14739 .iter()
14740 .map(|c| c.country.clone())
14741 .collect::<std::collections::HashSet<_>>()
14742 .into_iter()
14743 .collect()
14744 } else {
14745 cr_config.jurisdictions.clone()
14746 };
14747
14748 let fallback_date =
14750 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14751 let reference_date = cr_config
14752 .reference_date
14753 .as_ref()
14754 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14755 .unwrap_or_else(|| {
14756 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14757 .unwrap_or(fallback_date)
14758 });
14759
14760 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14762 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14763 let cross_reference_records = reg_gen.generate_cross_reference_records();
14764 let jurisdiction_records =
14765 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14766
14767 info!(
14768 " Standards: {} records, {} cross-references, {} jurisdictions",
14769 standard_records.len(),
14770 cross_reference_records.len(),
14771 jurisdiction_records.len()
14772 );
14773
14774 let audit_procedures = if cr_config.audit_procedures.enabled {
14776 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14777 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14778 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14779 confidence_level: cr_config.audit_procedures.confidence_level,
14780 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14781 };
14782 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14783 self.seed + 9000,
14784 proc_config,
14785 );
14786 let registry = reg_gen.registry();
14787 let mut all_procs = Vec::new();
14788 for jurisdiction in &jurisdictions {
14789 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14790 all_procs.extend(procs);
14791 }
14792 info!(" Audit procedures: {}", all_procs.len());
14793 all_procs
14794 } else {
14795 Vec::new()
14796 };
14797
14798 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14800 let finding_config =
14801 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14802 finding_rate: cr_config.findings.finding_rate,
14803 material_weakness_rate: cr_config.findings.material_weakness_rate,
14804 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14805 generate_remediation: cr_config.findings.generate_remediation,
14806 };
14807 let mut finding_gen =
14808 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14809 self.seed + 9100,
14810 finding_config,
14811 );
14812 let mut all_findings = Vec::new();
14813 for company in &self.config.companies {
14814 let company_findings =
14815 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14816 all_findings.extend(company_findings);
14817 }
14818 info!(" Compliance findings: {}", all_findings.len());
14819 all_findings
14820 } else {
14821 Vec::new()
14822 };
14823
14824 let filings = if cr_config.filings.enabled {
14826 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14827 filing_types: cr_config.filings.filing_types.clone(),
14828 generate_status_progression: cr_config.filings.generate_status_progression,
14829 };
14830 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14831 self.seed + 9200,
14832 filing_config,
14833 );
14834 let company_codes: Vec<String> = self
14835 .config
14836 .companies
14837 .iter()
14838 .map(|c| c.code.clone())
14839 .collect();
14840 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14841 .unwrap_or(fallback_date);
14842 let filings = filing_gen.generate_filings(
14843 &company_codes,
14844 &jurisdictions,
14845 start_date,
14846 self.config.global.period_months,
14847 );
14848 info!(" Regulatory filings: {}", filings.len());
14849 filings
14850 } else {
14851 Vec::new()
14852 };
14853
14854 let compliance_graph = if cr_config.graph.enabled {
14856 let graph_config = datasynth_graph::ComplianceGraphConfig {
14857 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14858 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14859 include_cross_references: cr_config.graph.include_cross_references,
14860 include_supersession_edges: cr_config.graph.include_supersession_edges,
14861 include_account_links: cr_config.graph.include_account_links,
14862 include_control_links: cr_config.graph.include_control_links,
14863 include_company_links: cr_config.graph.include_company_links,
14864 };
14865 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14866
14867 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14869 .iter()
14870 .map(|r| datasynth_graph::StandardNodeInput {
14871 standard_id: r.standard_id.clone(),
14872 title: r.title.clone(),
14873 category: r.category.clone(),
14874 domain: r.domain.clone(),
14875 is_active: r.is_active,
14876 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14877 applicable_account_types: r.applicable_account_types.clone(),
14878 applicable_processes: r.applicable_processes.clone(),
14879 })
14880 .collect();
14881 builder.add_standards(&standard_inputs);
14882
14883 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14885 jurisdiction_records
14886 .iter()
14887 .map(|r| datasynth_graph::JurisdictionNodeInput {
14888 country_code: r.country_code.clone(),
14889 country_name: r.country_name.clone(),
14890 framework: r.accounting_framework.clone(),
14891 standard_count: r.standard_count,
14892 tax_rate: r.statutory_tax_rate,
14893 })
14894 .collect();
14895 builder.add_jurisdictions(&jurisdiction_inputs);
14896
14897 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14899 cross_reference_records
14900 .iter()
14901 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14902 from_standard: r.from_standard.clone(),
14903 to_standard: r.to_standard.clone(),
14904 relationship: r.relationship.clone(),
14905 convergence_level: r.convergence_level,
14906 })
14907 .collect();
14908 builder.add_cross_references(&xref_inputs);
14909
14910 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14912 .iter()
14913 .map(|r| datasynth_graph::JurisdictionMappingInput {
14914 country_code: r.jurisdiction.clone(),
14915 standard_id: r.standard_id.clone(),
14916 })
14917 .collect();
14918 builder.add_jurisdiction_mappings(&mapping_inputs);
14919
14920 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14922 .iter()
14923 .map(|p| datasynth_graph::ProcedureNodeInput {
14924 procedure_id: p.procedure_id.clone(),
14925 standard_id: p.standard_id.clone(),
14926 procedure_type: p.procedure_type.clone(),
14927 sample_size: p.sample_size,
14928 confidence_level: p.confidence_level,
14929 })
14930 .collect();
14931 builder.add_procedures(&proc_inputs);
14932
14933 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14935 .iter()
14936 .map(|f| datasynth_graph::FindingNodeInput {
14937 finding_id: f.finding_id.to_string(),
14938 standard_id: f
14939 .related_standards
14940 .first()
14941 .map(|s| s.as_str().to_string())
14942 .unwrap_or_default(),
14943 severity: f.severity.to_string(),
14944 deficiency_level: f.deficiency_level.to_string(),
14945 severity_score: f.deficiency_level.severity_score(),
14946 control_id: f.control_id.clone(),
14947 affected_accounts: f.affected_accounts.clone(),
14948 })
14949 .collect();
14950 builder.add_findings(&finding_inputs);
14951
14952 if cr_config.graph.include_account_links {
14954 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14955 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14956 for std_record in &standard_records {
14957 if let Some(std_obj) =
14958 registry.get(&datasynth_core::models::compliance::StandardId::parse(
14959 &std_record.standard_id,
14960 ))
14961 {
14962 for acct_type in &std_obj.applicable_account_types {
14963 account_links.push(datasynth_graph::AccountLinkInput {
14964 standard_id: std_record.standard_id.clone(),
14965 account_code: acct_type.clone(),
14966 account_name: acct_type.clone(),
14967 });
14968 }
14969 }
14970 }
14971 builder.add_account_links(&account_links);
14972 }
14973
14974 if cr_config.graph.include_control_links {
14976 let mut control_links = Vec::new();
14977 let sox_like_ids: Vec<String> = standard_records
14979 .iter()
14980 .filter(|r| {
14981 r.standard_id.starts_with("SOX")
14982 || r.standard_id.starts_with("PCAOB-AS-2201")
14983 })
14984 .map(|r| r.standard_id.clone())
14985 .collect();
14986 let control_ids = [
14988 ("C001", "Cash Controls"),
14989 ("C002", "Large Transaction Approval"),
14990 ("C010", "PO Approval"),
14991 ("C011", "Three-Way Match"),
14992 ("C020", "Revenue Recognition"),
14993 ("C021", "Credit Check"),
14994 ("C030", "Manual JE Approval"),
14995 ("C031", "Period Close Review"),
14996 ("C032", "Account Reconciliation"),
14997 ("C040", "Payroll Processing"),
14998 ("C050", "Fixed Asset Capitalization"),
14999 ("C060", "Intercompany Elimination"),
15000 ];
15001 for sox_id in &sox_like_ids {
15002 for (ctrl_id, ctrl_name) in &control_ids {
15003 control_links.push(datasynth_graph::ControlLinkInput {
15004 standard_id: sox_id.clone(),
15005 control_id: ctrl_id.to_string(),
15006 control_name: ctrl_name.to_string(),
15007 });
15008 }
15009 }
15010 builder.add_control_links(&control_links);
15011 }
15012
15013 if cr_config.graph.include_company_links {
15015 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15016 .iter()
15017 .enumerate()
15018 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15019 filing_id: format!("F{:04}", i + 1),
15020 filing_type: f.filing_type.to_string(),
15021 company_code: f.company_code.clone(),
15022 jurisdiction: f.jurisdiction.clone(),
15023 status: format!("{:?}", f.status),
15024 })
15025 .collect();
15026 builder.add_filings(&filing_inputs);
15027 }
15028
15029 let graph = builder.build();
15030 info!(
15031 " Compliance graph: {} nodes, {} edges",
15032 graph.nodes.len(),
15033 graph.edges.len()
15034 );
15035 Some(graph)
15036 } else {
15037 None
15038 };
15039
15040 self.check_resources_with_log("post-compliance-regulations")?;
15041
15042 Ok(ComplianceRegulationsSnapshot {
15043 standard_records,
15044 cross_reference_records,
15045 jurisdiction_records,
15046 audit_procedures,
15047 findings,
15048 filings,
15049 compliance_graph,
15050 })
15051 }
15052
15053 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15055 use super::lineage::LineageGraphBuilder;
15056
15057 let mut builder = LineageGraphBuilder::new();
15058
15059 builder.add_config_section("config:global", "Global Config");
15061 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15062 builder.add_config_section("config:transactions", "Transaction Config");
15063
15064 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15066 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15067
15068 builder.configured_by("phase:coa", "config:chart_of_accounts");
15070 builder.configured_by("phase:je", "config:transactions");
15071
15072 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15074 builder.produced_by("output:je", "phase:je");
15075
15076 if self.phase_config.generate_master_data {
15078 builder.add_config_section("config:master_data", "Master Data Config");
15079 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15080 builder.configured_by("phase:master_data", "config:master_data");
15081 builder.input_to("phase:master_data", "phase:je");
15082 }
15083
15084 if self.phase_config.generate_document_flows {
15085 builder.add_config_section("config:document_flows", "Document Flow Config");
15086 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15087 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15088 builder.configured_by("phase:p2p", "config:document_flows");
15089 builder.configured_by("phase:o2c", "config:document_flows");
15090
15091 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15092 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15093 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15094 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15095 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15096
15097 builder.produced_by("output:po", "phase:p2p");
15098 builder.produced_by("output:gr", "phase:p2p");
15099 builder.produced_by("output:vi", "phase:p2p");
15100 builder.produced_by("output:so", "phase:o2c");
15101 builder.produced_by("output:ci", "phase:o2c");
15102 }
15103
15104 if self.phase_config.inject_anomalies {
15105 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15106 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15107 builder.configured_by("phase:anomaly", "config:fraud");
15108 builder.add_output_file(
15109 "output:labels",
15110 "Anomaly Labels",
15111 "labels/anomaly_labels.csv",
15112 );
15113 builder.produced_by("output:labels", "phase:anomaly");
15114 }
15115
15116 if self.phase_config.generate_audit {
15117 builder.add_config_section("config:audit", "Audit Config");
15118 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15119 builder.configured_by("phase:audit", "config:audit");
15120 }
15121
15122 if self.phase_config.generate_banking {
15123 builder.add_config_section("config:banking", "Banking Config");
15124 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15125 builder.configured_by("phase:banking", "config:banking");
15126 }
15127
15128 if self.config.llm.enabled {
15129 builder.add_config_section("config:llm", "LLM Enrichment Config");
15130 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15131 builder.configured_by("phase:llm_enrichment", "config:llm");
15132 }
15133
15134 if self.config.diffusion.enabled {
15135 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15136 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15137 builder.configured_by("phase:diffusion", "config:diffusion");
15138 }
15139
15140 if self.config.causal.enabled {
15141 builder.add_config_section("config:causal", "Causal Generation Config");
15142 builder.add_generator_phase("phase:causal", "Causal Overlay");
15143 builder.configured_by("phase:causal", "config:causal");
15144 }
15145
15146 builder.build()
15147 }
15148
15149 fn compute_company_revenue(
15158 entries: &[JournalEntry],
15159 company_code: &str,
15160 ) -> rust_decimal::Decimal {
15161 use rust_decimal::Decimal;
15162 let mut revenue = Decimal::ZERO;
15163 for je in entries {
15164 if je.header.company_code != company_code {
15165 continue;
15166 }
15167 for line in &je.lines {
15168 if line.gl_account.starts_with('4') {
15169 revenue += line.credit_amount - line.debit_amount;
15171 }
15172 }
15173 }
15174 revenue.max(Decimal::ZERO)
15175 }
15176
15177 fn compute_entity_net_assets(
15181 entries: &[JournalEntry],
15182 entity_code: &str,
15183 ) -> rust_decimal::Decimal {
15184 use rust_decimal::Decimal;
15185 let mut asset_net = Decimal::ZERO;
15186 let mut liability_net = Decimal::ZERO;
15187 for je in entries {
15188 if je.header.company_code != entity_code {
15189 continue;
15190 }
15191 for line in &je.lines {
15192 if line.gl_account.starts_with('1') {
15193 asset_net += line.debit_amount - line.credit_amount;
15194 } else if line.gl_account.starts_with('2') {
15195 liability_net += line.credit_amount - line.debit_amount;
15196 }
15197 }
15198 }
15199 asset_net - liability_net
15200 }
15201
15202 fn phase_statistical_validation(
15213 &self,
15214 entries: &[JournalEntry],
15215 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15216 use datasynth_config::schema::StatisticalTestConfig;
15217 use datasynth_core::distributions::{
15218 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15219 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15220 };
15221 use rust_decimal::prelude::ToPrimitive;
15222
15223 let cfg = &self.config.distributions.validation;
15224 if !cfg.enabled {
15225 return Ok(None);
15226 }
15227
15228 let amounts: Vec<rust_decimal::Decimal> = entries
15231 .iter()
15232 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15233 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15234 .collect();
15235
15236 let paired_amount_linecount: Vec<(f64, f64)> = entries
15240 .iter()
15241 .filter_map(|je| {
15242 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15243 if amt > rust_decimal::Decimal::ZERO {
15244 amt.to_f64().map(|a| (a, je.lines.len() as f64))
15245 } else {
15246 None
15247 }
15248 })
15249 .collect();
15250
15251 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15252 for test_cfg in &cfg.tests {
15253 match test_cfg {
15254 StatisticalTestConfig::BenfordFirstDigit {
15255 threshold_mad,
15256 warning_mad,
15257 } => {
15258 results.push(run_benford_first_digit(
15259 &amounts,
15260 *threshold_mad,
15261 *warning_mad,
15262 ));
15263 }
15264 StatisticalTestConfig::ChiSquared { bins, significance } => {
15265 results.push(run_chi_squared(&amounts, *bins, *significance));
15266 }
15267 StatisticalTestConfig::DistributionFit {
15268 target: _,
15269 ks_significance,
15270 method: _,
15271 } => {
15272 results.push(run_ks_uniform_log(&amounts, *ks_significance));
15275 }
15276 StatisticalTestConfig::AndersonDarling {
15277 target: _,
15278 significance,
15279 } => {
15280 results.push(run_anderson_darling(&amounts, *significance));
15283 }
15284 StatisticalTestConfig::CorrelationCheck {
15285 expected_correlations,
15286 } => {
15287 if expected_correlations.is_empty() {
15291 results.push(StatisticalTestResult {
15292 name: "correlation_check".to_string(),
15293 outcome: TestOutcome::Skipped,
15294 statistic: 0.0,
15295 threshold: 0.0,
15296 message: "no expected correlations declared".to_string(),
15297 });
15298 } else {
15299 for ec in expected_correlations {
15300 let pair_key = format!("{}_{}", ec.field1, ec.field2);
15301 let is_amount_linecount = (ec.field1 == "amount"
15302 && ec.field2 == "line_count")
15303 || (ec.field1 == "line_count" && ec.field2 == "amount");
15304 if is_amount_linecount {
15305 let xs: Vec<f64> =
15306 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15307 let ys: Vec<f64> =
15308 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15309 results.push(run_correlation_check(
15310 &pair_key,
15311 &xs,
15312 &ys,
15313 ec.expected_r,
15314 ec.tolerance,
15315 ));
15316 } else {
15317 results.push(StatisticalTestResult {
15318 name: format!("correlation_check_{pair_key}"),
15319 outcome: TestOutcome::Skipped,
15320 statistic: 0.0,
15321 threshold: ec.tolerance,
15322 message: format!(
15323 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15324 ec.field1, ec.field2
15325 ),
15326 });
15327 }
15328 }
15329 }
15330 }
15331 }
15332 }
15333
15334 let report = StatisticalValidationReport {
15335 sample_count: amounts.len(),
15336 results,
15337 };
15338
15339 if cfg.reporting.fail_on_error && !report.all_passed() {
15340 let failed = report.failed_names().join(", ");
15341 return Err(SynthError::validation(format!(
15342 "statistical validation failed: {failed}"
15343 )));
15344 }
15345
15346 Ok(Some(report))
15347 }
15348
15349 fn phase_analytics_metadata(
15362 &mut self,
15363 entries: &[JournalEntry],
15364 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15365 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15366 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15367 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15368 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15369 use std::collections::BTreeMap;
15370
15371 let mut snap = AnalyticsMetadataSnapshot::default();
15372
15373 if !self.phase_config.generate_analytics_metadata {
15374 return Ok(snap);
15375 }
15376
15377 let cfg = &self.config.analytics_metadata;
15378 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15379 .map(|d| d.year())
15380 .unwrap_or(2025);
15381
15382 if cfg.prior_year {
15384 let mut gen = PriorYearGenerator::new(self.seed + 9100);
15385 for company in &self.config.companies {
15386 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15389 BTreeMap::new();
15390 for je in entries {
15391 if je.header.company_code != company.code {
15392 continue;
15393 }
15394 for line in &je.lines {
15395 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15396 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15397 });
15398 entry.1 += line.debit_amount - line.credit_amount;
15399 }
15400 }
15401 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15402 .into_iter()
15403 .filter(|(_, (_, bal))| !bal.is_zero())
15404 .map(|(code, (name, bal))| (code, name, bal))
15405 .collect();
15406 if !current.is_empty() {
15407 let comparatives =
15408 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
15409 snap.prior_year_comparatives.extend(comparatives);
15410 }
15411 }
15412 info!(
15413 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15414 snap.prior_year_comparatives.len(),
15415 self.config.companies.len()
15416 );
15417 }
15418
15419 if cfg.industry_benchmark {
15421 use datasynth_core::models::IndustrySector;
15422 let industry = match self.config.global.industry {
15423 IndustrySector::Manufacturing => "manufacturing",
15424 IndustrySector::Retail => "retail",
15425 IndustrySector::FinancialServices => "financial_services",
15426 IndustrySector::Technology => "technology",
15427 IndustrySector::Healthcare => "healthcare",
15428 _ => "other",
15429 };
15430 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15431 let benchmarks = gen.generate(industry, fiscal_year);
15432 info!(
15433 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15434 benchmarks.len()
15435 );
15436 snap.industry_benchmarks = benchmarks;
15437 }
15438
15439 if cfg.management_reports {
15441 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15442 let period_months = self.config.global.period_months;
15443 for company in &self.config.companies {
15444 let reports =
15445 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15446 snap.management_reports.extend(reports);
15447 }
15448 info!(
15449 "v3.3.0 analytics: {} management reports across {} companies",
15450 snap.management_reports.len(),
15451 self.config.companies.len()
15452 );
15453 }
15454
15455 if cfg.drift_events {
15457 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15458 .expect("hardcoded NaiveDate 2025-01-01 is valid");
15459 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15460 .unwrap_or(fallback_start);
15461 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15462 let mut gen = DriftEventGenerator::new(self.seed + 9400);
15463 let drifts = gen.generate_standalone_drifts(start_date, end_date);
15464 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15465 snap.drift_events = drifts;
15466 }
15467 let _ = entries;
15469
15470 Ok(snap)
15471 }
15472}
15473
15474fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15476 match format {
15477 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15478 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15479 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15480 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15481 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15482 }
15483}
15484
15485fn compute_trial_balance_entries(
15490 entries: &[JournalEntry],
15491 entity_code: &str,
15492 fiscal_year: i32,
15493 coa: Option<&ChartOfAccounts>,
15494) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15495 use std::collections::BTreeMap;
15496
15497 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15498 BTreeMap::new();
15499
15500 for je in entries {
15501 for line in &je.lines {
15502 let entry = balances.entry(line.account_code.clone()).or_default();
15503 entry.0 += line.debit_amount;
15504 entry.1 += line.credit_amount;
15505 }
15506 }
15507
15508 balances
15509 .into_iter()
15510 .map(
15511 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15512 account_description: coa
15513 .and_then(|c| c.get_account(&account_code))
15514 .map(|a| a.description().to_string())
15515 .unwrap_or_else(|| account_code.clone()),
15516 account_code,
15517 debit_balance: debit,
15518 credit_balance: credit,
15519 net_balance: debit - credit,
15520 entity_code: entity_code.to_string(),
15521 period: format!("FY{}", fiscal_year),
15522 },
15523 )
15524 .collect()
15525}
15526
15527#[cfg(test)]
15528#[allow(clippy::unwrap_used)]
15529mod tests {
15530 use super::*;
15531 use datasynth_config::schema::*;
15532
15533 fn create_test_config() -> GeneratorConfig {
15534 GeneratorConfig {
15535 global: GlobalConfig {
15536 industry: IndustrySector::Manufacturing,
15537 start_date: "2024-01-01".to_string(),
15538 period_months: 1,
15539 seed: Some(42),
15540 parallel: false,
15541 group_currency: "USD".to_string(),
15542 presentation_currency: None,
15543 worker_threads: 0,
15544 memory_limit_mb: 0,
15545 fiscal_year_months: None,
15546 },
15547 companies: vec![CompanyConfig {
15548 code: "1000".to_string(),
15549 name: "Test Company".to_string(),
15550 currency: "USD".to_string(),
15551 functional_currency: None,
15552 country: "US".to_string(),
15553 annual_transaction_volume: TransactionVolume::TenK,
15554 volume_weight: 1.0,
15555 fiscal_year_variant: "K4".to_string(),
15556 }],
15557 chart_of_accounts: ChartOfAccountsConfig {
15558 complexity: CoAComplexity::Small,
15559 industry_specific: true,
15560 custom_accounts: None,
15561 min_hierarchy_depth: 2,
15562 max_hierarchy_depth: 4,
15563 expand_industry_subaccounts: false,
15564 },
15565 transactions: TransactionConfig::default(),
15566 output: OutputConfig::default(),
15567 fraud: FraudConfig::default(),
15568 internal_controls: InternalControlsConfig::default(),
15569 business_processes: BusinessProcessConfig::default(),
15570 user_personas: UserPersonaConfig::default(),
15571 templates: TemplateConfig::default(),
15572 approval: ApprovalConfig::default(),
15573 departments: DepartmentConfig::default(),
15574 master_data: MasterDataConfig::default(),
15575 document_flows: DocumentFlowConfig::default(),
15576 intercompany: IntercompanyConfig::default(),
15577 balance: BalanceConfig::default(),
15578 ocpm: OcpmConfig::default(),
15579 audit: AuditGenerationConfig::default(),
15580 banking: datasynth_banking::BankingConfig::default(),
15581 data_quality: DataQualitySchemaConfig::default(),
15582 scenario: ScenarioConfig::default(),
15583 temporal: TemporalDriftConfig::default(),
15584 graph_export: GraphExportConfig::default(),
15585 streaming: StreamingSchemaConfig::default(),
15586 rate_limit: RateLimitSchemaConfig::default(),
15587 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15588 relationships: RelationshipSchemaConfig::default(),
15589 accounting_standards: AccountingStandardsConfig::default(),
15590 audit_standards: AuditStandardsConfig::default(),
15591 distributions: Default::default(),
15592 temporal_patterns: Default::default(),
15593 vendor_network: VendorNetworkSchemaConfig::default(),
15594 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15595 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15596 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15597 organizational_events: OrganizationalEventsSchemaConfig::default(),
15598 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15599 market_drift: MarketDriftSchemaConfig::default(),
15600 drift_labeling: DriftLabelingSchemaConfig::default(),
15601 anomaly_injection: Default::default(),
15602 industry_specific: Default::default(),
15603 fingerprint_privacy: Default::default(),
15604 quality_gates: Default::default(),
15605 compliance: Default::default(),
15606 webhooks: Default::default(),
15607 llm: Default::default(),
15608 diffusion: Default::default(),
15609 causal: Default::default(),
15610 source_to_pay: Default::default(),
15611 financial_reporting: Default::default(),
15612 hr: Default::default(),
15613 manufacturing: Default::default(),
15614 sales_quotes: Default::default(),
15615 tax: Default::default(),
15616 treasury: Default::default(),
15617 project_accounting: Default::default(),
15618 esg: Default::default(),
15619 country_packs: None,
15620 scenarios: Default::default(),
15621 session: Default::default(),
15622 compliance_regulations: Default::default(),
15623 analytics_metadata: Default::default(),
15624 }
15625 }
15626
15627 #[test]
15628 fn test_enhanced_orchestrator_creation() {
15629 let config = create_test_config();
15630 let orchestrator = EnhancedOrchestrator::with_defaults(config);
15631 assert!(orchestrator.is_ok());
15632 }
15633
15634 #[test]
15635 fn test_minimal_generation() {
15636 let config = create_test_config();
15637 let phase_config = PhaseConfig {
15638 generate_master_data: false,
15639 generate_document_flows: false,
15640 generate_journal_entries: true,
15641 inject_anomalies: false,
15642 show_progress: false,
15643 ..Default::default()
15644 };
15645
15646 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15647 let result = orchestrator.generate();
15648
15649 assert!(result.is_ok());
15650 let result = result.unwrap();
15651 assert!(!result.journal_entries.is_empty());
15652 }
15653
15654 #[test]
15655 fn test_master_data_generation() {
15656 let config = create_test_config();
15657 let phase_config = PhaseConfig {
15658 generate_master_data: true,
15659 generate_document_flows: false,
15660 generate_journal_entries: false,
15661 inject_anomalies: false,
15662 show_progress: false,
15663 vendors_per_company: 5,
15664 customers_per_company: 5,
15665 materials_per_company: 10,
15666 assets_per_company: 5,
15667 employees_per_company: 10,
15668 ..Default::default()
15669 };
15670
15671 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15672 let result = orchestrator.generate().unwrap();
15673
15674 assert!(!result.master_data.vendors.is_empty());
15675 assert!(!result.master_data.customers.is_empty());
15676 assert!(!result.master_data.materials.is_empty());
15677 }
15678
15679 #[test]
15680 fn test_document_flow_generation() {
15681 let config = create_test_config();
15682 let phase_config = PhaseConfig {
15683 generate_master_data: true,
15684 generate_document_flows: true,
15685 generate_journal_entries: false,
15686 inject_anomalies: false,
15687 inject_data_quality: false,
15688 validate_balances: false,
15689 validate_coa_coverage_strict: false,
15690 generate_ocpm_events: false,
15691 show_progress: false,
15692 vendors_per_company: 5,
15693 customers_per_company: 5,
15694 materials_per_company: 10,
15695 assets_per_company: 5,
15696 employees_per_company: 10,
15697 p2p_chains: 5,
15698 o2c_chains: 5,
15699 ..Default::default()
15700 };
15701
15702 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15703 let result = orchestrator.generate().unwrap();
15704
15705 assert!(!result.document_flows.p2p_chains.is_empty());
15707 assert!(!result.document_flows.o2c_chains.is_empty());
15708
15709 assert!(!result.document_flows.purchase_orders.is_empty());
15711 assert!(!result.document_flows.sales_orders.is_empty());
15712 }
15713
/// Journal entries with anomaly injection enabled: entries must exist and the
/// anomaly label summary must be present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    let summary = &output.anomaly_labels.summary;
    assert!(summary.is_some());
}
15736
/// End-to-end run with master data, document flows, journal entries and
/// balance validation enabled. Checks that every stage left data behind:
/// master data, chains, journal entries, accounts, subledger invoices and a
/// completed balance validation.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Generation stages.
    let master = &output.master_data;
    let flows = &output.document_flows;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger records.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and saw entries.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
15779
/// Document-flow invoices versus subledger invoices: the AP/AR subledger must
/// hold exactly as many invoices as there are vendor/customer invoices in the
/// document flows, and the statistics must report those same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
15836
/// Journal entries with balance validation enabled: validation must have run,
/// processed at least one entry, found no unbalanced entries, and report
/// equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        validate_coa_coverage_strict: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
15867
/// The reported statistics must match the generated collections: vendor,
/// customer and material counts against master data, and the total entry
/// count against the journal entry list.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
15906
/// Pins the defaults of `PhaseConfig`: the three core generation phases,
/// balance validation and progress display are on, anomaly injection is off,
/// and the per-company vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Switches expected to be on.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Anomaly injection is opt-in.
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Sizing knobs must be non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
15919
/// A freshly constructed orchestrator has no chart of accounts until
/// `generate` is called.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_none());
}
15928
/// After a journal-entry generation run, the orchestrator exposes a chart of
/// accounts through `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the state left on the orchestrator matters; the result is discarded.
    let _ = engine.generate().unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_some());
}
15947
/// Master-data-only run with five of each entity per company: the returned
/// master data must contain vendors.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
15971
/// `with_progress(false)` must clear the `show_progress` flag on the
/// orchestrator's phase configuration.
#[test]
fn test_with_progress_builder() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config())
        .unwrap()
        .with_progress(false);

    let progress_enabled = engine.phase_config.show_progress;
    assert!(!progress_enabled);
}
15982
/// Multi-company run: a second company ("2000", EUR / DE) is appended to the
/// test configuration and master data is generated with five vendors and five
/// customers per company.
///
/// The assertions expect at least ten vendors and ten customers in total and
/// exactly two companies in the statistics.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    // Two companies at five entities each give the lower bound of ten.
    assert!(
        stats.vendor_count >= 10,
        "expected at least 10 vendors across both companies, got {}",
        stats.vendor_count
    );
    assert!(
        stats.customer_count >= 10,
        "expected at least 10 customers across both companies, got {}",
        stats.customer_count
    );
    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(stats.companies_count, 2);
}
16019
/// Document flows requested without master data: the run must still succeed,
/// and both the P2P and O2C chain collections must come back empty.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // Master data is deliberately off while document flows are on.
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
16039
/// The `total_line_items` statistic must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total independently of the statistics.
    let recounted = output
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();

    assert_eq!(output.statistics.total_line_items, recounted);
}
16063
/// Audit generation with two engagements requested.
///
/// Checks three things in turn: the core audit artifacts exist (exactly two
/// engagements, plus workpapers, evidence, risk assessments, findings and
/// judgments); the ISA-specific collections named in the assertion messages
/// are populated; and every audit counter in the statistics equals the length
/// of the collection it describes.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every statistic mirrors the size of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
16183
/// The LLM, diffusion and causal sections of the test configuration are
/// disabled, and a plain journal-entry run leaves all of their statistics,
/// and the counterfactual outputs, at zero / `None` / empty.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Precondition: none of the optional phases is switched on.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Causal overlay.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    // Counterfactuals.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16215
/// Counterfactual generation enabled, with period close switched off.
///
/// Expects one counterfactual pair per journal entry, a matching
/// `counterfactual_pair_count` statistic, and unique `pair_id`s.
///
/// The pair checks used to sit inside an `if !journal_entries.is_empty()`
/// guard, so the test passed without checking anything when no entries were
/// produced. The guard is now an explicit assertion.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Without journal entries the comparisons below would be meaningless.
    assert!(
        !result.journal_entries.is_empty(),
        "journal entries must be generated for the counterfactual checks to be meaningful"
    );

    let entry_count = result.journal_entries.len();
    assert_eq!(result.counterfactual_pairs.len(), entry_count);
    assert_eq!(result.statistics.counterfactual_pair_count, entry_count);

    // Pair identifiers must be unique; borrowing avoids cloning each id.
    let ids: std::collections::HashSet<_> = result
        .counterfactual_pairs
        .iter()
        .map(|pair| &pair.pair_id)
        .collect();
    assert_eq!(ids.len(), result.counterfactual_pairs.len());
}
16252
/// LLM enrichment enabled with a cap of three vendor enrichments: at least one
/// vendor and at most three must be reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
16280
/// Diffusion enhancement enabled with `sample_size = 20`: the statistics must
/// report exactly 20 generated samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
16303
/// Causal overlay with the "fraud_detection" template, 100 samples and
/// validation on: the sample count must match and a validation verdict must
/// be recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so a pass/fail outcome must be present.
    assert!(stats.causal_validation_passed.is_some());
}
16329
/// Causal overlay with the "revenue_cycle" template, 50 samples and
/// validation off: the sample count must match and no validation verdict may
/// be recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome is expected.
    assert!(stats.causal_validation_passed.is_none());
}
16355
/// LLM enrichment, diffusion enhancement and the causal overlay all enabled in
/// one run: each phase must report its own output in the statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    // Causal overlay.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
16391
/// JSON round trip of `EnhancedGenerationStatistics`: the LLM, diffusion and
/// causal fields must survive serialization followed by deserialization.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
16418
/// Backward compatibility: a JSON document that has no LLM, diffusion or
/// causal keys must still deserialize, with those fields coming back as zero
/// or `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any of the llm_* / diffusion_* / causal_* keys.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Fields missing from the payload fall back to zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
16468}