1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195 #[allow(clippy::field_reassign_with_default)]
196 {
197 let mut s = DataQualityStats::default();
198 s.total_records = n_entries;
199 s.missing_values.total_records = n_entries;
200 s.format_variations.total_processed = n_entries;
201 s.duplicates.total_processed = n_entries;
202 s
203 }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207 let payment_behavior = &schema_config.payment_behavior;
208 let late_dist = &payment_behavior.late_payment_days_distribution;
209
210 P2PGeneratorConfig {
211 three_way_match_rate: schema_config.three_way_match_rate,
212 partial_delivery_rate: schema_config.partial_delivery_rate,
213 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214 price_variance_rate: schema_config.price_variance_rate,
215 max_price_variance_percent: schema_config.max_price_variance_percent,
216 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219 payment_method_distribution: vec![
220 (PaymentMethod::BankTransfer, 0.60),
221 (PaymentMethod::Check, 0.25),
222 (PaymentMethod::Wire, 0.10),
223 (PaymentMethod::CreditCard, 0.05),
224 ],
225 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226 payment_behavior: P2PPaymentBehavior {
227 late_payment_rate: payment_behavior.late_payment_rate,
228 late_payment_distribution: LatePaymentDistribution {
229 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230 late_8_to_14: late_dist.late_8_to_14,
231 very_late_15_to_30: late_dist.very_late_15_to_30,
232 severely_late_31_to_60: late_dist.severely_late_31_to_60,
233 extremely_late_over_60: late_dist.extremely_late_over_60,
234 },
235 partial_payment_rate: payment_behavior.partial_payment_rate,
236 payment_correction_rate: payment_behavior.payment_correction_rate,
237 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238 },
239 }
240}
241
242fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244 let payment_behavior = &schema_config.payment_behavior;
245
246 O2CGeneratorConfig {
247 credit_check_failure_rate: schema_config.credit_check_failure_rate,
248 partial_shipment_rate: schema_config.partial_shipment_rate,
249 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253 bad_debt_rate: schema_config.bad_debt_rate,
254 returns_rate: schema_config.return_rate,
255 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256 payment_method_distribution: vec![
257 (PaymentMethod::BankTransfer, 0.50),
258 (PaymentMethod::Check, 0.30),
259 (PaymentMethod::Wire, 0.15),
260 (PaymentMethod::CreditCard, 0.05),
261 ],
262 payment_behavior: O2CPaymentBehavior {
263 partial_payment_rate: payment_behavior.partial_payments.rate,
264 short_payment_rate: payment_behavior.short_payments.rate,
265 max_short_percent: payment_behavior.short_payments.max_short_percent,
266 on_account_rate: payment_behavior.on_account_payments.rate,
267 payment_correction_rate: payment_behavior.payment_corrections.rate,
268 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269 },
270 }
271}
272
/// Switches and sizing knobs for one generation run.
///
/// `bool` fields are named after the phase or feature they gate; `usize`
/// fields carry per-company, per-run, or per-engagement counts. Values are
/// supplied by the `Default` impl or by `PhaseConfig::from_config` in this
/// file. The code that consumes these fields is outside this section.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    // --- Core generation, injection, and validation switches ---
    pub generate_master_data: bool,
    pub generate_document_flows: bool,
    pub generate_ocpm_events: bool,
    pub generate_journal_entries: bool,
    pub inject_anomalies: bool,
    pub inject_data_quality: bool,
    pub validate_balances: bool,
    pub validate_coa_coverage_strict: bool,
    pub show_progress: bool,

    // --- Master-data volumes (counts per company) ---
    pub vendors_per_company: usize,
    pub customers_per_company: usize,
    pub materials_per_company: usize,
    pub assets_per_company: usize,
    pub employees_per_company: usize,

    // --- Document-flow chain counts ---
    pub p2p_chains: usize,
    pub o2c_chains: usize,

    // --- Audit switch and audit volumes ---
    pub generate_audit: bool,
    pub audit_engagements: usize,
    pub workpapers_per_engagement: usize,
    pub evidence_per_workpaper: usize,
    pub risks_per_engagement: usize,
    pub findings_per_engagement: usize,
    pub judgments_per_engagement: usize,

    // --- Optional domain switches ---
    pub generate_banking: bool,
    pub generate_graph_export: bool,
    pub generate_sourcing: bool,
    pub generate_bank_reconciliation: bool,
    pub generate_financial_statements: bool,
    pub generate_accounting_standards: bool,
    pub generate_manufacturing: bool,
    pub generate_sales_kpi_budgets: bool,
    pub generate_tax: bool,
    pub generate_esg: bool,
    pub generate_intercompany: bool,
    pub generate_evolution_events: bool,
    pub generate_counterfactuals: bool,
    pub generate_compliance_regulations: bool,
    pub generate_period_close: bool,
    pub generate_hr: bool,
    pub generate_treasury: bool,
    pub generate_project_accounting: bool,
    // `from_config` sets the next two only when their parent section
    // (compliance regulations / audit) is enabled as well.
    pub generate_legal_documents: bool,
    pub generate_it_controls: bool,
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376 fn default() -> Self {
377 Self {
378 generate_master_data: true,
379 generate_document_flows: true,
380 generate_ocpm_events: false, generate_journal_entries: true,
382 inject_anomalies: false,
383 inject_data_quality: false, validate_balances: true,
385 validate_coa_coverage_strict: false,
386 show_progress: true,
387 vendors_per_company: 50,
388 customers_per_company: 100,
389 materials_per_company: 200,
390 assets_per_company: 50,
391 employees_per_company: 100,
392 p2p_chains: 100,
393 o2c_chains: 100,
394 generate_audit: false, audit_engagements: 5,
396 workpapers_per_engagement: 20,
397 evidence_per_workpaper: 5,
398 risks_per_engagement: 15,
399 findings_per_engagement: 8,
400 judgments_per_engagement: 10,
401 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
423 }
424}
425
426impl PhaseConfig {
427 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432 Self {
433 generate_master_data: true,
435 generate_document_flows: true,
436 generate_journal_entries: true,
437 validate_balances: true,
438 validate_coa_coverage_strict: false,
439 generate_period_close: true,
440 generate_evolution_events: true,
441 show_progress: true,
442
443 generate_audit: cfg.audit.enabled,
445 generate_banking: cfg.banking.enabled,
446 generate_graph_export: cfg.graph_export.enabled,
447 generate_sourcing: cfg.source_to_pay.enabled,
448 generate_intercompany: cfg.intercompany.enabled,
449 generate_financial_statements: cfg.financial_reporting.enabled,
450 generate_bank_reconciliation: cfg.financial_reporting.enabled,
451 generate_accounting_standards: cfg.accounting_standards.enabled,
452 generate_manufacturing: cfg.manufacturing.enabled,
453 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454 generate_tax: cfg.tax.enabled,
455 generate_esg: cfg.esg.enabled,
456 generate_ocpm_events: cfg.ocpm.enabled,
457 generate_compliance_regulations: cfg.compliance_regulations.enabled,
458 generate_hr: cfg.hr.enabled,
459 generate_treasury: cfg.treasury.enabled,
460 generate_project_accounting: cfg.project_accounting.enabled,
461
462 generate_legal_documents: cfg.compliance_regulations.enabled
466 && cfg.compliance_regulations.legal_documents.enabled,
467 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470 generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478 inject_data_quality: cfg.data_quality.enabled,
479
480 vendors_per_company: 50,
482 customers_per_company: 100,
483 materials_per_company: 200,
484 assets_per_company: 50,
485 employees_per_company: 100,
486 p2p_chains: 100,
487 o2c_chains: 100,
488 audit_engagements: 5,
489 workpapers_per_engagement: 20,
490 evidence_per_workpaper: 5,
491 risks_per_engagement: 15,
492 findings_per_engagement: 8,
493 judgments_per_engagement: 10,
494 }
495 }
496}
497
/// Bundle of master-data collections.
///
/// Derives `Default`, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Profit-center records.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// Organizational profile records.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
524
/// Size counters and output location for a hypergraph export.
///
/// Does not derive `Default`; every field must be supplied.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
537
/// Bundle of document-flow collections: the P2P and O2C chains plus flat lists
/// of individual document types.
///
/// NOTE(review): whether the flat lists duplicate the documents inside the
/// chains is not visible in this section — confirm against the code that
/// fills this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document-to-document references.
    pub document_references: Vec<documents::DocumentReference>,
}
563
/// Bundle of subledger collections (AP, AR, fixed assets, inventory).
///
/// Derives `Default`, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements (subledger model type).
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
590
/// Optional OCPM event log plus three summary counters.
///
/// Derives `Default`: `event_log` is `None` and all counters are zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Event counter. Presumably derived from `event_log`; not enforced here.
    pub event_count: usize,
    /// Object counter.
    pub object_count: usize,
    /// Case counter.
    pub case_count: usize,
}
603
/// Bundle of audit-related collections.
///
/// Derives `Default`: every vector is empty and both `Option` fields
/// (`group_audit_plan`, `fsm_event_trail`) are `None`. Fields are grouped by
/// topic below. The grouping is editorial and based on field and type names.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    // --- Engagement core ---
    pub engagements: Vec<AuditEngagement>,
    pub workpapers: Vec<Workpaper>,
    pub evidence: Vec<AuditEvidence>,
    pub risk_assessments: Vec<RiskAssessment>,
    pub findings: Vec<AuditFinding>,
    pub judgments: Vec<ProfessionalJudgment>,

    // --- Confirmations, procedures, samples, analytics ---
    pub confirmations: Vec<ExternalConfirmation>,
    pub confirmation_responses: Vec<ConfirmationResponse>,
    pub procedure_steps: Vec<AuditProcedureStep>,
    pub samples: Vec<AuditSample>,
    pub analytical_results: Vec<AnalyticalProcedureResult>,

    // --- Internal audit and related parties ---
    pub ia_functions: Vec<InternalAuditFunction>,
    pub ia_reports: Vec<InternalAuditReport>,
    pub related_parties: Vec<RelatedParty>,
    pub related_party_transactions: Vec<RelatedPartyTransaction>,

    // --- Group / component audit ---
    pub component_auditors: Vec<ComponentAuditor>,
    /// At most one group audit plan; `None` when absent.
    pub group_audit_plan: Option<GroupAuditPlan>,
    pub component_instructions: Vec<ComponentInstruction>,
    pub component_reports: Vec<ComponentAuditorReport>,

    // --- Engagement letters, subsequent events, service organizations ---
    pub engagement_letters: Vec<EngagementLetter>,
    pub subsequent_events: Vec<SubsequentEvent>,
    pub service_organizations: Vec<ServiceOrganization>,
    pub soc_reports: Vec<SocReport>,
    pub user_entity_controls: Vec<UserEntityControl>,

    // --- Going concern, estimates, opinions, SOX ---
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,

    // --- Materiality, risk, and sampling ---
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,

    // --- Transaction classes, unusual items, analytical relationships ---
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,

    // --- Standards mappings and scopes ---
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,

    /// Optional event trail of `datasynth_audit_fsm` audit events; `None`
    /// when no trail is attached.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,

    // --- Legal documents and IT-control records ---
    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
}
729
/// Bundle of banking collections: entities, their label sets, narratives, and
/// two summary counters.
///
/// Derives `Default`: vectors are empty and counters are zero.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Labels attached to transactions.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Labels attached to customers.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Labels attached to accounts.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Labels attached to relationships.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Counter of suspicious items. What exactly is counted is not visible
    /// here — confirm against the code that fills it.
    pub suspicious_count: usize,
    /// Counter of scenarios.
    pub scenario_count: usize,
}
754
/// Summary of graph exports. Serializable via serde's `Serialize`.
///
/// Derives `Default`: `exported` is `false`, `graph_count` is zero, and
/// `exports` is empty.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string. Presumably the key is the export
    /// name — confirm against the code that inserts entries.
    pub exports: HashMap<String, GraphExportInfo>,
}
765
/// Details of a single graph export. Serializable via serde's `Serialize`.
///
/// Does not derive `Default`; every field must be supplied.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Format identifier as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
780
/// Bundle of sourcing collections.
///
/// Derives `Default`, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
803
/// Trial-balance entries for one fiscal period.
///
/// Serializable and deserializable via serde. `into_canonical` in this file
/// converts a value of this type into the `TrialBalance` model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period; `into_canonical` uses it as the as-of date.
    pub period_end: NaiveDate,
    /// Per-account entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
827
828impl PeriodTrialBalance {
829 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838 let mut total_debits = Decimal::ZERO;
839 let mut total_credits = Decimal::ZERO;
840 let lines: Vec<TrialBalanceLine> = self
841 .entries
842 .into_iter()
843 .map(|e| {
844 total_debits += e.debit_balance;
845 total_credits += e.credit_balance;
846 let category = AccountCategory::from_account_code(&e.account_code);
847 TrialBalanceLine {
848 account_code: e.account_code,
849 account_description: e.account_name,
850 category,
851 account_type: AccountType::Asset,
852 opening_balance: Decimal::ZERO,
853 period_debits: e.debit_balance,
854 period_credits: e.credit_balance,
855 closing_balance: e.debit_balance - e.credit_balance,
856 debit_balance: e.debit_balance,
857 credit_balance: e.credit_balance,
858 cost_center: None,
859 profit_center: None,
860 }
861 })
862 .collect();
863 let imbalance = total_debits - total_credits;
864 let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865 TrialBalance {
866 trial_balance_id: format!(
867 "{company_code}-{:04}{:02}",
868 self.fiscal_year, self.fiscal_period
869 ),
870 company_code: company_code.to_string(),
871 company_name: None,
872 as_of_date: self.period_end,
873 fiscal_year: self.fiscal_year as i32,
874 fiscal_period: self.fiscal_period as u32,
875 currency: currency.to_string(),
876 balance_type: TrialBalanceType::Adjusted,
877 lines,
878 total_debits,
879 total_credits,
880 is_balanced,
881 out_of_balance: imbalance,
882 is_equation_valid: is_balanced,
883 equation_difference: imbalance,
884 category_summary: std::collections::HashMap::new(),
885 created_at: self
886 .period_start
887 .and_hms_opt(0, 0, 0)
888 .expect("midnight is a valid time"),
889 created_by: "ORCHESTRATOR".to_string(),
890 approved_by: None,
891 approved_at: None,
892 status: TrialBalanceStatus::Final,
893 }
894 }
895}
896
/// Bundle of financial-reporting collections.
///
/// Derives `Default`: every vector and the map are empty.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key. Presumably the key
    /// is a company or entity code — confirm against the code that fills it.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating-segment records.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
921
/// Bundle of HR collections (payroll, time, expenses, benefits, pensions,
/// stock compensation) plus summary counters.
///
/// Derives `Default`: vectors are empty and counters are zero. The `*_count`
/// fields presumably mirror the lengths of the matching vectors. Nothing in
/// this struct enforces that — confirm against the code that fills it.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    // --- Payroll, time, expenses, benefits ---
    pub payroll_runs: Vec<PayrollRun>,
    pub payroll_line_items: Vec<PayrollLineItem>,
    pub time_entries: Vec<TimeEntry>,
    pub expense_reports: Vec<ExpenseReport>,
    pub benefit_enrollments: Vec<BenefitEnrollment>,

    // --- Pensions ---
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries stored alongside the pension data.
    pub pension_journal_entries: Vec<JournalEntry>,

    // --- Stock compensation ---
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries stored alongside the stock-compensation data.
    pub stock_comp_journal_entries: Vec<JournalEntry>,

    // --- Summary counters ---
    pub payroll_run_count: usize,
    pub payroll_line_item_count: usize,
    pub time_entry_count: usize,
    pub expense_report_count: usize,
    pub benefit_enrollment_count: usize,
    pub pension_plan_count: usize,
    pub stock_grant_count: usize,
}
966
/// Bundle of accounting-standards collections plus summary counters.
///
/// Derives `Default`: vectors are empty and counters are zero. The `*_count`
/// fields presumably mirror the lengths of related vectors. Nothing in this
/// struct enforces that — confirm against the code that fills it.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    // --- Revenue contracts and impairment ---
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,

    // --- Business combinations ---
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    pub business_combination_journal_entries: Vec<JournalEntry>,

    // --- Expected credit loss (ECL) ---
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    pub ecl_journal_entries: Vec<JournalEntry>,

    // --- Provisions and contingent liabilities ---
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    pub provision_journal_entries: Vec<JournalEntry>,

    // --- Currency translation ---
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,

    // --- Summary counters for the collections above ---
    pub revenue_contract_count: usize,
    pub impairment_test_count: usize,
    pub business_combination_count: usize,
    pub ecl_model_count: usize,
    pub provision_count: usize,
    pub currency_translation_count: usize,

    // --- Leases, fair value, framework differences ---
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,

    // --- Summary counters for the collections above ---
    pub lease_count: usize,
    pub fair_value_measurement_count: usize,
    pub framework_difference_count: usize,
}
1027
/// Bundle of compliance-regulation collections and an optional graph.
///
/// Derives `Default`: vectors are empty and `compliance_graph` is `None`.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// A `datasynth_graph::Graph`, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
1046
/// Bundle of manufacturing collections plus one counter per collection.
///
/// Derives `Default`: vectors are empty and counters are zero. The `*_count`
/// fields presumably mirror the lengths of the matching vectors. Nothing in
/// this struct enforces that — confirm against the code that fills it.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Counter for `production_orders`.
    pub production_order_count: usize,
    /// Counter for `quality_inspections`.
    pub quality_inspection_count: usize,
    /// Counter for `cycle_counts`.
    pub cycle_count_count: usize,
    /// Counter for `bom_components`.
    pub bom_component_count: usize,
    /// Counter for `inventory_movements`.
    pub inventory_movement_count: usize,
}
1071
/// Bundle of sales quotes, management KPIs, and budgets, plus three counters.
///
/// Derives `Default`: vectors are empty and counters are zero.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Counter for sales quotes.
    pub sales_quote_count: usize,
    /// Counter for KPIs.
    pub kpi_count: usize,
    /// Counter named for budget *lines* rather than budgets, so it may differ
    /// from `budgets.len()` — confirm against the code that fills it.
    pub budget_line_count: usize,
}
1088
/// Anomaly labels with an optional summary and a per-type tally.
///
/// Derives `Default`: `labels` and `by_type` are empty and `summary` is `None`.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// The labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Count per string key. Presumably the key is an anomaly type name —
    /// confirm against the code that fills it.
    pub by_type: HashMap<String, usize>,
}
1099
/// Result record for balance validation.
///
/// Derives `Default`: booleans are `false`, numeric fields are zero, and
/// `validation_errors` is empty. A default value therefore has
/// `validated == false` and `is_balanced == false`.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the validated data was found to be balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
1122
/// Bundle of tax collections, two counters, and a deferred-tax sub-snapshot.
///
/// Derives `Default`: vectors are empty, counters are zero, and
/// `deferred_tax` takes its own `Default` value.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding-tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Anomaly labels for the tax data.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Counter for jurisdictions.
    pub jurisdiction_count: usize,
    /// Counter for tax codes.
    pub code_count: usize,
    /// Deferred-tax data, held as a nested snapshot type.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries stored alongside the tax data.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1149
/// Bundle of intercompany collections plus summary figures.
///
/// Serializable and deserializable via serde, except `ic_document_chains`,
/// which is marked `#[serde(skip)]`. Derives `Default`: `Option` fields are
/// `None`, vectors are empty, and numeric fields are zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries for the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries for the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains. Skipped by serde: never serialized, and
    /// set to its `Default` (`None`) on deserialization.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Counter for matched pairs.
    pub matched_pair_count: usize,
    /// Counter for elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate. The scale (fraction vs. percent) is not visible here —
    /// confirm against the code that computes it.
    pub match_rate: f64,
}
1175
/// Bundle of ESG collections plus two counters.
///
/// Derives `Default`: vectors are empty and counters are zero.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy-consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water-usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce-diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay-equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// Anomaly labels for the ESG data.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Counter for emissions.
    pub emission_count: usize,
    /// Counter for disclosures.
    pub disclosure_count: usize,
}
1212
/// Bundle of treasury collections.
///
/// Derives `Default`, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash-pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Anomaly labels for the treasury data.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries stored alongside the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1240
/// Snapshot of project accounting data, grouped by record type.
///
/// Held in `EnhancedGenerationResult::project_accounting`. Every collection
/// defaults to empty via the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1257
/// Aggregate result returned by `EnhancedOrchestrator::generate`.
///
/// Bundles the per-domain snapshots, the combined journal entries, labels,
/// validation output and run statistics into one value. The struct derives
/// `Default` but not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    // --- Core accounting structures and per-domain snapshots ---
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    // --- Event streams ---
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    // --- Journal entries, labels, validation and statistics ---
    /// Journal entries of the run.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Individual quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Counters and timings for the run.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, when one is available.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, when one is available.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    // --- Controls, balances and fraud-related artefacts ---
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    // --- Relationship and cross-cutting data ---
    /// Temporal version chains over vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when one is available.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when one is available.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics metadata snapshot.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, when one is available.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity snapshot.
    pub interconnectivity: InterconnectivitySnapshot,
}
1363
/// Snapshot of interconnectivity attributes, stored as plain tuples and strings.
///
/// Held in `EnhancedGenerationResult::interconnectivity`.
/// NOTE(review): the first tuple element is presumably the vendor/customer
/// identifier and the second the assigned attribute — confirm against the
/// code that fills these vectors.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// Vendor tier assignments as `(String, u8)` pairs.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Vendor cluster assignments as `(String, String)` pairs.
    pub vendor_clusters: Vec<(String, String)>,
    /// Customer value segment assignments as `(String, String)` pairs.
    pub customer_value_segments: Vec<(String, String)>,
    /// Customer lifecycle stage assignments as `(String, String)` pairs.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata entries, one string each.
    pub industry_metadata: Vec<String>,
}
1389
/// Snapshot of analytics metadata, grouped by record type.
///
/// Held in `EnhancedGenerationResult::analytics_metadata`. Every collection
/// defaults to empty via the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparatives.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management reports.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift events.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1402
/// Counters and timings describing one generation run.
///
/// The struct is serializable and deserializable through serde. Fields marked
/// `#[serde(default)]` fall back to their `Default` value when absent from the
/// input; fields without the attribute must be present when deserializing.
/// The three `neural_*` options are additionally omitted from serialized
/// output while they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and run dimensions ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    pub companies_count: usize,
    pub period_months: u32,
    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    // --- Document flows and subledger invoices ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,
    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,
    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    // --- Anomalies and data quality ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,
    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,
    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,
    // --- LLM enrichment (duration in milliseconds, then per-entity counters) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,
    // --- Diffusion enhancement and neural hybrid settings ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,
    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,
    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,
    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,
    // --- Sales, KPIs and budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,
    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,
    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,
    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,
    // --- Additional subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,
    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,
    // --- Miscellaneous counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1672
/// Orchestrator that drives the enhanced generation workflow.
///
/// Built through [`EnhancedOrchestrator::new`] (or the `with_*` / `from_*`
/// constructors) and run with `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Phase configuration (includes the `show_progress` flag).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `new` initialises it to `None`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data held by the orchestrator; starts as the default snapshot.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when unset.
    seed: u64,
    /// Progress-bar container; set by `with_progress`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output path, set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty after `new`, filled by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built from `config.country_packs`, or built-in packs only.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives serialized items from `emit_phase_items`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider built by `build_template_provider`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Shared temporal context; `None` unless business-day temporal patterns are enabled.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Shard context, set through `set_shard_context`; `None` after `new`.
    shard_context: Option<crate::shard_context::ShardContext>,
}
1709
1710impl EnhancedOrchestrator {
1711 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1713 datasynth_config::validate_config(&config)?;
1714
1715 let seed = config.global.seed.unwrap_or_else(rand::random);
1716
1717 let resource_guard = Self::build_resource_guard(&config, None);
1719
1720 let country_pack_registry = match &config.country_packs {
1722 Some(cp) => {
1723 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1724 .map_err(|e| SynthError::config(e.to_string()))?
1725 }
1726 None => datasynth_core::CountryPackRegistry::builtin_only()
1727 .map_err(|e| SynthError::config(e.to_string()))?,
1728 };
1729
1730 let template_provider = Self::build_template_provider(&config)?;
1734
1735 let temporal_context = Self::build_temporal_context(&config)?;
1739
1740 Ok(Self {
1741 config,
1742 phase_config,
1743 coa: None,
1744 master_data: MasterDataSnapshot::default(),
1745 seed,
1746 multi_progress: None,
1747 resource_guard,
1748 output_path: None,
1749 copula_generators: Vec::new(),
1750 country_pack_registry,
1751 phase_sink: None,
1752 template_provider,
1753 temporal_context,
1754 shard_context: None,
1755 })
1756 }
1757
1758 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1764 self.shard_context = Some(ctx);
1765 }
1766
1767 fn build_temporal_context(
1773 config: &GeneratorConfig,
1774 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1775 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1776
1777 let tp = &config.temporal_patterns;
1778 if !tp.enabled || !tp.business_days.enabled {
1779 return Ok(None);
1780 }
1781
1782 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1783 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1784 let end_date = start_date + chrono::Months::new(config.global.period_months);
1785
1786 let region_code = tp
1787 .calendars
1788 .regions
1789 .first()
1790 .cloned()
1791 .unwrap_or_else(|| "US".to_string());
1792 let region = parse_region_code(®ion_code);
1793
1794 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1795 }
1796
1797 fn build_template_provider(
1805 config: &GeneratorConfig,
1806 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1807 use datasynth_core::templates::{
1808 loader::{MergeStrategy, TemplateLoader},
1809 DefaultTemplateProvider,
1810 };
1811 use std::sync::Arc;
1812
1813 let provider = match &config.templates.path {
1814 None => DefaultTemplateProvider::new(),
1815 Some(path) => {
1816 let data = if path.is_dir() {
1817 TemplateLoader::load_from_directory(path)
1818 } else {
1819 TemplateLoader::load_from_file(path)
1820 }
1821 .map_err(|e| {
1822 SynthError::config(format!(
1823 "Failed to load templates from {}: {e}",
1824 path.display()
1825 ))
1826 })?;
1827 let strategy = match config.templates.merge_strategy {
1828 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1829 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1830 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1831 MergeStrategy::MergePreferFile
1832 }
1833 };
1834 DefaultTemplateProvider::with_templates(data, strategy)
1835 }
1836 };
1837 Ok(Arc::new(provider))
1838 }
1839
1840 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1842 Self::new(config, PhaseConfig::default())
1843 }
1844
1845 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1847 self.phase_sink = Some(sink);
1848 self
1849 }
1850
1851 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1853 self.phase_sink = Some(sink);
1854 }
1855
1856 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1858 if let Some(ref sink) = self.phase_sink {
1859 for item in items {
1860 if let Ok(value) = serde_json::to_value(item) {
1861 if let Err(e) = sink.emit(phase, type_name, &value) {
1862 warn!(
1863 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1864 );
1865 }
1866 }
1867 }
1868 if let Err(e) = sink.phase_complete(phase) {
1869 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1870 }
1871 }
1872 }
1873
1874 pub fn with_progress(mut self, show: bool) -> Self {
1876 self.phase_config.show_progress = show;
1877 if show {
1878 self.multi_progress = Some(MultiProgress::new());
1879 }
1880 self
1881 }
1882
1883 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1885 let path = path.into();
1886 self.output_path = Some(path.clone());
1887 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1889 self
1890 }
1891
1892 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1894 &self.country_pack_registry
1895 }
1896
1897 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1899 self.country_pack_registry.get_by_str(country)
1900 }
1901
1902 fn primary_country_code(&self) -> &str {
1905 self.config
1906 .companies
1907 .first()
1908 .map(|c| c.country.as_str())
1909 .unwrap_or("US")
1910 }
1911
1912 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1914 self.country_pack_for(self.primary_country_code())
1915 }
1916
1917 fn resolve_coa_framework(&self) -> CoAFramework {
1919 if self.config.accounting_standards.enabled {
1920 match self.config.accounting_standards.framework {
1921 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1922 return CoAFramework::FrenchPcg;
1923 }
1924 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1925 return CoAFramework::GermanSkr04;
1926 }
1927 _ => {}
1928 }
1929 }
1930 let pack = self.primary_pack();
1932 match pack.accounting.framework.as_str() {
1933 "french_gaap" => CoAFramework::FrenchPcg,
1934 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1935 _ => CoAFramework::UsGaap,
1936 }
1937 }
1938
1939 pub fn has_copulas(&self) -> bool {
1944 !self.copula_generators.is_empty()
1945 }
1946
1947 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1953 &self.copula_generators
1954 }
1955
1956 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1960 &mut self.copula_generators
1961 }
1962
1963 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1967 self.copula_generators
1968 .iter_mut()
1969 .find(|c| c.name == copula_name)
1970 .map(|c| c.generator.sample())
1971 }
1972
1973 pub fn from_fingerprint(
1996 fingerprint_path: &std::path::Path,
1997 phase_config: PhaseConfig,
1998 scale: f64,
1999 ) -> SynthResult<Self> {
2000 info!("Loading fingerprint from: {}", fingerprint_path.display());
2001
2002 let reader = FingerprintReader::new();
2004 let fingerprint = reader
2005 .read_from_file(fingerprint_path)
2006 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2007
2008 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2009 }
2010
2011 pub fn from_fingerprint_data(
2018 fingerprint: Fingerprint,
2019 phase_config: PhaseConfig,
2020 scale: f64,
2021 ) -> SynthResult<Self> {
2022 info!(
2023 "Synthesizing config from fingerprint (version: {}, tables: {})",
2024 fingerprint.manifest.version,
2025 fingerprint.schema.tables.len()
2026 );
2027
2028 let seed: u64 = rand::random();
2030 info!("Fingerprint synthesis seed: {}", seed);
2031
2032 let options = SynthesisOptions {
2034 scale,
2035 seed: Some(seed),
2036 preserve_correlations: true,
2037 inject_anomalies: true,
2038 };
2039 let synthesizer = ConfigSynthesizer::with_options(options);
2040
2041 let synthesis_result = synthesizer
2043 .synthesize_full(&fingerprint, seed)
2044 .map_err(|e| {
2045 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2046 })?;
2047
2048 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2050 Self::base_config_for_industry(industry)
2051 } else {
2052 Self::base_config_for_industry("manufacturing")
2053 };
2054
2055 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2057
2058 info!(
2060 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2061 fingerprint.schema.tables.len(),
2062 scale,
2063 synthesis_result.copula_generators.len()
2064 );
2065
2066 if !synthesis_result.copula_generators.is_empty() {
2067 for spec in &synthesis_result.copula_generators {
2068 info!(
2069 " Copula '{}' for table '{}': {} columns",
2070 spec.name,
2071 spec.table,
2072 spec.columns.len()
2073 );
2074 }
2075 }
2076
2077 let mut orchestrator = Self::new(config, phase_config)?;
2079
2080 orchestrator.copula_generators = synthesis_result.copula_generators;
2082
2083 Ok(orchestrator)
2084 }
2085
2086 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2088 use datasynth_config::presets::create_preset;
2089 use datasynth_config::TransactionVolume;
2090 use datasynth_core::models::{CoAComplexity, IndustrySector};
2091
2092 let sector = match industry.to_lowercase().as_str() {
2093 "manufacturing" => IndustrySector::Manufacturing,
2094 "retail" => IndustrySector::Retail,
2095 "financial" | "financial_services" => IndustrySector::FinancialServices,
2096 "healthcare" => IndustrySector::Healthcare,
2097 "technology" | "tech" => IndustrySector::Technology,
2098 _ => IndustrySector::Manufacturing,
2099 };
2100
2101 create_preset(
2103 sector,
2104 1, 12, CoAComplexity::Medium,
2107 TransactionVolume::TenK,
2108 )
2109 }
2110
2111 fn apply_config_patch(
2113 mut config: GeneratorConfig,
2114 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2115 ) -> GeneratorConfig {
2116 use datasynth_fingerprint::synthesis::ConfigValue;
2117
2118 for (key, value) in patch.values() {
2119 match (key.as_str(), value) {
2120 ("transactions.count", ConfigValue::Integer(n)) => {
2123 info!(
2124 "Fingerprint suggests {} transactions (apply via company volumes)",
2125 n
2126 );
2127 }
2128 ("global.period_months", ConfigValue::Integer(n)) => {
2129 config.global.period_months = (*n).clamp(1, 120) as u32;
2130 }
2131 ("global.start_date", ConfigValue::String(s)) => {
2132 config.global.start_date = s.clone();
2133 }
2134 ("global.seed", ConfigValue::Integer(n)) => {
2135 config.global.seed = Some(*n as u64);
2136 }
2137 ("fraud.enabled", ConfigValue::Bool(b)) => {
2138 config.fraud.enabled = *b;
2139 }
2140 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2141 config.fraud.fraud_rate = *f;
2142 }
2143 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2144 config.data_quality.enabled = *b;
2145 }
2146 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2148 config.fraud.enabled = *b;
2149 }
2150 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2151 config.fraud.fraud_rate = *f;
2152 }
2153 _ => {
2154 debug!("Ignoring unknown config patch key: {}", key);
2155 }
2156 }
2157 }
2158
2159 config
2160 }
2161
2162 fn build_resource_guard(
2164 config: &GeneratorConfig,
2165 output_path: Option<PathBuf>,
2166 ) -> ResourceGuard {
2167 let mut builder = ResourceGuardBuilder::new();
2168
2169 if config.global.memory_limit_mb > 0 {
2171 builder = builder.memory_limit(config.global.memory_limit_mb);
2172 }
2173
2174 if let Some(path) = output_path {
2176 builder = builder.output_path(path).min_free_disk(100); }
2178
2179 builder = builder.conservative();
2181
2182 builder.build()
2183 }
2184
2185 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2190 self.resource_guard.check()
2191 }
2192
2193 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2195 let level = self.resource_guard.check()?;
2196
2197 if level != DegradationLevel::Normal {
2198 warn!(
2199 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2200 phase,
2201 level,
2202 self.resource_guard.current_memory_mb(),
2203 self.resource_guard.available_disk_mb()
2204 );
2205 }
2206
2207 Ok(level)
2208 }
2209
2210 fn get_degradation_actions(&self) -> DegradationActions {
2212 self.resource_guard.get_actions()
2213 }
2214
2215 fn check_memory_limit(&self) -> SynthResult<()> {
2217 self.check_resources()?;
2218 Ok(())
2219 }
2220
2221 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2223 info!("Starting enhanced generation workflow");
2224 info!(
2225 "Config: industry={:?}, period_months={}, companies={}",
2226 self.config.global.industry,
2227 self.config.global.period_months,
2228 self.config.companies.len()
2229 );
2230
2231 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2234 datasynth_core::serde_decimal::set_numeric_native(is_native);
2235 struct NumericModeGuard;
2236 impl Drop for NumericModeGuard {
2237 fn drop(&mut self) {
2238 datasynth_core::serde_decimal::set_numeric_native(false);
2239 }
2240 }
2241 let _numeric_guard = if is_native {
2242 Some(NumericModeGuard)
2243 } else {
2244 None
2245 };
2246
2247 let initial_level = self.check_resources_with_log("initial")?;
2249 if initial_level == DegradationLevel::Emergency {
2250 return Err(SynthError::resource(
2251 "Insufficient resources to start generation",
2252 ));
2253 }
2254
2255 let mut stats = EnhancedGenerationStatistics {
2256 companies_count: self.config.companies.len(),
2257 period_months: self.config.global.period_months,
2258 ..Default::default()
2259 };
2260
2261 let coa = self.phase_chart_of_accounts(&mut stats)?;
2263
2264 self.phase_master_data(&mut stats)?;
2266
2267 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2269 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2270 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2271
2272 let (mut document_flows, mut subledger, fa_journal_entries) =
2274 self.phase_document_flows(&mut stats)?;
2275
2276 self.emit_phase_items(
2278 "document_flows",
2279 "PurchaseOrder",
2280 &document_flows.purchase_orders,
2281 );
2282 self.emit_phase_items(
2283 "document_flows",
2284 "GoodsReceipt",
2285 &document_flows.goods_receipts,
2286 );
2287 self.emit_phase_items(
2288 "document_flows",
2289 "VendorInvoice",
2290 &document_flows.vendor_invoices,
2291 );
2292 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2293 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2294
2295 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2297
2298 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2303 .iter()
2304 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2305 .collect();
2306 if !opening_balance_jes.is_empty() {
2307 debug!(
2308 "Prepending {} opening balance JEs to entries",
2309 opening_balance_jes.len()
2310 );
2311 }
2312
2313 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2315
2316 if !opening_balance_jes.is_empty() {
2319 let mut combined = opening_balance_jes;
2320 combined.extend(entries);
2321 entries = combined;
2322 }
2323
2324 if !fa_journal_entries.is_empty() {
2326 debug!(
2327 "Appending {} FA acquisition JEs to main entries",
2328 fa_journal_entries.len()
2329 );
2330 entries.extend(fa_journal_entries);
2331 }
2332
2333 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2335
2336 let actions = self.get_degradation_actions();
2338
2339 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2341
2342 if !sourcing.contracts.is_empty() {
2345 let mut linked_count = 0usize;
2346 let po_vendor_pairs: Vec<(String, String)> = document_flows
2348 .p2p_chains
2349 .iter()
2350 .map(|chain| {
2351 (
2352 chain.purchase_order.vendor_id.clone(),
2353 chain.purchase_order.header.document_id.clone(),
2354 )
2355 })
2356 .collect();
2357
2358 for chain in &mut document_flows.p2p_chains {
2359 if chain.purchase_order.contract_id.is_none() {
2360 if let Some(contract) = sourcing
2361 .contracts
2362 .iter()
2363 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2364 {
2365 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2366 linked_count += 1;
2367 }
2368 }
2369 }
2370
2371 for contract in &mut sourcing.contracts {
2373 let po_ids: Vec<String> = po_vendor_pairs
2374 .iter()
2375 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2376 .map(|(_, po_id)| po_id.clone())
2377 .collect();
2378 if !po_ids.is_empty() {
2379 contract.purchase_order_ids = po_ids;
2380 }
2381 }
2382
2383 if linked_count > 0 {
2384 debug!(
2385 "Linked {} purchase orders to S2C contracts by vendor match",
2386 linked_count
2387 );
2388 }
2389 }
2390
2391 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2393
2394 if !intercompany.seller_journal_entries.is_empty()
2396 || !intercompany.buyer_journal_entries.is_empty()
2397 {
2398 let ic_je_count = intercompany.seller_journal_entries.len()
2399 + intercompany.buyer_journal_entries.len();
2400 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2401 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2402 debug!(
2403 "Appended {} IC journal entries to main entries",
2404 ic_je_count
2405 );
2406 }
2407
2408 if !intercompany.elimination_entries.is_empty() {
2410 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2411 &intercompany.elimination_entries,
2412 );
2413 if !elim_jes.is_empty() {
2414 debug!(
2415 "Appended {} elimination journal entries to main entries",
2416 elim_jes.len()
2417 );
2418 let elim_debit: rust_decimal::Decimal =
2420 elim_jes.iter().map(|je| je.total_debit()).sum();
2421 let elim_credit: rust_decimal::Decimal =
2422 elim_jes.iter().map(|je| je.total_credit()).sum();
2423 let elim_diff = (elim_debit - elim_credit).abs();
2424 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2426 return Err(datasynth_core::error::SynthError::generation(format!(
2427 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2428 elim_debit, elim_credit, elim_diff, tolerance
2429 )));
2430 }
2431 debug!(
2432 "IC elimination balance verified: debits={}, credits={} (diff={})",
2433 elim_debit, elim_credit, elim_diff
2434 );
2435 entries.extend(elim_jes);
2436 }
2437 }
2438
2439 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2441 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2442 document_flows
2443 .customer_invoices
2444 .extend(ic_docs.seller_invoices.iter().cloned());
2445 document_flows
2446 .purchase_orders
2447 .extend(ic_docs.buyer_orders.iter().cloned());
2448 document_flows
2449 .goods_receipts
2450 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2451 document_flows
2452 .vendor_invoices
2453 .extend(ic_docs.buyer_invoices.iter().cloned());
2454 debug!(
2455 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2456 ic_docs.seller_invoices.len(),
2457 ic_docs.buyer_orders.len(),
2458 ic_docs.buyer_goods_receipts.len(),
2459 ic_docs.buyer_invoices.len(),
2460 );
2461 }
2462 }
2463
2464 let hr = self.phase_hr_data(&mut stats)?;
2466
2467 if !hr.payroll_runs.is_empty() {
2469 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2470 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2471 entries.extend(payroll_jes);
2472 }
2473
2474 if !hr.pension_journal_entries.is_empty() {
2476 debug!(
2477 "Generated {} JEs from pension plans",
2478 hr.pension_journal_entries.len()
2479 );
2480 entries.extend(hr.pension_journal_entries.iter().cloned());
2481 }
2482
2483 if !hr.stock_comp_journal_entries.is_empty() {
2485 debug!(
2486 "Generated {} JEs from stock-based compensation",
2487 hr.stock_comp_journal_entries.len()
2488 );
2489 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2490 }
2491
2492 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2494
2495 if !manufacturing_snap.production_orders.is_empty() {
2497 let currency = self
2498 .config
2499 .companies
2500 .first()
2501 .map(|c| c.currency.as_str())
2502 .unwrap_or("USD");
2503 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2504 &manufacturing_snap.production_orders,
2505 &manufacturing_snap.quality_inspections,
2506 currency,
2507 );
2508 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2509 entries.extend(mfg_jes);
2510 }
2511
2512 if !manufacturing_snap.quality_inspections.is_empty() {
2514 let framework = match self.config.accounting_standards.framework {
2515 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2516 _ => "US_GAAP",
2517 };
2518 for company in &self.config.companies {
2519 let company_orders: Vec<_> = manufacturing_snap
2520 .production_orders
2521 .iter()
2522 .filter(|o| o.company_code == company.code)
2523 .cloned()
2524 .collect();
2525 let company_inspections: Vec<_> = manufacturing_snap
2526 .quality_inspections
2527 .iter()
2528 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2529 .cloned()
2530 .collect();
2531 if company_inspections.is_empty() {
2532 continue;
2533 }
2534 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2535 let warranty_result = warranty_gen.generate(
2536 &company.code,
2537 &company_orders,
2538 &company_inspections,
2539 &company.currency,
2540 framework,
2541 );
2542 if !warranty_result.journal_entries.is_empty() {
2543 debug!(
2544 "Generated {} warranty provision JEs for {}",
2545 warranty_result.journal_entries.len(),
2546 company.code
2547 );
2548 entries.extend(warranty_result.journal_entries);
2549 }
2550 }
2551 }
2552
2553 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2555 {
2556 let cogs_currency = self
2557 .config
2558 .companies
2559 .first()
2560 .map(|c| c.currency.as_str())
2561 .unwrap_or("USD");
2562 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2563 &document_flows.deliveries,
2564 &manufacturing_snap.production_orders,
2565 cogs_currency,
2566 );
2567 if !cogs_jes.is_empty() {
2568 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2569 entries.extend(cogs_jes);
2570 }
2571 }
2572
2573 if !manufacturing_snap.inventory_movements.is_empty()
2579 && !subledger.inventory_positions.is_empty()
2580 {
2581 use datasynth_core::models::MovementType as MfgMovementType;
2582 let mut receipt_count = 0usize;
2583 let mut issue_count = 0usize;
2584 for movement in &manufacturing_snap.inventory_movements {
2585 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2587 p.material_id == movement.material_code
2588 && p.company_code == movement.entity_code
2589 }) {
2590 match movement.movement_type {
2591 MfgMovementType::GoodsReceipt => {
2592 pos.add_quantity(
2594 movement.quantity,
2595 movement.value,
2596 movement.movement_date,
2597 );
2598 receipt_count += 1;
2599 }
2600 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2601 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2603 issue_count += 1;
2604 }
2605 _ => {}
2606 }
2607 }
2608 }
2609 debug!(
2610 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2611 manufacturing_snap.inventory_movements.len(),
2612 receipt_count,
2613 issue_count,
2614 );
2615 }
2616
2617 if !entries.is_empty() {
2620 stats.total_entries = entries.len() as u64;
2621 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2622 debug!(
2623 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2624 stats.total_entries, stats.total_line_items
2625 );
2626 }
2627
2628 if self.config.internal_controls.enabled && !entries.is_empty() {
2630 info!("Phase 7b: Applying internal controls to journal entries");
2631 let control_config = ControlGeneratorConfig {
2632 exception_rate: self.config.internal_controls.exception_rate,
2633 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2634 enable_sox_marking: true,
2635 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2636 self.config.internal_controls.sox_materiality_threshold,
2637 )
2638 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2639 ..Default::default()
2640 };
2641 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2642 for entry in &mut entries {
2643 control_gen.apply_controls(entry, &coa);
2644 }
2645 let with_controls = entries
2646 .iter()
2647 .filter(|e| !e.header.control_ids.is_empty())
2648 .count();
2649 info!(
2650 "Applied controls to {} entries ({} with control IDs assigned)",
2651 entries.len(),
2652 with_controls
2653 );
2654 }
2655
2656 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2660 .iter()
2661 .filter(|e| e.header.sod_violation)
2662 .filter_map(|e| {
2663 e.header.sod_conflict_type.map(|ct| {
2664 use datasynth_core::models::{RiskLevel, SodViolation};
2665 let severity = match ct {
2666 datasynth_core::models::SodConflictType::PaymentReleaser
2667 | datasynth_core::models::SodConflictType::RequesterApprover => {
2668 RiskLevel::Critical
2669 }
2670 datasynth_core::models::SodConflictType::PreparerApprover
2671 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2672 | datasynth_core::models::SodConflictType::JournalEntryPoster
2673 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2674 RiskLevel::High
2675 }
2676 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2677 RiskLevel::Medium
2678 }
2679 };
2680 let action = format!(
2681 "SoD conflict {:?} on entry {} ({})",
2682 ct, e.header.document_id, e.header.company_code
2683 );
2684 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2685 })
2686 })
2687 .collect();
2688 if !sod_violations.is_empty() {
2689 info!(
2690 "Phase 7c: Extracted {} SoD violations from {} entries",
2691 sod_violations.len(),
2692 entries.len()
2693 );
2694 }
2695
2696 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2698
2699 {
2707 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2708 if self.config.fraud.enabled && doc_rate > 0.0 {
2709 use datasynth_core::fraud_propagation::{
2710 inject_document_fraud, propagate_documents_to_entries,
2711 };
2712 use datasynth_core::utils::weighted_select;
2713 use datasynth_core::FraudType;
2714 use rand_chacha::rand_core::SeedableRng;
2715
2716 let dist = &self.config.fraud.fraud_type_distribution;
2717 let fraud_type_weights: [(FraudType, f64); 8] = [
2718 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2719 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2720 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2721 (
2722 FraudType::ImproperCapitalization,
2723 dist.expense_capitalization,
2724 ),
2725 (FraudType::SplitTransaction, dist.split_transaction),
2726 (FraudType::TimingAnomaly, dist.timing_anomaly),
2727 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2728 (FraudType::DuplicatePayment, dist.duplicate_payment),
2729 ];
2730 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2731 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2732 if weights_sum <= 0.0 {
2733 FraudType::FictitiousEntry
2734 } else {
2735 *weighted_select(rng, &fraud_type_weights)
2736 }
2737 };
2738
2739 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2740 let mut doc_tagged = 0usize;
2741 macro_rules! inject_into {
2742 ($collection:expr) => {{
2743 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2744 $collection.iter_mut().map(|d| &mut d.header).collect();
2745 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2746 }};
2747 }
2748 inject_into!(document_flows.purchase_orders);
2749 inject_into!(document_flows.goods_receipts);
2750 inject_into!(document_flows.vendor_invoices);
2751 inject_into!(document_flows.payments);
2752 inject_into!(document_flows.sales_orders);
2753 inject_into!(document_flows.deliveries);
2754 inject_into!(document_flows.customer_invoices);
2755 if doc_tagged > 0 {
2756 info!(
2757 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2758 );
2759 }
2760
2761 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2762 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2763 Vec::new();
2764 headers.extend(
2765 document_flows
2766 .purchase_orders
2767 .iter()
2768 .map(|d| d.header.clone()),
2769 );
2770 headers.extend(
2771 document_flows
2772 .goods_receipts
2773 .iter()
2774 .map(|d| d.header.clone()),
2775 );
2776 headers.extend(
2777 document_flows
2778 .vendor_invoices
2779 .iter()
2780 .map(|d| d.header.clone()),
2781 );
2782 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2783 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2784 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2785 headers.extend(
2786 document_flows
2787 .customer_invoices
2788 .iter()
2789 .map(|d| d.header.clone()),
2790 );
2791 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2792 if propagated > 0 {
2793 info!(
2794 "Propagated document-level fraud to {propagated} derived journal entries"
2795 );
2796 }
2797 }
2798 }
2799 }
2800
2801 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2803
2804 {
2822 use datasynth_core::fraud_bias::{
2823 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2824 };
2825 use rand_chacha::rand_core::SeedableRng;
2826 let cfg = FraudBehavioralBiasConfig::default();
2827 if cfg.enabled {
2828 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2829 let mut swept = 0usize;
2830 for entry in entries.iter_mut() {
2831 if entry.header.is_fraud && !entry.header.is_anomaly {
2832 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2833 swept += 1;
2834 }
2835 }
2836 if swept > 0 {
2837 info!(
2838 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2839 (doc-propagated + je_generator intrinsic fraud)"
2840 );
2841 }
2842 }
2843 }
2844
2845 self.emit_phase_items(
2847 "anomaly_injection",
2848 "LabeledAnomaly",
2849 &anomaly_labels.labels,
2850 );
2851
2852 if self.config.fraud.propagate_to_document {
2860 use std::collections::HashMap;
2861 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2874 for je in &entries {
2875 if je.header.is_fraud {
2876 if let Some(ref fraud_type) = je.header.fraud_type {
2877 if let Some(ref reference) = je.header.reference {
2878 fraud_map.insert(reference.clone(), *fraud_type);
2880 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2883 if !bare.is_empty() {
2884 fraud_map.insert(bare.to_string(), *fraud_type);
2885 }
2886 }
2887 }
2888 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2890 }
2891 }
2892 }
2893 if !fraud_map.is_empty() {
2894 let mut propagated = 0usize;
2895 macro_rules! propagate_to {
2897 ($collection:expr) => {
2898 for doc in &mut $collection {
2899 if doc.header.propagate_fraud(&fraud_map) {
2900 propagated += 1;
2901 }
2902 }
2903 };
2904 }
2905 propagate_to!(document_flows.purchase_orders);
2906 propagate_to!(document_flows.goods_receipts);
2907 propagate_to!(document_flows.vendor_invoices);
2908 propagate_to!(document_flows.payments);
2909 propagate_to!(document_flows.sales_orders);
2910 propagate_to!(document_flows.deliveries);
2911 propagate_to!(document_flows.customer_invoices);
2912 if propagated > 0 {
2913 info!(
2914 "Propagated fraud labels to {} document flow records",
2915 propagated
2916 );
2917 }
2918 }
2919 }
2920
2921 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2923
2924 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2926
2927 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2929
2930 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2932
2933 let balance_validation = self.phase_balance_validation(&entries)?;
2935
2936 self.validate_coa_coverage(&entries, coa.as_ref())?;
2940
2941 let subledger_reconciliation =
2943 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2944
2945 let (data_quality_stats, quality_issues) =
2947 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2948
2949 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2951
2952 {
2954 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2959 for je in &entries {
2960 if je.header.is_fraud || je.header.is_anomaly {
2961 continue;
2962 }
2963 let diff = (je.total_debit() - je.total_credit()).abs();
2964 if diff > tolerance {
2965 unbalanced_clean += 1;
2966 if unbalanced_clean <= 3 {
2967 warn!(
2968 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2969 je.header.document_id,
2970 je.total_debit(),
2971 je.total_credit(),
2972 diff
2973 );
2974 }
2975 }
2976 }
2977 if unbalanced_clean > 0 {
2978 return Err(datasynth_core::error::SynthError::generation(format!(
2979 "{} non-anomaly JEs are unbalanced (debits != credits). \
2980 First few logged above. Tolerance={}",
2981 unbalanced_clean, tolerance
2982 )));
2983 }
2984 debug!(
2985 "Phase 10c: All {} non-anomaly JEs individually balanced",
2986 entries
2987 .iter()
2988 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2989 .count()
2990 );
2991
2992 let company_codes: Vec<String> = self
2994 .config
2995 .companies
2996 .iter()
2997 .map(|c| c.code.clone())
2998 .collect();
2999 for company_code in &company_codes {
3000 let mut assets = rust_decimal::Decimal::ZERO;
3001 let mut liab_equity = rust_decimal::Decimal::ZERO;
3002
3003 for entry in &entries {
3004 if entry.header.company_code != *company_code {
3005 continue;
3006 }
3007 for line in &entry.lines {
3008 let acct = &line.gl_account;
3009 let net = line.debit_amount - line.credit_amount;
3010 if acct.starts_with('1') {
3012 assets += net;
3013 }
3014 else if acct.starts_with('2') || acct.starts_with('3') {
3016 liab_equity -= net; }
3018 }
3021 }
3022
3023 let bs_diff = (assets - liab_equity).abs();
3024 if bs_diff > tolerance {
3025 warn!(
3026 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3027 revenue/expense closing entries may not fully offset",
3028 company_code, assets, liab_equity, bs_diff
3029 );
3030 } else {
3034 debug!(
3035 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3036 company_code, assets, liab_equity, bs_diff
3037 );
3038 }
3039 }
3040
3041 info!("Phase 10c: All generation-time accounting assertions passed");
3042 }
3043
3044 let audit = self.phase_audit_data(&entries, &mut stats)?;
3046
3047 let mut banking = self.phase_banking_data(&mut stats)?;
3049
3050 if self.phase_config.generate_banking
3055 && !document_flows.payments.is_empty()
3056 && !banking.accounts.is_empty()
3057 {
3058 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3059 if bridge_rate > 0.0 {
3060 let mut bridge =
3061 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3062 self.seed,
3063 );
3064 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3065 &document_flows.payments,
3066 &banking.customers,
3067 &banking.accounts,
3068 bridge_rate,
3069 );
3070 info!(
3071 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3072 bridge_stats.bridged_count,
3073 bridge_stats.transactions_emitted,
3074 bridge_stats.fraud_propagated,
3075 );
3076 let bridged_count = bridged_txns.len();
3077 banking.transactions.extend(bridged_txns);
3078
3079 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3082 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3083 &mut banking.transactions,
3084 );
3085 }
3086
3087 banking.suspicious_count = banking
3089 .transactions
3090 .iter()
3091 .filter(|t| t.is_suspicious)
3092 .count();
3093 stats.banking_transaction_count = banking.transactions.len();
3094 stats.banking_suspicious_count = banking.suspicious_count;
3095 }
3096 }
3097
3098 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3100
3101 self.phase_llm_enrichment(&mut stats);
3103
3104 self.phase_diffusion_enhancement(&entries, &mut stats);
3106
3107 self.phase_causal_overlay(&mut stats);
3109
3110 let mut financial_reporting = self.phase_financial_reporting(
3114 &document_flows,
3115 &entries,
3116 &coa,
3117 &hr,
3118 &audit,
3119 &mut stats,
3120 )?;
3121
3122 {
3124 use datasynth_core::models::StatementType;
3125 for stmt in &financial_reporting.consolidated_statements {
3126 if stmt.statement_type == StatementType::BalanceSheet {
3127 let total_assets: rust_decimal::Decimal = stmt
3128 .line_items
3129 .iter()
3130 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3131 .map(|li| li.amount)
3132 .sum();
3133 let total_le: rust_decimal::Decimal = stmt
3134 .line_items
3135 .iter()
3136 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3137 .map(|li| li.amount)
3138 .sum();
3139 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3140 warn!(
3141 "BS equation imbalance: assets={}, L+E={}",
3142 total_assets, total_le
3143 );
3144 }
3145 }
3146 }
3147 }
3148
3149 let accounting_standards =
3151 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3152
3153 if !accounting_standards.ecl_journal_entries.is_empty() {
3155 debug!(
3156 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3157 accounting_standards.ecl_journal_entries.len()
3158 );
3159 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3160 }
3161
3162 if !accounting_standards.provision_journal_entries.is_empty() {
3164 debug!(
3165 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3166 accounting_standards.provision_journal_entries.len()
3167 );
3168 entries.extend(
3169 accounting_standards
3170 .provision_journal_entries
3171 .iter()
3172 .cloned(),
3173 );
3174 }
3175
3176 let mut ocpm = self.phase_ocpm_events(
3178 &document_flows,
3179 &sourcing,
3180 &hr,
3181 &manufacturing_snap,
3182 &banking,
3183 &audit,
3184 &financial_reporting,
3185 &mut stats,
3186 )?;
3187
3188 if let Some(ref event_log) = ocpm.event_log {
3190 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3191 }
3192
3193 if let Some(ref event_log) = ocpm.event_log {
3195 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3197 std::collections::HashMap::new();
3198 for (idx, event) in event_log.events.iter().enumerate() {
3199 if let Some(ref doc_ref) = event.document_ref {
3200 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3201 }
3202 }
3203
3204 if !doc_index.is_empty() {
3205 let mut annotated = 0usize;
3206 for entry in &mut entries {
3207 let doc_id_str = entry.header.document_id.to_string();
3208 let mut matched_indices: Vec<usize> = Vec::new();
3210 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3211 matched_indices.extend(indices);
3212 }
3213 if let Some(ref reference) = entry.header.reference {
3214 let bare_ref = reference
3215 .find(':')
3216 .map(|i| &reference[i + 1..])
3217 .unwrap_or(reference.as_str());
3218 if let Some(indices) = doc_index.get(bare_ref) {
3219 for &idx in indices {
3220 if !matched_indices.contains(&idx) {
3221 matched_indices.push(idx);
3222 }
3223 }
3224 }
3225 }
3226 if !matched_indices.is_empty() {
3228 for &idx in &matched_indices {
3229 let event = &event_log.events[idx];
3230 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3231 entry.header.ocpm_event_ids.push(event.event_id);
3232 }
3233 for obj_ref in &event.object_refs {
3234 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3235 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3236 }
3237 }
3238 if entry.header.ocpm_case_id.is_none() {
3239 entry.header.ocpm_case_id = event.case_id;
3240 }
3241 }
3242 annotated += 1;
3243 }
3244 }
3245 debug!(
3246 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3247 annotated
3248 );
3249 }
3250 }
3251
3252 if let Some(ref mut event_log) = ocpm.event_log {
3256 let synthesized =
3257 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3258 if synthesized > 0 {
3259 info!(
3260 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3261 );
3262 }
3263
3264 let anomaly_events =
3269 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3270 if anomaly_events > 0 {
3271 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3272 }
3273
3274 let p2p_cfg = &self.config.ocpm.p2p_process;
3279 let any_imperfection = p2p_cfg.rework_probability > 0.0
3280 || p2p_cfg.skip_step_probability > 0.0
3281 || p2p_cfg.out_of_order_probability > 0.0;
3282 if any_imperfection {
3283 use rand_chacha::rand_core::SeedableRng;
3284 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3285 rework_rate: p2p_cfg.rework_probability,
3286 skip_rate: p2p_cfg.skip_step_probability,
3287 out_of_order_rate: p2p_cfg.out_of_order_probability,
3288 };
3289 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3290 let stats =
3291 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3292 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3293 info!(
3294 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3295 stats.rework, stats.skipped, stats.out_of_order
3296 );
3297 }
3298 }
3299 }
3300
3301 let sales_kpi_budgets =
3303 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3304
3305 let treasury =
3309 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3310
3311 if !treasury.journal_entries.is_empty() {
3313 debug!(
3314 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3315 treasury.journal_entries.len()
3316 );
3317 entries.extend(treasury.journal_entries.iter().cloned());
3318 }
3319
3320 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3322
3323 if !tax.tax_posting_journal_entries.is_empty() {
3325 debug!(
3326 "Merging {} tax posting JEs into GL",
3327 tax.tax_posting_journal_entries.len()
3328 );
3329 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3330 }
3331
3332 {
3350 use datasynth_core::fraud_bias::{
3351 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3352 };
3353 use rand_chacha::rand_core::SeedableRng;
3354 let cfg = FraudBehavioralBiasConfig::default();
3355 if cfg.enabled {
3356 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3357 let mut swept = 0usize;
3358 for entry in entries.iter_mut() {
3359 if entry.header.is_fraud && !entry.header.is_anomaly {
3360 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3361 swept += 1;
3362 }
3363 }
3364 if swept > 0 {
3365 info!(
3366 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3367 non-anomaly fraud entries (covers late-added JEs from \
3368 ECL / provisions / treasury / tax / period-close)"
3369 );
3370 }
3371 }
3372 }
3373
3374 {
3378 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3379
3380 let framework_str = {
3381 use datasynth_config::schema::AccountingFrameworkConfig;
3382 match self
3383 .config
3384 .accounting_standards
3385 .framework
3386 .unwrap_or_default()
3387 {
3388 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3389 "IFRS"
3390 }
3391 _ => "US_GAAP",
3392 }
3393 };
3394
3395 let depreciation_total: rust_decimal::Decimal = entries
3397 .iter()
3398 .filter(|je| je.header.document_type == "CL")
3399 .flat_map(|je| je.lines.iter())
3400 .filter(|l| l.gl_account.starts_with("6000"))
3401 .map(|l| l.debit_amount)
3402 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3403
3404 let interest_paid: rust_decimal::Decimal = entries
3406 .iter()
3407 .flat_map(|je| je.lines.iter())
3408 .filter(|l| l.gl_account.starts_with("7100"))
3409 .map(|l| l.debit_amount)
3410 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3411
3412 let tax_paid: rust_decimal::Decimal = entries
3414 .iter()
3415 .flat_map(|je| je.lines.iter())
3416 .filter(|l| l.gl_account.starts_with("8000"))
3417 .map(|l| l.debit_amount)
3418 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3419
3420 let capex: rust_decimal::Decimal = entries
3422 .iter()
3423 .flat_map(|je| je.lines.iter())
3424 .filter(|l| l.gl_account.starts_with("1500"))
3425 .map(|l| l.debit_amount)
3426 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3427
3428 let dividends_paid: rust_decimal::Decimal = entries
3430 .iter()
3431 .flat_map(|je| je.lines.iter())
3432 .filter(|l| l.gl_account == "2170")
3433 .map(|l| l.debit_amount)
3434 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3435
3436 let cf_data = CashFlowSourceData {
3437 depreciation_total,
3438 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3440 delta_ap: rust_decimal::Decimal::ZERO,
3441 delta_inventory: rust_decimal::Decimal::ZERO,
3442 capex,
3443 debt_issuance: rust_decimal::Decimal::ZERO,
3444 debt_repayment: rust_decimal::Decimal::ZERO,
3445 interest_paid,
3446 tax_paid,
3447 dividends_paid,
3448 framework: framework_str.to_string(),
3449 };
3450
3451 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3452 if !enhanced_cf_items.is_empty() {
3453 use datasynth_core::models::StatementType;
3455 let merge_count = enhanced_cf_items.len();
3456 for stmt in financial_reporting
3457 .financial_statements
3458 .iter_mut()
3459 .chain(financial_reporting.consolidated_statements.iter_mut())
3460 .chain(
3461 financial_reporting
3462 .standalone_statements
3463 .values_mut()
3464 .flat_map(|v| v.iter_mut()),
3465 )
3466 {
3467 if stmt.statement_type == StatementType::CashFlowStatement {
3468 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3469 }
3470 }
3471 info!(
3472 "Enhanced cash flow: {} supplementary items merged into CF statements",
3473 merge_count
3474 );
3475 }
3476 }
3477
3478 self.generate_notes_to_financial_statements(
3481 &mut financial_reporting,
3482 &accounting_standards,
3483 &tax,
3484 &hr,
3485 &audit,
3486 &treasury,
3487 );
3488
3489 if self.config.companies.len() >= 2 && !entries.is_empty() {
3493 let companies: Vec<(String, String)> = self
3494 .config
3495 .companies
3496 .iter()
3497 .map(|c| (c.code.clone(), c.name.clone()))
3498 .collect();
3499 let ic_elim: rust_decimal::Decimal =
3500 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3501 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3502 .unwrap_or(NaiveDate::MIN);
3503 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3504 let period_label = format!(
3505 "{}-{:02}",
3506 end_date.year(),
3507 (end_date - chrono::Days::new(1)).month()
3508 );
3509
3510 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3511 let (je_segments, je_recon) =
3512 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3513 if !je_segments.is_empty() {
3514 info!(
3515 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3516 je_segments.len(),
3517 ic_elim,
3518 );
3519 if financial_reporting.segment_reports.is_empty() {
3521 financial_reporting.segment_reports = je_segments;
3522 financial_reporting.segment_reconciliations = vec![je_recon];
3523 } else {
3524 financial_reporting.segment_reports.extend(je_segments);
3525 financial_reporting.segment_reconciliations.push(je_recon);
3526 }
3527 }
3528 }
3529
3530 let esg_snap =
3532 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3533
3534 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3536
3537 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3539
3540 let disruption_events = self.phase_disruption_events(&mut stats)?;
3542
3543 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3545
3546 let (entity_relationship_graph, cross_process_links) =
3548 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3549
3550 let industry_output = self.phase_industry_data(&mut stats);
3552
3553 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3555
3556 if self.config.diffusion.enabled
3574 && (self.config.diffusion.backend == "neural"
3575 || self.config.diffusion.backend == "hybrid")
3576 {
3577 let neural = &self.config.diffusion.neural;
3578 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3579 stats.neural_hybrid_weight = Some(weight);
3580 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3581 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3582 warn!(
3583 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3584 the neural/hybrid training path is not yet shipped. Config \
3585 is captured in stats (weight={weight:.2}, strategy={}, \
3586 columns={}) but no neural training runs. Statistical \
3587 diffusion (backend='statistical') continues to work.",
3588 self.config.diffusion.backend,
3589 neural.hybrid_strategy,
3590 neural.neural_columns.len(),
3591 );
3592 }
3593
3594 self.phase_hypergraph_export(
3596 &coa,
3597 &entries,
3598 &document_flows,
3599 &sourcing,
3600 &hr,
3601 &manufacturing_snap,
3602 &banking,
3603 &audit,
3604 &financial_reporting,
3605 &ocpm,
3606 &compliance_regulations,
3607 &mut stats,
3608 )?;
3609
3610 if self.phase_config.generate_graph_export {
3613 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3614 }
3615
3616 if self.config.streaming.enabled {
3618 info!("Note: streaming config is enabled but batch mode does not use it");
3619 }
3620 if self.config.vendor_network.enabled {
3621 debug!("Vendor network config available; relationship graph generation is partial");
3622 }
3623 if self.config.customer_segmentation.enabled {
3624 debug!("Customer segmentation config available; segment-aware generation is partial");
3625 }
3626
3627 let resource_stats = self.resource_guard.stats();
3629 info!(
3630 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3631 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3632 resource_stats.disk.estimated_bytes_written,
3633 resource_stats.degradation_level
3634 );
3635
3636 if let Some(ref sink) = self.phase_sink {
3638 if let Err(e) = sink.flush() {
3639 warn!("Stream sink flush failed: {e}");
3640 }
3641 }
3642
3643 let lineage = self.build_lineage_graph();
3645
3646 let gate_result = if self.config.quality_gates.enabled {
3648 let profile_name = &self.config.quality_gates.profile;
3649 match datasynth_eval::gates::get_profile(profile_name) {
3650 Some(profile) => {
3651 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3653
3654 if balance_validation.validated {
3656 eval.coherence.balance =
3657 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3658 equation_balanced: balance_validation.is_balanced,
3659 max_imbalance: (balance_validation.total_debits
3660 - balance_validation.total_credits)
3661 .abs(),
3662 periods_evaluated: 1,
3663 periods_imbalanced: if balance_validation.is_balanced {
3664 0
3665 } else {
3666 1
3667 },
3668 period_results: Vec::new(),
3669 companies_evaluated: self.config.companies.len(),
3670 });
3671 }
3672
3673 eval.coherence.passes = balance_validation.is_balanced;
3675 if !balance_validation.is_balanced {
3676 eval.coherence
3677 .failures
3678 .push("Balance sheet equation not satisfied".to_string());
3679 }
3680
3681 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3683 eval.statistical.passes = !entries.is_empty();
3684
3685 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3688
3689 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3690 info!(
3691 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3692 profile_name, result.gates_passed, result.gates_total, result.summary
3693 );
3694 Some(result)
3695 }
3696 None => {
3697 warn!(
3698 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3699 profile_name
3700 );
3701 None
3702 }
3703 }
3704 } else {
3705 None
3706 };
3707
3708 let internal_controls = if self.config.internal_controls.enabled {
3710 InternalControl::standard_controls()
3711 } else {
3712 Vec::new()
3713 };
3714
3715 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3719
3720 let statistical_validation = self.phase_statistical_validation(&entries)?;
3725
3726 let interconnectivity = self.phase_interconnectivity();
3730
3731 Ok(EnhancedGenerationResult {
3732 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3733 master_data: std::mem::take(&mut self.master_data),
3734 document_flows,
3735 subledger,
3736 ocpm,
3737 audit,
3738 banking,
3739 graph_export,
3740 sourcing,
3741 financial_reporting,
3742 hr,
3743 accounting_standards,
3744 manufacturing: manufacturing_snap,
3745 sales_kpi_budgets,
3746 tax,
3747 esg: esg_snap,
3748 treasury,
3749 project_accounting,
3750 process_evolution,
3751 organizational_events,
3752 disruption_events,
3753 intercompany,
3754 journal_entries: entries,
3755 anomaly_labels,
3756 balance_validation,
3757 data_quality_stats,
3758 quality_issues,
3759 statistics: stats,
3760 lineage: Some(lineage),
3761 gate_result,
3762 internal_controls,
3763 sod_violations,
3764 opening_balances,
3765 subledger_reconciliation,
3766 counterfactual_pairs,
3767 red_flags,
3768 collusion_rings,
3769 temporal_vendor_chains,
3770 entity_relationship_graph,
3771 cross_process_links,
3772 industry_output,
3773 compliance_regulations,
3774 analytics_metadata,
3775 statistical_validation,
3776 interconnectivity,
3777 })
3778 }
3779
3780 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3784 use rand::{RngExt, SeedableRng};
3785 use rand_chacha::ChaCha8Rng;
3786
3787 let mut snap = InterconnectivitySnapshot::default();
3788 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3789
3790 let vn = &self.config.vendor_network;
3792 if vn.enabled {
3793 let total = self.master_data.vendors.len();
3794 if total > 0 {
3795 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3796 let remaining_after_t1 = total.saturating_sub(tier1_count);
3797 let depth = vn.depth.clamp(1, 3);
3798 let tier2_count = if depth >= 2 {
3799 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3800 (tier1_count * avg).min(remaining_after_t1)
3801 } else {
3802 0
3803 };
3804 let tier3_count = total
3805 .saturating_sub(tier1_count)
3806 .saturating_sub(tier2_count);
3807
3808 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3809 let tier = if idx < tier1_count {
3810 1
3811 } else if idx < tier1_count + tier2_count {
3812 2
3813 } else {
3814 3
3815 };
3816 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3817
3818 let cl = &vn.clusters;
3820 let roll: f64 = rng.random();
3821 let cluster = if roll < cl.reliable_strategic {
3822 "reliable_strategic"
3823 } else if roll < cl.reliable_strategic + cl.standard_operational {
3824 "standard_operational"
3825 } else if roll
3826 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3827 {
3828 "transactional"
3829 } else {
3830 "problematic"
3831 };
3832 snap.vendor_clusters
3833 .push((vendor.vendor_id.clone(), cluster.to_string()));
3834 }
3835 let _ = tier3_count; }
3837 }
3838
3839 let cs = &self.config.customer_segmentation;
3841 if cs.enabled {
3842 let seg = &cs.value_segments;
3843 for customer in &self.master_data.customers {
3844 let roll: f64 = rng.random();
3845 let value_segment = if roll < seg.enterprise.customer_share {
3846 "enterprise"
3847 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3848 "mid_market"
3849 } else if roll
3850 < seg.enterprise.customer_share
3851 + seg.mid_market.customer_share
3852 + seg.smb.customer_share
3853 {
3854 "smb"
3855 } else {
3856 "consumer"
3857 };
3858 snap.customer_value_segments
3859 .push((customer.customer_id.clone(), value_segment.to_string()));
3860
3861 let roll2: f64 = rng.random();
3862 let life = &cs.lifecycle;
3863 let lifecycle = if roll2 < life.prospect_rate {
3864 "prospect"
3865 } else if roll2 < life.prospect_rate + life.new_rate {
3866 "new"
3867 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3868 "growth"
3869 } else if roll2
3870 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3871 {
3872 "mature"
3873 } else if roll2
3874 < life.prospect_rate
3875 + life.new_rate
3876 + life.growth_rate
3877 + life.mature_rate
3878 + life.at_risk_rate
3879 {
3880 "at_risk"
3881 } else if roll2
3882 < life.prospect_rate
3883 + life.new_rate
3884 + life.growth_rate
3885 + life.mature_rate
3886 + life.at_risk_rate
3887 + life.churned_rate
3888 {
3889 "churned"
3890 } else {
3891 "won_back"
3892 };
3893 snap.customer_lifecycle_stages
3894 .push((customer.customer_id.clone(), lifecycle.to_string()));
3895 }
3896 }
3897
3898 let is = &self.config.industry_specific;
3900 if is.enabled {
3901 snap.industry_metadata.push(format!(
3902 "industry_specific.enabled=true (industry={:?})",
3903 self.config.global.industry
3904 ));
3905 }
3906
3907 snap
3908 }
3909
3910 fn phase_chart_of_accounts(
3916 &mut self,
3917 stats: &mut EnhancedGenerationStatistics,
3918 ) -> SynthResult<Arc<ChartOfAccounts>> {
3919 info!("Phase 1: Generating Chart of Accounts");
3920 let coa = self.generate_coa()?;
3921 stats.accounts_count = coa.account_count();
3922 info!(
3923 "Chart of Accounts generated: {} accounts",
3924 stats.accounts_count
3925 );
3926 self.check_resources_with_log("post-coa")?;
3927 Ok(coa)
3928 }
3929
3930 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3932 if self.phase_config.generate_master_data {
3933 info!("Phase 2: Generating Master Data");
3934 self.generate_master_data()?;
3935 stats.vendor_count = self.master_data.vendors.len();
3936 stats.customer_count = self.master_data.customers.len();
3937 stats.material_count = self.master_data.materials.len();
3938 stats.asset_count = self.master_data.assets.len();
3939 stats.employee_count = self.master_data.employees.len();
3940 info!(
3941 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3942 stats.vendor_count, stats.customer_count, stats.material_count,
3943 stats.asset_count, stats.employee_count
3944 );
3945 self.check_resources_with_log("post-master-data")?;
3946 } else {
3947 debug!("Phase 2: Skipped (master data generation disabled)");
3948 }
3949 Ok(())
3950 }
3951
3952 fn phase_document_flows(
3954 &mut self,
3955 stats: &mut EnhancedGenerationStatistics,
3956 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3957 let mut document_flows = DocumentFlowSnapshot::default();
3958 let mut subledger = SubledgerSnapshot::default();
3959 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3962
3963 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3964 info!("Phase 3: Generating Document Flows");
3965 self.generate_document_flows(&mut document_flows)?;
3966 stats.p2p_chain_count = document_flows.p2p_chains.len();
3967 stats.o2c_chain_count = document_flows.o2c_chains.len();
3968 info!(
3969 "Document flows generated: {} P2P chains, {} O2C chains",
3970 stats.p2p_chain_count, stats.o2c_chain_count
3971 );
3972
3973 debug!("Phase 3b: Linking document flows to subledgers");
3975 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3976 stats.ap_invoice_count = subledger.ap_invoices.len();
3977 stats.ar_invoice_count = subledger.ar_invoices.len();
3978 debug!(
3979 "Subledgers linked: {} AP invoices, {} AR invoices",
3980 stats.ap_invoice_count, stats.ar_invoice_count
3981 );
3982
3983 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3988 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3989 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3990 debug!("Payment settlements applied to AP and AR subledgers");
3991
3992 if let Ok(start_date) =
3995 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3996 {
3997 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3998 - chrono::Days::new(1);
3999 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4000 for company in &self.config.companies {
4007 let ar_report = ARAgingReport::from_invoices(
4008 company.code.clone(),
4009 &subledger.ar_invoices,
4010 as_of_date,
4011 );
4012 subledger.ar_aging_reports.push(ar_report);
4013
4014 let ap_report = APAgingReport::from_invoices(
4015 company.code.clone(),
4016 &subledger.ap_invoices,
4017 as_of_date,
4018 );
4019 subledger.ap_aging_reports.push(ap_report);
4020 }
4021 debug!(
4022 "AR/AP aging reports built: {} AR, {} AP",
4023 subledger.ar_aging_reports.len(),
4024 subledger.ap_aging_reports.len()
4025 );
4026
4027 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4029 {
4030 use datasynth_generators::DunningGenerator;
4031 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4032 for company in &self.config.companies {
4033 let currency = company.currency.as_str();
4034 let mut company_invoices: Vec<
4037 datasynth_core::models::subledger::ar::ARInvoice,
4038 > = subledger
4039 .ar_invoices
4040 .iter()
4041 .filter(|inv| inv.company_code == company.code)
4042 .cloned()
4043 .collect();
4044
4045 if company_invoices.is_empty() {
4046 continue;
4047 }
4048
4049 let result = dunning_gen.execute_dunning_run(
4050 &company.code,
4051 as_of_date,
4052 &mut company_invoices,
4053 currency,
4054 );
4055
4056 for updated in &company_invoices {
4058 if let Some(orig) = subledger
4059 .ar_invoices
4060 .iter_mut()
4061 .find(|i| i.invoice_number == updated.invoice_number)
4062 {
4063 orig.dunning_info = updated.dunning_info.clone();
4064 }
4065 }
4066
4067 subledger.dunning_runs.push(result.dunning_run);
4068 subledger.dunning_letters.extend(result.letters);
4069 dunning_journal_entries.extend(result.journal_entries);
4071 }
4072 debug!(
4073 "Dunning runs complete: {} runs, {} letters",
4074 subledger.dunning_runs.len(),
4075 subledger.dunning_letters.len()
4076 );
4077 }
4078 }
4079
4080 self.check_resources_with_log("post-document-flows")?;
4081 } else {
4082 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4083 }
4084
4085 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4087 if !self.master_data.assets.is_empty() {
4088 debug!("Generating FA subledger records");
4089 let company_code = self
4090 .config
4091 .companies
4092 .first()
4093 .map(|c| c.code.as_str())
4094 .unwrap_or("1000");
4095 let currency = self
4096 .config
4097 .companies
4098 .first()
4099 .map(|c| c.currency.as_str())
4100 .unwrap_or("USD");
4101
4102 let mut fa_gen = datasynth_generators::FAGenerator::new(
4103 datasynth_generators::FAGeneratorConfig::default(),
4104 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4105 );
4106
4107 for asset in &self.master_data.assets {
4108 let (record, je) = fa_gen.generate_asset_acquisition(
4109 company_code,
4110 &format!("{:?}", asset.asset_class),
4111 &asset.description,
4112 asset.acquisition_date,
4113 currency,
4114 asset.cost_center.as_deref(),
4115 );
4116 subledger.fa_records.push(record);
4117 fa_journal_entries.push(je);
4118 }
4119
4120 stats.fa_subledger_count = subledger.fa_records.len();
4121 debug!(
4122 "FA subledger records generated: {} (with {} acquisition JEs)",
4123 stats.fa_subledger_count,
4124 fa_journal_entries.len()
4125 );
4126 }
4127
4128 if !self.master_data.materials.is_empty() {
4130 debug!("Generating Inventory subledger records");
4131 let first_company = self.config.companies.first();
4132 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4133 let inv_currency = first_company
4134 .map(|c| c.currency.clone())
4135 .unwrap_or_else(|| "USD".to_string());
4136
4137 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4138 datasynth_generators::InventoryGeneratorConfig::default(),
4139 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4140 inv_currency.clone(),
4141 );
4142
4143 for (i, material) in self.master_data.materials.iter().enumerate() {
4144 let plant = format!("PLANT{:02}", (i % 3) + 1);
4145 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4146 let initial_qty = rust_decimal::Decimal::from(
4147 material
4148 .safety_stock
4149 .to_string()
4150 .parse::<i64>()
4151 .unwrap_or(100),
4152 );
4153
4154 let position = inv_gen.generate_position(
4155 company_code,
4156 &plant,
4157 &storage_loc,
4158 &material.material_id,
4159 &material.description,
4160 initial_qty,
4161 Some(material.standard_cost),
4162 &inv_currency,
4163 );
4164 subledger.inventory_positions.push(position);
4165 }
4166
4167 stats.inventory_subledger_count = subledger.inventory_positions.len();
4168 debug!(
4169 "Inventory subledger records generated: {}",
4170 stats.inventory_subledger_count
4171 );
4172 }
4173
4174 if !subledger.fa_records.is_empty() {
4176 if let Ok(start_date) =
4177 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4178 {
4179 let company_code = self
4180 .config
4181 .companies
4182 .first()
4183 .map(|c| c.code.as_str())
4184 .unwrap_or("1000");
4185 let fiscal_year = start_date.year();
4186 let start_period = start_date.month();
4187 let end_period =
4188 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4189
4190 let depr_cfg = FaDepreciationScheduleConfig {
4191 fiscal_year,
4192 start_period,
4193 end_period,
4194 seed_offset: 800,
4195 };
4196 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4197 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4198 let run_count = runs.len();
4199 subledger.depreciation_runs = runs;
4200 debug!(
4201 "Depreciation runs generated: {} runs for {} periods",
4202 run_count, self.config.global.period_months
4203 );
4204 }
4205 }
4206
4207 if !subledger.inventory_positions.is_empty() {
4209 if let Ok(start_date) =
4210 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4211 {
4212 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4213 - chrono::Days::new(1);
4214
4215 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4216 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4217
4218 for company in &self.config.companies {
4219 let result = inv_val_gen.generate(
4220 &company.code,
4221 &subledger.inventory_positions,
4222 as_of_date,
4223 );
4224 subledger.inventory_valuations.push(result);
4225 }
4226 debug!(
4227 "Inventory valuations generated: {} company reports",
4228 subledger.inventory_valuations.len()
4229 );
4230 }
4231 }
4232
4233 Ok((document_flows, subledger, fa_journal_entries))
4234 }
4235
4236 #[allow(clippy::too_many_arguments)]
4238 fn phase_ocpm_events(
4239 &mut self,
4240 document_flows: &DocumentFlowSnapshot,
4241 sourcing: &SourcingSnapshot,
4242 hr: &HrSnapshot,
4243 manufacturing: &ManufacturingSnapshot,
4244 banking: &BankingSnapshot,
4245 audit: &AuditSnapshot,
4246 financial_reporting: &FinancialReportingSnapshot,
4247 stats: &mut EnhancedGenerationStatistics,
4248 ) -> SynthResult<OcpmSnapshot> {
4249 let degradation = self.check_resources()?;
4250 if degradation >= DegradationLevel::Reduced {
4251 debug!(
4252 "Phase skipped due to resource pressure (degradation: {:?})",
4253 degradation
4254 );
4255 return Ok(OcpmSnapshot::default());
4256 }
4257 if self.phase_config.generate_ocpm_events {
4258 info!("Phase 3c: Generating OCPM Events");
4259 let ocpm_snapshot = self.generate_ocpm_events(
4260 document_flows,
4261 sourcing,
4262 hr,
4263 manufacturing,
4264 banking,
4265 audit,
4266 financial_reporting,
4267 )?;
4268 stats.ocpm_event_count = ocpm_snapshot.event_count;
4269 stats.ocpm_object_count = ocpm_snapshot.object_count;
4270 stats.ocpm_case_count = ocpm_snapshot.case_count;
4271 info!(
4272 "OCPM events generated: {} events, {} objects, {} cases",
4273 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4274 );
4275 self.check_resources_with_log("post-ocpm")?;
4276 Ok(ocpm_snapshot)
4277 } else {
4278 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4279 Ok(OcpmSnapshot::default())
4280 }
4281 }
4282
4283 fn phase_journal_entries(
4285 &mut self,
4286 coa: &Arc<ChartOfAccounts>,
4287 document_flows: &DocumentFlowSnapshot,
4288 _stats: &mut EnhancedGenerationStatistics,
4289 ) -> SynthResult<Vec<JournalEntry>> {
4290 let mut entries = Vec::new();
4291
4292 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4294 debug!("Phase 4a: Generating JEs from document flows");
4295 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4296 debug!("Generated {} JEs from document flows", flow_entries.len());
4297 entries.extend(flow_entries);
4298 }
4299
4300 if self.phase_config.generate_journal_entries {
4302 info!("Phase 4: Generating Journal Entries");
4303 let je_entries = self.generate_journal_entries(coa)?;
4304 info!("Generated {} standalone journal entries", je_entries.len());
4305 entries.extend(je_entries);
4306 } else {
4307 debug!("Phase 4: Skipped (journal entry generation disabled)");
4308 }
4309
4310 if let Some(ctx) = &self.shard_context {
4314 if !ctx.extra_journal_entries.is_empty() {
4315 debug!(
4316 "Phase 4c: appending {} shard-mode IC journal entries",
4317 ctx.extra_journal_entries.len()
4318 );
4319 entries.extend(ctx.extra_journal_entries.iter().cloned());
4320 }
4321 }
4322
4323 if !entries.is_empty() {
4324 self.check_resources_with_log("post-journal-entries")?;
4327 }
4328
4329 Ok(entries)
4330 }
4331
4332 fn phase_anomaly_injection(
4334 &mut self,
4335 entries: &mut [JournalEntry],
4336 actions: &DegradationActions,
4337 stats: &mut EnhancedGenerationStatistics,
4338 ) -> SynthResult<AnomalyLabels> {
4339 if self.phase_config.inject_anomalies
4340 && !entries.is_empty()
4341 && !actions.skip_anomaly_injection
4342 {
4343 info!("Phase 5: Injecting Anomalies");
4344 let result = self.inject_anomalies(entries)?;
4345 stats.anomalies_injected = result.labels.len();
4346 info!("Injected {} anomalies", stats.anomalies_injected);
4347 self.check_resources_with_log("post-anomaly-injection")?;
4348 Ok(result)
4349 } else if actions.skip_anomaly_injection {
4350 warn!("Phase 5: Skipped due to resource degradation");
4351 Ok(AnomalyLabels::default())
4352 } else {
4353 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4354 Ok(AnomalyLabels::default())
4355 }
4356 }
4357
4358 fn phase_balance_validation(
4360 &mut self,
4361 entries: &[JournalEntry],
4362 ) -> SynthResult<BalanceValidationResult> {
4363 if self.phase_config.validate_balances && !entries.is_empty() {
4364 debug!("Phase 6: Validating Balances");
4365 let balance_validation = self.validate_journal_entries(entries)?;
4366 if balance_validation.is_balanced {
4367 debug!("Balance validation passed");
4368 } else {
4369 warn!(
4370 "Balance validation found {} errors",
4371 balance_validation.validation_errors.len()
4372 );
4373 }
4374 Ok(balance_validation)
4375 } else {
4376 Ok(BalanceValidationResult::default())
4377 }
4378 }
4379
4380 fn validate_coa_coverage(
4387 &self,
4388 entries: &[JournalEntry],
4389 coa: &ChartOfAccounts,
4390 ) -> SynthResult<()> {
4391 if entries.is_empty() {
4392 return Ok(());
4393 }
4394 let coa_set: std::collections::HashSet<&str> = coa
4395 .accounts
4396 .iter()
4397 .map(|a| a.account_number.as_str())
4398 .collect();
4399 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4400 for je in entries {
4401 for line in je.lines.iter() {
4402 if !coa_set.contains(line.gl_account.as_str()) {
4403 missing.insert(line.gl_account.clone());
4404 }
4405 }
4406 }
4407 if missing.is_empty() {
4408 debug!("COA coverage validation passed");
4409 return Ok(());
4410 }
4411 let msg = format!(
4412 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4413 missing.len(),
4414 missing.iter().take(10).collect::<Vec<_>>()
4415 );
4416 if self.phase_config.validate_coa_coverage_strict {
4417 Err(SynthError::generation(msg))
4418 } else {
4419 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4420 Ok(())
4421 }
4422 }
4423
4424 fn phase_data_quality_injection(
4426 &mut self,
4427 entries: &mut [JournalEntry],
4428 actions: &DegradationActions,
4429 stats: &mut EnhancedGenerationStatistics,
4430 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4431 if self.phase_config.inject_data_quality
4432 && !entries.is_empty()
4433 && !actions.skip_data_quality
4434 {
4435 info!("Phase 7: Injecting Data Quality Variations");
4436 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4437 stats.data_quality_issues = dq_stats.records_with_issues;
4438 info!("Injected {} data quality issues", stats.data_quality_issues);
4439 self.check_resources_with_log("post-data-quality")?;
4440 Ok((dq_stats, quality_issues))
4441 } else if actions.skip_data_quality {
4442 warn!("Phase 7: Skipped due to resource degradation");
4443 Ok((stats_with_denominator(entries.len()), Vec::new()))
4447 } else {
4448 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4449 Ok((stats_with_denominator(entries.len()), Vec::new()))
4450 }
4451 }
4452
4453 fn phase_period_close(
4463 &mut self,
4464 entries: &mut Vec<JournalEntry>,
4465 subledger: &SubledgerSnapshot,
4466 stats: &mut EnhancedGenerationStatistics,
4467 ) -> SynthResult<()> {
4468 if !self.phase_config.generate_period_close || entries.is_empty() {
4469 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4470 return Ok(());
4471 }
4472
4473 info!("Phase 10b: Generating period-close journal entries");
4474
4475 use datasynth_core::accounts::{
4476 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4477 };
4478 use rust_decimal::Decimal;
4479
4480 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4481 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4482 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4483 let close_date = end_date - chrono::Days::new(1);
4485
4486 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4491 .config
4492 .companies
4493 .iter()
4494 .map(|c| c.code.clone())
4495 .collect();
4496
4497 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4499 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4500
4501 let period_months = self.config.global.period_months;
4505 for asset in &subledger.fa_records {
4506 use datasynth_core::models::subledger::fa::AssetStatus;
4508 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4509 continue;
4510 }
4511 let useful_life_months = asset.useful_life_months();
4512 if useful_life_months == 0 {
4513 continue;
4515 }
4516 let salvage_value = asset.salvage_value();
4517 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4518 if depreciable_base == Decimal::ZERO {
4519 continue;
4520 }
4521 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4522 * Decimal::from(period_months))
4523 .round_dp(2);
4524 if period_depr <= Decimal::ZERO {
4525 continue;
4526 }
4527
4528 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4529 depr_header.document_type = "CL".to_string();
4530 depr_header.header_text = Some(format!(
4531 "Depreciation - {} {}",
4532 asset.asset_number, asset.description
4533 ));
4534 depr_header.created_by = "CLOSE_ENGINE".to_string();
4535 depr_header.source = TransactionSource::Automated;
4536 depr_header.business_process = Some(BusinessProcess::R2R);
4537
4538 let doc_id = depr_header.document_id;
4539 let mut depr_je = JournalEntry::new(depr_header);
4540
4541 depr_je.add_line(JournalEntryLine::debit(
4543 doc_id,
4544 1,
4545 expense_accounts::DEPRECIATION.to_string(),
4546 period_depr,
4547 ));
4548 depr_je.add_line(JournalEntryLine::credit(
4550 doc_id,
4551 2,
4552 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4553 period_depr,
4554 ));
4555
4556 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4557 close_jes.push(depr_je);
4558 }
4559
4560 if !subledger.fa_records.is_empty() {
4561 debug!(
4562 "Generated {} depreciation JEs from {} FA records",
4563 close_jes.len(),
4564 subledger.fa_records.len()
4565 );
4566 }
4567
4568 {
4572 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4573 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4574 if let Some(ctx) = &self.temporal_context {
4577 accrual_gen.set_temporal_context(Arc::clone(ctx));
4578 }
4579
4580 let accrual_items: &[(&str, &str, &str)] = &[
4582 ("Accrued Utilities", "6200", "2100"),
4583 ("Accrued Rent", "6300", "2100"),
4584 ("Accrued Interest", "6100", "2150"),
4585 ];
4586
4587 for company_code in &company_codes {
4588 let company_revenue: Decimal = entries
4590 .iter()
4591 .filter(|e| e.header.company_code == *company_code)
4592 .flat_map(|e| e.lines.iter())
4593 .filter(|l| l.gl_account.starts_with('4'))
4594 .map(|l| l.credit_amount - l.debit_amount)
4595 .fold(Decimal::ZERO, |acc, v| acc + v);
4596
4597 if company_revenue <= Decimal::ZERO {
4598 continue;
4599 }
4600
4601 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4603 if accrual_base <= Decimal::ZERO {
4604 continue;
4605 }
4606
4607 for (description, expense_acct, liability_acct) in accrual_items {
4608 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4609 company_code,
4610 description,
4611 accrual_base,
4612 expense_acct,
4613 liability_acct,
4614 close_date,
4615 None,
4616 );
4617 close_jes.push(accrual_je);
4618 if let Some(rev_je) = reversal_je {
4619 close_jes.push(rev_je);
4620 }
4621 }
4622 }
4623
4624 debug!(
4625 "Generated accrual entries for {} companies",
4626 company_codes.len()
4627 );
4628 }
4629
4630 for company_code in &company_codes {
4631 let mut total_revenue = Decimal::ZERO;
4636 let mut total_expenses = Decimal::ZERO;
4637
4638 for entry in entries.iter() {
4639 if entry.header.company_code != *company_code {
4640 continue;
4641 }
4642 for line in &entry.lines {
4643 let category = AccountCategory::from_account(&line.gl_account);
4644 match category {
4645 AccountCategory::Revenue => {
4646 total_revenue += line.credit_amount - line.debit_amount;
4648 }
4649 AccountCategory::Cogs
4650 | AccountCategory::OperatingExpense
4651 | AccountCategory::OtherIncomeExpense
4652 | AccountCategory::Tax => {
4653 total_expenses += line.debit_amount - line.credit_amount;
4655 }
4656 _ => {}
4657 }
4658 }
4659 }
4660
4661 let pre_tax_income = total_revenue - total_expenses;
4662
4663 if pre_tax_income == Decimal::ZERO {
4665 debug!(
4666 "Company {}: no pre-tax income, skipping period close",
4667 company_code
4668 );
4669 continue;
4670 }
4671
4672 if pre_tax_income > Decimal::ZERO {
4674 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4676
4677 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4678 tax_header.document_type = "CL".to_string();
4679 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4680 tax_header.created_by = "CLOSE_ENGINE".to_string();
4681 tax_header.source = TransactionSource::Automated;
4682 tax_header.business_process = Some(BusinessProcess::R2R);
4683
4684 let doc_id = tax_header.document_id;
4685 let mut tax_je = JournalEntry::new(tax_header);
4686
4687 tax_je.add_line(JournalEntryLine::debit(
4689 doc_id,
4690 1,
4691 tax_accounts::TAX_EXPENSE.to_string(),
4692 tax_amount,
4693 ));
4694 tax_je.add_line(JournalEntryLine::credit(
4696 doc_id,
4697 2,
4698 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4699 tax_amount,
4700 ));
4701
4702 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4703 close_jes.push(tax_je);
4704 } else {
4705 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4708 if dta_amount > Decimal::ZERO {
4709 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4710 dta_header.document_type = "CL".to_string();
4711 dta_header.header_text =
4712 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4713 dta_header.created_by = "CLOSE_ENGINE".to_string();
4714 dta_header.source = TransactionSource::Automated;
4715 dta_header.business_process = Some(BusinessProcess::R2R);
4716
4717 let doc_id = dta_header.document_id;
4718 let mut dta_je = JournalEntry::new(dta_header);
4719
4720 dta_je.add_line(JournalEntryLine::debit(
4722 doc_id,
4723 1,
4724 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4725 dta_amount,
4726 ));
4727 dta_je.add_line(JournalEntryLine::credit(
4730 doc_id,
4731 2,
4732 tax_accounts::TAX_EXPENSE.to_string(),
4733 dta_amount,
4734 ));
4735
4736 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4737 close_jes.push(dta_je);
4738 debug!(
4739 "Company {}: loss year — recognised DTA of {}",
4740 company_code, dta_amount
4741 );
4742 }
4743 }
4744
4745 let tax_provision = if pre_tax_income > Decimal::ZERO {
4751 (pre_tax_income * tax_rate).round_dp(2)
4752 } else {
4753 Decimal::ZERO
4754 };
4755 let net_income = pre_tax_income - tax_provision;
4756
4757 if net_income > Decimal::ZERO {
4758 use datasynth_generators::DividendGenerator;
4759 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4761 let currency_str = self
4762 .config
4763 .companies
4764 .iter()
4765 .find(|c| c.code == *company_code)
4766 .map(|c| c.currency.as_str())
4767 .unwrap_or("USD");
4768 let div_result = div_gen.generate(
4769 company_code,
4770 close_date,
4771 Decimal::new(1, 0), dividend_amount,
4773 currency_str,
4774 );
4775 let div_je_count = div_result.journal_entries.len();
4776 close_jes.extend(div_result.journal_entries);
4777 debug!(
4778 "Company {}: declared dividend of {} ({} JEs)",
4779 company_code, dividend_amount, div_je_count
4780 );
4781 }
4782
4783 if net_income != Decimal::ZERO {
4788 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4789 close_header.document_type = "CL".to_string();
4790 close_header.header_text =
4791 Some(format!("Income statement close - {}", company_code));
4792 close_header.created_by = "CLOSE_ENGINE".to_string();
4793 close_header.source = TransactionSource::Automated;
4794 close_header.business_process = Some(BusinessProcess::R2R);
4795
4796 let doc_id = close_header.document_id;
4797 let mut close_je = JournalEntry::new(close_header);
4798
4799 let abs_net_income = net_income.abs();
4800
4801 if net_income > Decimal::ZERO {
4802 close_je.add_line(JournalEntryLine::debit(
4804 doc_id,
4805 1,
4806 equity_accounts::INCOME_SUMMARY.to_string(),
4807 abs_net_income,
4808 ));
4809 close_je.add_line(JournalEntryLine::credit(
4810 doc_id,
4811 2,
4812 equity_accounts::RETAINED_EARNINGS.to_string(),
4813 abs_net_income,
4814 ));
4815 } else {
4816 close_je.add_line(JournalEntryLine::debit(
4818 doc_id,
4819 1,
4820 equity_accounts::RETAINED_EARNINGS.to_string(),
4821 abs_net_income,
4822 ));
4823 close_je.add_line(JournalEntryLine::credit(
4824 doc_id,
4825 2,
4826 equity_accounts::INCOME_SUMMARY.to_string(),
4827 abs_net_income,
4828 ));
4829 }
4830
4831 debug_assert!(
4832 close_je.is_balanced(),
4833 "Income statement closing JE must be balanced"
4834 );
4835 close_jes.push(close_je);
4836 }
4837 }
4838
4839 let close_count = close_jes.len();
4840 if close_count > 0 {
4841 info!("Generated {} period-close journal entries", close_count);
4842 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4843 entries.extend(close_jes);
4844 stats.period_close_je_count = close_count;
4845
4846 stats.total_entries = entries.len() as u64;
4848 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4849 } else {
4850 debug!("No period-close entries generated (no income statement activity)");
4851 }
4852
4853 Ok(())
4854 }
4855
4856 fn phase_audit_data(
4858 &mut self,
4859 entries: &[JournalEntry],
4860 stats: &mut EnhancedGenerationStatistics,
4861 ) -> SynthResult<AuditSnapshot> {
4862 if self.phase_config.generate_audit {
4863 info!("Phase 8: Generating Audit Data");
4864 let audit_snapshot = self.generate_audit_data(entries)?;
4865 stats.audit_engagement_count = audit_snapshot.engagements.len();
4866 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4867 stats.audit_evidence_count = audit_snapshot.evidence.len();
4868 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4869 stats.audit_finding_count = audit_snapshot.findings.len();
4870 stats.audit_judgment_count = audit_snapshot.judgments.len();
4871 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4872 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4873 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4874 stats.audit_sample_count = audit_snapshot.samples.len();
4875 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4876 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4877 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4878 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4879 stats.audit_related_party_transaction_count =
4880 audit_snapshot.related_party_transactions.len();
4881 info!(
4882 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4883 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4884 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4885 {} RP transactions",
4886 stats.audit_engagement_count,
4887 stats.audit_workpaper_count,
4888 stats.audit_evidence_count,
4889 stats.audit_risk_count,
4890 stats.audit_finding_count,
4891 stats.audit_judgment_count,
4892 stats.audit_confirmation_count,
4893 stats.audit_procedure_step_count,
4894 stats.audit_sample_count,
4895 stats.audit_analytical_result_count,
4896 stats.audit_ia_function_count,
4897 stats.audit_ia_report_count,
4898 stats.audit_related_party_count,
4899 stats.audit_related_party_transaction_count,
4900 );
4901 self.check_resources_with_log("post-audit")?;
4902 Ok(audit_snapshot)
4903 } else {
4904 debug!("Phase 8: Skipped (audit generation disabled)");
4905 Ok(AuditSnapshot::default())
4906 }
4907 }
4908
4909 fn phase_banking_data(
4911 &mut self,
4912 stats: &mut EnhancedGenerationStatistics,
4913 ) -> SynthResult<BankingSnapshot> {
4914 if self.phase_config.generate_banking {
4915 info!("Phase 9: Generating Banking KYC/AML Data");
4916 let banking_snapshot = self.generate_banking_data()?;
4917 stats.banking_customer_count = banking_snapshot.customers.len();
4918 stats.banking_account_count = banking_snapshot.accounts.len();
4919 stats.banking_transaction_count = banking_snapshot.transactions.len();
4920 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4921 info!(
4922 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4923 stats.banking_customer_count, stats.banking_account_count,
4924 stats.banking_transaction_count, stats.banking_suspicious_count
4925 );
4926 self.check_resources_with_log("post-banking")?;
4927 Ok(banking_snapshot)
4928 } else {
4929 debug!("Phase 9: Skipped (banking generation disabled)");
4930 Ok(BankingSnapshot::default())
4931 }
4932 }
4933
4934 fn phase_graph_export(
4936 &mut self,
4937 entries: &[JournalEntry],
4938 coa: &Arc<ChartOfAccounts>,
4939 stats: &mut EnhancedGenerationStatistics,
4940 ) -> SynthResult<GraphExportSnapshot> {
4941 if self.phase_config.generate_graph_export && !entries.is_empty() {
4942 info!("Phase 10: Exporting Accounting Network Graphs");
4943 match self.export_graphs(entries, coa, stats) {
4944 Ok(snapshot) => {
4945 info!(
4946 "Graph export complete: {} graphs ({} nodes, {} edges)",
4947 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4948 );
4949 Ok(snapshot)
4950 }
4951 Err(e) => {
4952 warn!("Phase 10: Graph export failed: {}", e);
4953 Ok(GraphExportSnapshot::default())
4954 }
4955 }
4956 } else {
4957 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4958 Ok(GraphExportSnapshot::default())
4959 }
4960 }
4961
4962 #[allow(clippy::too_many_arguments)]
4964 fn phase_hypergraph_export(
4965 &self,
4966 coa: &Arc<ChartOfAccounts>,
4967 entries: &[JournalEntry],
4968 document_flows: &DocumentFlowSnapshot,
4969 sourcing: &SourcingSnapshot,
4970 hr: &HrSnapshot,
4971 manufacturing: &ManufacturingSnapshot,
4972 banking: &BankingSnapshot,
4973 audit: &AuditSnapshot,
4974 financial_reporting: &FinancialReportingSnapshot,
4975 ocpm: &OcpmSnapshot,
4976 compliance: &ComplianceRegulationsSnapshot,
4977 stats: &mut EnhancedGenerationStatistics,
4978 ) -> SynthResult<()> {
4979 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4980 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4981 match self.export_hypergraph(
4982 coa,
4983 entries,
4984 document_flows,
4985 sourcing,
4986 hr,
4987 manufacturing,
4988 banking,
4989 audit,
4990 financial_reporting,
4991 ocpm,
4992 compliance,
4993 stats,
4994 ) {
4995 Ok(info) => {
4996 info!(
4997 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4998 info.node_count, info.edge_count, info.hyperedge_count
4999 );
5000 }
5001 Err(e) => {
5002 warn!("Phase 10b: Hypergraph export failed: {}", e);
5003 }
5004 }
5005 } else {
5006 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5007 }
5008 Ok(())
5009 }
5010
5011 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5017 if !self.config.llm.enabled {
5018 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5019 return;
5020 }
5021
5022 info!("Phase 11: Starting LLM Enrichment");
5023 let start = std::time::Instant::now();
5024
5025 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5026 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5029 let schema_provider = &self.config.llm.provider;
5030 let api_key_env = match schema_provider.as_str() {
5031 "openai" => Some("OPENAI_API_KEY"),
5032 "anthropic" => Some("ANTHROPIC_API_KEY"),
5033 "custom" => Some("LLM_API_KEY"),
5034 _ => None,
5035 };
5036 if let Some(key_env) = api_key_env {
5037 if std::env::var(key_env).is_ok() {
5038 let llm_config = datasynth_core::llm::LlmConfig {
5039 model: self.config.llm.model.clone(),
5040 api_key_env: key_env.to_string(),
5041 ..datasynth_core::llm::LlmConfig::default()
5042 };
5043 match HttpLlmProvider::new(llm_config) {
5044 Ok(p) => Arc::new(p),
5045 Err(e) => {
5046 warn!(
5047 "Failed to create HttpLlmProvider: {}; falling back to mock",
5048 e
5049 );
5050 Arc::new(MockLlmProvider::new(self.seed))
5051 }
5052 }
5053 } else {
5054 Arc::new(MockLlmProvider::new(self.seed))
5055 }
5056 } else {
5057 Arc::new(MockLlmProvider::new(self.seed))
5058 }
5059 };
5060 let industry = format!("{:?}", self.config.global.industry);
5064
5065 let vendor_enricher =
5066 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5067 let max_vendors = self
5068 .config
5069 .llm
5070 .max_vendor_enrichments
5071 .min(self.master_data.vendors.len());
5072 let mut vendors_enriched = 0usize;
5073 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5074 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5075 Ok(name) => {
5076 vendor.name = name;
5077 vendors_enriched += 1;
5078 }
5079 Err(e) => warn!(
5080 "LLM vendor enrichment failed for {}: {}",
5081 vendor.vendor_id, e
5082 ),
5083 }
5084 }
5085
5086 let mut customers_enriched = 0usize;
5087 if self.config.llm.enrich_customers {
5088 let customer_enricher =
5089 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5090 &provider,
5091 ));
5092 let max_customers = self
5093 .config
5094 .llm
5095 .max_customer_enrichments
5096 .min(self.master_data.customers.len());
5097 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5098 match customer_enricher.enrich_customer_name(
5099 &industry,
5100 "general",
5101 &customer.country,
5102 ) {
5103 Ok(name) => {
5104 customer.name = name;
5105 customers_enriched += 1;
5106 }
5107 Err(e) => warn!(
5108 "LLM customer enrichment failed for {}: {}",
5109 customer.customer_id, e
5110 ),
5111 }
5112 }
5113 }
5114
5115 let mut materials_enriched = 0usize;
5116 if self.config.llm.enrich_materials {
5117 let material_enricher =
5118 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5119 &provider,
5120 ));
5121 let max_materials = self
5122 .config
5123 .llm
5124 .max_material_enrichments
5125 .min(self.master_data.materials.len());
5126 for material in self.master_data.materials.iter_mut().take(max_materials) {
5127 let material_type = format!("{:?}", material.material_type);
5128 match material_enricher.enrich_material_description(&material_type, &industry) {
5129 Ok(desc) => {
5130 material.description = desc;
5131 materials_enriched += 1;
5132 }
5133 Err(e) => warn!(
5134 "LLM material enrichment failed for {}: {}",
5135 material.material_id, e
5136 ),
5137 }
5138 }
5139 }
5140
5141 (vendors_enriched, customers_enriched, materials_enriched)
5142 }));
5143
5144 match result {
5145 Ok((v, c, m)) => {
5146 stats.llm_vendors_enriched = v;
5147 stats.llm_customers_enriched = c;
5148 stats.llm_materials_enriched = m;
5149 let elapsed = start.elapsed();
5150 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5151 info!(
5152 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5153 v, c, m, stats.llm_enrichment_ms
5154 );
5155 }
5156 Err(_) => {
5157 let elapsed = start.elapsed();
5158 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5159 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5160 }
5161 }
5162 }
5163
5164 fn phase_diffusion_enhancement(
5176 &self,
5177 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5178 stats: &mut EnhancedGenerationStatistics,
5179 ) {
5180 if !self.config.diffusion.enabled {
5181 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5182 return;
5183 }
5184
5185 info!("Phase 12: Starting Diffusion Enhancement");
5186 let start = std::time::Instant::now();
5187
5188 let backend_choice = self.config.diffusion.backend.as_str();
5189 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5190
5191 if use_neural {
5192 #[cfg(feature = "neural")]
5193 {
5194 match self.run_neural_diffusion_phase(entries) {
5195 Ok(sample_count) => {
5196 stats.diffusion_samples_generated = sample_count;
5197 let elapsed = start.elapsed();
5198 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5199 info!(
5200 "Phase 12 complete ({}): {} samples in {}ms",
5201 backend_choice, sample_count, stats.diffusion_enhancement_ms
5202 );
5203 return;
5204 }
5205 Err(e) => {
5206 warn!(
5207 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5208 );
5209 }
5211 }
5212 }
5213 #[cfg(not(feature = "neural"))]
5214 {
5215 warn!(
5216 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5217 not compiled in — falling back to statistical. Rebuild with \
5218 `--features neural` (or `neural-cuda` for GPU) to enable.",
5219 backend_choice
5220 );
5221 }
5222 } else if !matches!(backend_choice, "statistical" | "") {
5223 warn!(
5224 "Phase 12: unknown backend '{}', falling back to statistical",
5225 backend_choice
5226 );
5227 }
5228
5229 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5231 let means = vec![5000.0, 3.0, 2.0];
5232 let stds = vec![2000.0, 1.5, 1.0];
5233
5234 let diffusion_config = DiffusionConfig {
5235 n_steps: self.config.diffusion.n_steps,
5236 seed: self.seed,
5237 ..Default::default()
5238 };
5239
5240 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5241 let n_samples = self.config.diffusion.sample_size;
5242 let n_features = 3;
5243 backend.generate(n_samples, n_features, self.seed).len()
5244 }));
5245
5246 match result {
5247 Ok(sample_count) => {
5248 stats.diffusion_samples_generated = sample_count;
5249 let elapsed = start.elapsed();
5250 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5251 info!(
5252 "Phase 12 complete (statistical): {} samples in {}ms",
5253 sample_count, stats.diffusion_enhancement_ms
5254 );
5255 }
5256 Err(_) => {
5257 let elapsed = start.elapsed();
5258 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5259 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5260 }
5261 }
5262 }
5263
/// Runs the neural diffusion backend and returns the number of samples generated.
///
/// Each of the first 5000 journal entries is turned into a three-value feature row:
/// the sum of its positive debit amounts, its line count, and the required approval
/// levels (1.0 when the header has no approval workflow). The backend is either
/// loaded from `config.diffusion.neural.checkpoint_path` when that is set, or trained
/// on the feature rows otherwise.
///
/// # Errors
/// Returns a generation error when `entries` is empty, when the checkpoint cannot be
/// loaded, or when training fails.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
    use rust_decimal::prelude::ToPrimitive;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // Builds the [total debit, line count, approval levels] row for one entry.
    let feature_row = |je: &JournalEntry| -> Vec<f64> {
        let total_amount: f64 = je
            .lines
            .iter()
            .filter(|line| line.debit_amount > rust_decimal::Decimal::ZERO)
            .map(|line| line.debit_amount.to_f64().unwrap_or(0.0))
            .sum();
        let line_count = je.lines.len() as f64;
        let approval_level = je
            .header
            .approval_workflow
            .as_ref()
            .map_or(1.0, |workflow| workflow.required_levels as f64);
        vec![total_amount, line_count, approval_level]
    };

    // Cap the training set at the first 5000 entries.
    let training_data: Vec<Vec<f64>> = entries.iter().take(5000).map(feature_row).collect();
    let n_features = training_data.first().map_or(3, |row| row.len());

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        // A checkpoint is configured: load it instead of training.
        Some(ckpt_path) => {
            let path = std::path::Path::new(ckpt_path);
            info!(
                " Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        // No checkpoint: train a score network on the feature rows.
        None => {
            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
            info!(
                " Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                " Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    Ok(backend
        .generate(cfg.sample_size, n_features, self.seed)
        .len())
}
5351
5352 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5359 if !self.config.causal.enabled {
5360 debug!("Phase 13: Skipped (causal generation disabled)");
5361 return;
5362 }
5363
5364 info!("Phase 13: Starting Causal Overlay");
5365 let start = std::time::Instant::now();
5366
5367 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5368 let graph = match self.config.causal.template.as_str() {
5370 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5371 _ => CausalGraph::fraud_detection_template(),
5372 };
5373
5374 let scm = StructuralCausalModel::new(graph.clone())
5375 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5376
5377 let n_samples = self.config.causal.sample_size;
5378 let samples = scm
5379 .generate(n_samples, self.seed)
5380 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5381
5382 let validation_passed = if self.config.causal.validate {
5384 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5385 if report.valid {
5386 info!(
5387 "Causal validation passed: all {} checks OK",
5388 report.checks.len()
5389 );
5390 } else {
5391 warn!(
5392 "Causal validation: {} violations detected: {:?}",
5393 report.violations.len(),
5394 report.violations
5395 );
5396 }
5397 Some(report.valid)
5398 } else {
5399 None
5400 };
5401
5402 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5403 }));
5404
5405 match result {
5406 Ok(Ok((sample_count, validation_passed))) => {
5407 stats.causal_samples_generated = sample_count;
5408 stats.causal_validation_passed = validation_passed;
5409 let elapsed = start.elapsed();
5410 stats.causal_generation_ms = elapsed.as_millis() as u64;
5411 info!(
5412 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5413 sample_count, stats.causal_generation_ms, validation_passed,
5414 );
5415 }
5416 Ok(Err(e)) => {
5417 let elapsed = start.elapsed();
5418 stats.causal_generation_ms = elapsed.as_millis() as u64;
5419 warn!("Phase 13: Causal generation failed: {}", e);
5420 }
5421 Err(_) => {
5422 let elapsed = start.elapsed();
5423 stats.causal_generation_ms = elapsed.as_millis() as u64;
5424 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5425 }
5426 }
5427 }
5428
5429 fn phase_sourcing_data(
5431 &mut self,
5432 stats: &mut EnhancedGenerationStatistics,
5433 ) -> SynthResult<SourcingSnapshot> {
5434 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5435 debug!("Phase 14: Skipped (sourcing generation disabled)");
5436 return Ok(SourcingSnapshot::default());
5437 }
5438 let degradation = self.check_resources()?;
5439 if degradation >= DegradationLevel::Reduced {
5440 debug!(
5441 "Phase skipped due to resource pressure (degradation: {:?})",
5442 degradation
5443 );
5444 return Ok(SourcingSnapshot::default());
5445 }
5446
5447 info!("Phase 14: Generating S2C Sourcing Data");
5448 let seed = self.seed;
5449
5450 let vendor_ids: Vec<String> = self
5452 .master_data
5453 .vendors
5454 .iter()
5455 .map(|v| v.vendor_id.clone())
5456 .collect();
5457 if vendor_ids.is_empty() {
5458 debug!("Phase 14: Skipped (no vendors available)");
5459 return Ok(SourcingSnapshot::default());
5460 }
5461
5462 let categories: Vec<(String, String)> = vec![
5463 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5464 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5465 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5466 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5467 ("CAT-LOG".to_string(), "Logistics".to_string()),
5468 ];
5469 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5470 .iter()
5471 .map(|(id, name)| {
5472 (
5473 id.clone(),
5474 name.clone(),
5475 rust_decimal::Decimal::from(100_000),
5476 )
5477 })
5478 .collect();
5479
5480 let company_code = self
5481 .config
5482 .companies
5483 .first()
5484 .map(|c| c.code.as_str())
5485 .unwrap_or("1000");
5486 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5487 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5488 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5489 let fiscal_year = start_date.year() as u16;
5490 let owner_ids: Vec<String> = self
5491 .master_data
5492 .employees
5493 .iter()
5494 .take(5)
5495 .map(|e| e.employee_id.clone())
5496 .collect();
5497 let owner_id = owner_ids
5498 .first()
5499 .map(std::string::String::as_str)
5500 .unwrap_or("BUYER-001");
5501
5502 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5504 let spend_analyses =
5505 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5506
5507 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5509 let sourcing_projects = if owner_ids.is_empty() {
5510 Vec::new()
5511 } else {
5512 project_gen.generate(
5513 company_code,
5514 &categories_with_spend,
5515 &owner_ids,
5516 start_date,
5517 self.config.global.period_months,
5518 )
5519 };
5520 stats.sourcing_project_count = sourcing_projects.len();
5521
5522 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5524 let mut qual_gen = QualificationGenerator::new(seed + 2);
5525 let qualifications = qual_gen.generate(
5526 company_code,
5527 &qual_vendor_ids,
5528 sourcing_projects.first().map(|p| p.project_id.as_str()),
5529 owner_id,
5530 start_date,
5531 );
5532
5533 let mut rfx_gen = RfxGenerator::new(seed + 3);
5535 let rfx_events: Vec<RfxEvent> = sourcing_projects
5536 .iter()
5537 .map(|proj| {
5538 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5539 rfx_gen.generate(
5540 company_code,
5541 &proj.project_id,
5542 &proj.category_id,
5543 &qualified_vids,
5544 owner_id,
5545 start_date,
5546 50000.0,
5547 )
5548 })
5549 .collect();
5550 stats.rfx_event_count = rfx_events.len();
5551
5552 let mut bid_gen = BidGenerator::new(seed + 4);
5554 let mut all_bids = Vec::new();
5555 for rfx in &rfx_events {
5556 let bidder_count = vendor_ids.len().clamp(2, 5);
5557 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5558 let bids = bid_gen.generate(rfx, &responding, start_date);
5559 all_bids.extend(bids);
5560 }
5561 stats.bid_count = all_bids.len();
5562
5563 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5565 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5566 .iter()
5567 .map(|rfx| {
5568 let rfx_bids: Vec<SupplierBid> = all_bids
5569 .iter()
5570 .filter(|b| b.rfx_id == rfx.rfx_id)
5571 .cloned()
5572 .collect();
5573 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5574 })
5575 .collect();
5576
5577 let mut contract_gen = ContractGenerator::new(seed + 6);
5579 let contracts: Vec<ProcurementContract> = bid_evaluations
5580 .iter()
5581 .zip(rfx_events.iter())
5582 .filter_map(|(eval, rfx)| {
5583 eval.ranked_bids.first().and_then(|winner| {
5584 all_bids
5585 .iter()
5586 .find(|b| b.bid_id == winner.bid_id)
5587 .map(|winning_bid| {
5588 contract_gen.generate_from_bid(
5589 winning_bid,
5590 Some(&rfx.sourcing_project_id),
5591 &rfx.category_id,
5592 owner_id,
5593 start_date,
5594 )
5595 })
5596 })
5597 })
5598 .collect();
5599 stats.contract_count = contracts.len();
5600
5601 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5603 let catalog_items = catalog_gen.generate(&contracts);
5604 stats.catalog_item_count = catalog_items.len();
5605
5606 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5608 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5609 .iter()
5610 .fold(
5611 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5612 |mut acc, c| {
5613 acc.entry(c.vendor_id.clone()).or_default().push(c);
5614 acc
5615 },
5616 )
5617 .into_iter()
5618 .collect();
5619 let scorecards = scorecard_gen.generate(
5620 company_code,
5621 &vendor_contracts,
5622 start_date,
5623 end_date,
5624 owner_id,
5625 );
5626 stats.scorecard_count = scorecards.len();
5627
5628 let mut sourcing_projects = sourcing_projects;
5631 for project in &mut sourcing_projects {
5632 project.rfx_ids = rfx_events
5634 .iter()
5635 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5636 .map(|rfx| rfx.rfx_id.clone())
5637 .collect();
5638
5639 project.contract_id = contracts
5641 .iter()
5642 .find(|c| {
5643 c.sourcing_project_id
5644 .as_deref()
5645 .is_some_and(|sp| sp == project.project_id)
5646 })
5647 .map(|c| c.contract_id.clone());
5648
5649 project.spend_analysis_id = spend_analyses
5651 .iter()
5652 .find(|sa| sa.category_id == project.category_id)
5653 .map(|sa| sa.category_id.clone());
5654 }
5655
5656 info!(
5657 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5658 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5659 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5660 );
5661 self.check_resources_with_log("post-sourcing")?;
5662
5663 Ok(SourcingSnapshot {
5664 spend_analyses,
5665 sourcing_projects,
5666 qualifications,
5667 rfx_events,
5668 bids: all_bids,
5669 bid_evaluations,
5670 contracts,
5671 catalog_items,
5672 scorecards,
5673 })
5674 }
5675
5676 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5682 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5683
5684 let parent_code = self
5685 .config
5686 .companies
5687 .first()
5688 .map(|c| c.code.clone())
5689 .unwrap_or_else(|| "PARENT".to_string());
5690
5691 let mut group = GroupStructure::new(parent_code);
5692
5693 for company in self.config.companies.iter().skip(1) {
5694 let sub =
5695 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5696 group.add_subsidiary(sub);
5697 }
5698
5699 group
5700 }
5701
5702 fn phase_intercompany(
5704 &mut self,
5705 journal_entries: &[JournalEntry],
5706 stats: &mut EnhancedGenerationStatistics,
5707 ) -> SynthResult<IntercompanySnapshot> {
5708 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5710 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5711 return Ok(IntercompanySnapshot::default());
5712 }
5713
5714 if self.config.companies.len() < 2 {
5716 debug!(
5717 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5718 self.config.companies.len()
5719 );
5720 return Ok(IntercompanySnapshot::default());
5721 }
5722
5723 info!("Phase 14b: Generating Intercompany Transactions");
5724
5725 let group_structure = self.build_group_structure();
5728 debug!(
5729 "Group structure built: parent={}, subsidiaries={}",
5730 group_structure.parent_entity,
5731 group_structure.subsidiaries.len()
5732 );
5733
5734 let seed = self.seed;
5735 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5736 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5737 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5738
5739 let parent_code = self.config.companies[0].code.clone();
5742 let mut ownership_structure =
5743 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5744
5745 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5746 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5747 format!("REL{:03}", i + 1),
5748 parent_code.clone(),
5749 company.code.clone(),
5750 rust_decimal::Decimal::from(100), start_date,
5752 );
5753 ownership_structure.add_relationship(relationship);
5754 }
5755
5756 let tp_method = match self.config.intercompany.transfer_pricing_method {
5758 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5759 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5760 }
5761 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5762 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5763 }
5764 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5765 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5766 }
5767 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5768 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5769 }
5770 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5771 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5772 }
5773 };
5774
5775 let ic_currency = self
5777 .config
5778 .companies
5779 .first()
5780 .map(|c| c.currency.clone())
5781 .unwrap_or_else(|| "USD".to_string());
5782 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5783 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5784 transfer_pricing_method: tp_method,
5785 markup_percent: rust_decimal::Decimal::from_f64_retain(
5786 self.config.intercompany.markup_percent,
5787 )
5788 .unwrap_or(rust_decimal::Decimal::from(5)),
5789 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5790 default_currency: ic_currency,
5791 ..Default::default()
5792 };
5793
5794 let mut ic_generator = datasynth_generators::ICGenerator::new(
5796 ic_gen_config,
5797 ownership_structure.clone(),
5798 seed + 50,
5799 );
5800
5801 let transactions_per_day = 3;
5804 let matched_pairs = ic_generator.generate_transactions_for_period(
5805 start_date,
5806 end_date,
5807 transactions_per_day,
5808 );
5809
5810 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5812 debug!(
5813 "Generated {} IC seller invoices, {} IC buyer POs",
5814 ic_doc_chains.seller_invoices.len(),
5815 ic_doc_chains.buyer_orders.len()
5816 );
5817
5818 let mut seller_entries = Vec::new();
5820 let mut buyer_entries = Vec::new();
5821 let fiscal_year = start_date.year();
5822
5823 for pair in &matched_pairs {
5824 let fiscal_period = pair.posting_date.month();
5825 let (seller_je, buyer_je) =
5826 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5827 seller_entries.push(seller_je);
5828 buyer_entries.push(buyer_je);
5829 }
5830
5831 let matching_config = datasynth_generators::ICMatchingConfig {
5833 base_currency: self
5834 .config
5835 .companies
5836 .first()
5837 .map(|c| c.currency.clone())
5838 .unwrap_or_else(|| "USD".to_string()),
5839 ..Default::default()
5840 };
5841 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5842 matching_engine.load_matched_pairs(&matched_pairs);
5843 let matching_result = matching_engine.run_matching(end_date);
5844
5845 let mut elimination_entries = Vec::new();
5847 if self.config.intercompany.generate_eliminations {
5848 let elim_config = datasynth_generators::EliminationConfig {
5849 consolidation_entity: "GROUP".to_string(),
5850 base_currency: self
5851 .config
5852 .companies
5853 .first()
5854 .map(|c| c.currency.clone())
5855 .unwrap_or_else(|| "USD".to_string()),
5856 ..Default::default()
5857 };
5858
5859 let mut elim_generator =
5860 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5861
5862 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5863 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5864 matching_result
5865 .matched_balances
5866 .iter()
5867 .chain(matching_result.unmatched_balances.iter())
5868 .cloned()
5869 .collect();
5870
5871 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5883 std::collections::HashMap::new();
5884 let mut equity_amounts: std::collections::HashMap<
5885 String,
5886 std::collections::HashMap<String, rust_decimal::Decimal>,
5887 > = std::collections::HashMap::new();
5888 {
5889 use rust_decimal::Decimal;
5890 let hundred = Decimal::from(100u32);
5891 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5895 for sub in &group_structure.subsidiaries {
5896 let net_assets = {
5897 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5898 if na > Decimal::ZERO {
5899 na
5900 } else {
5901 Decimal::from(1_000_000u64)
5902 }
5903 };
5904 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5906 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5907
5908 let mut eq_map = std::collections::HashMap::new();
5911 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5912 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5913 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5914 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5915 }
5916 }
5917
5918 let journal = elim_generator.generate_eliminations(
5919 &fiscal_period,
5920 end_date,
5921 &all_balances,
5922 &matched_pairs,
5923 &investment_amounts,
5924 &equity_amounts,
5925 );
5926
5927 elimination_entries = journal.entries.clone();
5928 }
5929
5930 let matched_pair_count = matched_pairs.len();
5931 let elimination_entry_count = elimination_entries.len();
5932 let match_rate = matching_result.match_rate;
5933
5934 stats.ic_matched_pair_count = matched_pair_count;
5935 stats.ic_elimination_count = elimination_entry_count;
5936 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5937
5938 info!(
5939 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5940 matched_pair_count,
5941 stats.ic_transaction_count,
5942 seller_entries.len(),
5943 buyer_entries.len(),
5944 elimination_entry_count,
5945 match_rate * 100.0
5946 );
5947 self.check_resources_with_log("post-intercompany")?;
5948
5949 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5953 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5954 use rust_decimal::Decimal;
5955
5956 let eight_pct = Decimal::new(8, 2); group_structure
5959 .subsidiaries
5960 .iter()
5961 .filter(|sub| {
5962 sub.nci_percentage > Decimal::ZERO
5963 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5964 })
5965 .map(|sub| {
5966 let net_assets_from_jes =
5970 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5971
5972 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5973 net_assets_from_jes.round_dp(2)
5974 } else {
5975 Decimal::from(1_000_000u64)
5977 };
5978
5979 let net_income = (net_assets * eight_pct).round_dp(2);
5981
5982 NciMeasurement::compute(
5983 sub.entity_code.clone(),
5984 sub.nci_percentage,
5985 net_assets,
5986 net_income,
5987 )
5988 })
5989 .collect()
5990 };
5991
5992 if !nci_measurements.is_empty() {
5993 info!(
5994 "NCI measurements: {} subsidiaries with non-controlling interests",
5995 nci_measurements.len()
5996 );
5997 }
5998
5999 Ok(IntercompanySnapshot {
6000 group_structure: Some(group_structure),
6001 matched_pairs,
6002 seller_journal_entries: seller_entries,
6003 buyer_journal_entries: buyer_entries,
6004 elimination_entries,
6005 nci_measurements,
6006 ic_document_chains: Some(ic_doc_chains),
6007 matched_pair_count,
6008 elimination_entry_count,
6009 match_rate,
6010 })
6011 }
6012
6013 fn phase_financial_reporting(
6015 &mut self,
6016 document_flows: &DocumentFlowSnapshot,
6017 journal_entries: &[JournalEntry],
6018 coa: &Arc<ChartOfAccounts>,
6019 _hr: &HrSnapshot,
6020 _audit: &AuditSnapshot,
6021 stats: &mut EnhancedGenerationStatistics,
6022 ) -> SynthResult<FinancialReportingSnapshot> {
6023 let fs_enabled = self.phase_config.generate_financial_statements
6024 || self.config.financial_reporting.enabled;
6025 let br_enabled = self.phase_config.generate_bank_reconciliation;
6026
6027 if !fs_enabled && !br_enabled {
6028 debug!("Phase 15: Skipped (financial reporting disabled)");
6029 return Ok(FinancialReportingSnapshot::default());
6030 }
6031
6032 info!("Phase 15: Generating Financial Reporting Data");
6033
6034 let seed = self.seed;
6035 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6036 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6037
6038 let mut financial_statements = Vec::new();
6039 let mut bank_reconciliations = Vec::new();
6040 let mut trial_balances = Vec::new();
6041 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6042 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6043 Vec::new();
6044 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6046 std::collections::HashMap::new();
6047 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6049 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6051
6052 if fs_enabled {
6060 let has_journal_entries = !journal_entries.is_empty();
6061
6062 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6065 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6067
6068 let elimination_entries: Vec<&JournalEntry> = journal_entries
6070 .iter()
6071 .filter(|je| je.header.is_elimination)
6072 .collect();
6073
6074 for period in 0..self.config.global.period_months {
6076 let period_start = start_date + chrono::Months::new(period);
6077 let period_end =
6078 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6079 let fiscal_year = period_end.year() as u16;
6080 let fiscal_period = period_end.month() as u8;
6081 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6082
6083 let mut entity_tb_map: std::collections::HashMap<
6086 String,
6087 std::collections::HashMap<String, rust_decimal::Decimal>,
6088 > = std::collections::HashMap::new();
6089
6090 for (company_idx, company) in self.config.companies.iter().enumerate() {
6092 let company_code = company.code.as_str();
6093 let currency = company.currency.as_str();
6094 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6097 let mut company_fs_gen =
6098 FinancialStatementGenerator::new(seed + company_seed_offset);
6099
6100 if has_journal_entries {
6101 let tb_entries = Self::build_cumulative_trial_balance(
6102 journal_entries,
6103 coa,
6104 company_code,
6105 start_date,
6106 period_end,
6107 fiscal_year,
6108 fiscal_period,
6109 );
6110
6111 let entity_cat_map =
6113 entity_tb_map.entry(company_code.to_string()).or_default();
6114 for tb_entry in &tb_entries {
6115 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6116 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6117 }
6118
6119 let stmts = company_fs_gen.generate(
6120 company_code,
6121 currency,
6122 &tb_entries,
6123 period_start,
6124 period_end,
6125 fiscal_year,
6126 fiscal_period,
6127 None,
6128 "SYS-AUTOCLOSE",
6129 );
6130
6131 let mut entity_stmts = Vec::new();
6132 for stmt in stmts {
6133 if stmt.statement_type == StatementType::CashFlowStatement {
6134 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6135 let cf_items = Self::build_cash_flow_from_trial_balances(
6136 &tb_entries,
6137 None,
6138 net_income,
6139 );
6140 entity_stmts.push(FinancialStatement {
6141 cash_flow_items: cf_items,
6142 ..stmt
6143 });
6144 } else {
6145 entity_stmts.push(stmt);
6146 }
6147 }
6148
6149 financial_statements.extend(entity_stmts.clone());
6151
6152 standalone_statements
6154 .entry(company_code.to_string())
6155 .or_default()
6156 .extend(entity_stmts);
6157
6158 if company_idx == 0 {
6161 trial_balances.push(PeriodTrialBalance {
6162 fiscal_year,
6163 fiscal_period,
6164 period_start,
6165 period_end,
6166 entries: tb_entries,
6167 });
6168 }
6169 } else {
6170 let tb_entries = Self::build_trial_balance_from_entries(
6172 journal_entries,
6173 coa,
6174 company_code,
6175 fiscal_year,
6176 fiscal_period,
6177 );
6178
6179 let stmts = company_fs_gen.generate(
6180 company_code,
6181 currency,
6182 &tb_entries,
6183 period_start,
6184 period_end,
6185 fiscal_year,
6186 fiscal_period,
6187 None,
6188 "SYS-AUTOCLOSE",
6189 );
6190 financial_statements.extend(stmts.clone());
6191 standalone_statements
6192 .entry(company_code.to_string())
6193 .or_default()
6194 .extend(stmts);
6195
6196 if company_idx == 0 && !tb_entries.is_empty() {
6197 trial_balances.push(PeriodTrialBalance {
6198 fiscal_year,
6199 fiscal_period,
6200 period_start,
6201 period_end,
6202 entries: tb_entries,
6203 });
6204 }
6205 }
6206 }
6207
6208 let group_currency = self
6211 .config
6212 .companies
6213 .first()
6214 .map(|c| c.currency.as_str())
6215 .unwrap_or("USD");
6216
6217 let period_eliminations: Vec<JournalEntry> = elimination_entries
6219 .iter()
6220 .filter(|je| {
6221 je.header.fiscal_year == fiscal_year
6222 && je.header.fiscal_period == fiscal_period
6223 })
6224 .map(|je| (*je).clone())
6225 .collect();
6226
6227 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6228 &entity_tb_map,
6229 &period_eliminations,
6230 &period_label,
6231 );
6232
6233 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6236 .line_items
6237 .iter()
6238 .map(|li| {
6239 let net = li.post_elimination_total;
6240 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6241 (net, rust_decimal::Decimal::ZERO)
6242 } else {
6243 (rust_decimal::Decimal::ZERO, -net)
6244 };
6245 datasynth_generators::TrialBalanceEntry {
6246 account_code: li.account_category.clone(),
6247 account_name: li.account_category.clone(),
6248 category: li.account_category.clone(),
6249 debit_balance: debit,
6250 credit_balance: credit,
6251 }
6252 })
6253 .collect();
6254
6255 let mut cons_stmts = cons_gen.generate(
6256 "GROUP",
6257 group_currency,
6258 &cons_tb,
6259 period_start,
6260 period_end,
6261 fiscal_year,
6262 fiscal_period,
6263 None,
6264 "SYS-AUTOCLOSE",
6265 );
6266
6267 let bs_categories: &[&str] = &[
6271 "CASH",
6272 "RECEIVABLES",
6273 "INVENTORY",
6274 "FIXEDASSETS",
6275 "PAYABLES",
6276 "ACCRUEDLIABILITIES",
6277 "LONGTERMDEBT",
6278 "EQUITY",
6279 ];
6280 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6281 cons_line_items.into_iter().partition(|li| {
6282 let upper = li.label.to_uppercase();
6283 bs_categories.iter().any(|c| upper == *c)
6284 });
6285
6286 for stmt in &mut cons_stmts {
6287 stmt.is_consolidated = true;
6288 match stmt.statement_type {
6289 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6290 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6291 _ => {} }
6293 }
6294
6295 consolidated_statements.extend(cons_stmts);
6296 consolidation_schedules.push(schedule);
6297 }
6298
6299 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6305 info!(
6306 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6307 stats.financial_statement_count,
6308 consolidated_statements.len(),
6309 has_journal_entries
6310 );
6311
6312 let entity_seeds: Vec<SegmentSeed> = self
6317 .config
6318 .companies
6319 .iter()
6320 .map(|c| SegmentSeed {
6321 code: c.code.clone(),
6322 name: c.name.clone(),
6323 currency: c.currency.clone(),
6324 })
6325 .collect();
6326
6327 let mut seg_gen = SegmentGenerator::new(seed + 30);
6328
6329 for period in 0..self.config.global.period_months {
6334 let period_end =
6335 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6336 let fiscal_year = period_end.year() as u16;
6337 let fiscal_period = period_end.month() as u8;
6338 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6339
6340 use datasynth_core::models::StatementType;
6341
6342 let cons_is = consolidated_statements.iter().find(|s| {
6344 s.fiscal_year == fiscal_year
6345 && s.fiscal_period == fiscal_period
6346 && s.statement_type == StatementType::IncomeStatement
6347 });
6348 let cons_bs = consolidated_statements.iter().find(|s| {
6349 s.fiscal_year == fiscal_year
6350 && s.fiscal_period == fiscal_period
6351 && s.statement_type == StatementType::BalanceSheet
6352 });
6353
6354 let is_stmt = cons_is.or_else(|| {
6356 financial_statements.iter().find(|s| {
6357 s.fiscal_year == fiscal_year
6358 && s.fiscal_period == fiscal_period
6359 && s.statement_type == StatementType::IncomeStatement
6360 })
6361 });
6362 let bs_stmt = cons_bs.or_else(|| {
6363 financial_statements.iter().find(|s| {
6364 s.fiscal_year == fiscal_year
6365 && s.fiscal_period == fiscal_period
6366 && s.statement_type == StatementType::BalanceSheet
6367 })
6368 });
6369
6370 let consolidated_revenue = is_stmt
6371 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6372 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6374
6375 let consolidated_profit = is_stmt
6376 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6377 .map(|li| li.amount)
6378 .unwrap_or(rust_decimal::Decimal::ZERO);
6379
6380 let consolidated_assets = bs_stmt
6381 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6382 .map(|li| li.amount)
6383 .unwrap_or(rust_decimal::Decimal::ZERO);
6384
6385 if consolidated_revenue == rust_decimal::Decimal::ZERO
6387 && consolidated_assets == rust_decimal::Decimal::ZERO
6388 {
6389 continue;
6390 }
6391
6392 let group_code = self
6393 .config
6394 .companies
6395 .first()
6396 .map(|c| c.code.as_str())
6397 .unwrap_or("GROUP");
6398
6399 let total_depr: rust_decimal::Decimal = journal_entries
6402 .iter()
6403 .filter(|je| je.header.document_type == "CL")
6404 .flat_map(|je| je.lines.iter())
6405 .filter(|l| l.gl_account.starts_with("6000"))
6406 .map(|l| l.debit_amount)
6407 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6408 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6409 Some(total_depr)
6410 } else {
6411 None
6412 };
6413
6414 let (segs, recon) = seg_gen.generate(
6415 group_code,
6416 &period_label,
6417 consolidated_revenue,
6418 consolidated_profit,
6419 consolidated_assets,
6420 &entity_seeds,
6421 depr_param,
6422 );
6423 segment_reports.extend(segs);
6424 segment_reconciliations.push(recon);
6425 }
6426
6427 info!(
6428 "Segment reports generated: {} segments, {} reconciliations",
6429 segment_reports.len(),
6430 segment_reconciliations.len()
6431 );
6432 }
6433
6434 if br_enabled && !document_flows.payments.is_empty() {
6436 let employee_ids: Vec<String> = self
6437 .master_data
6438 .employees
6439 .iter()
6440 .map(|e| e.employee_id.clone())
6441 .collect();
6442 let mut br_gen =
6443 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6444
6445 for company in &self.config.companies {
6447 let company_payments: Vec<PaymentReference> = document_flows
6448 .payments
6449 .iter()
6450 .filter(|p| p.header.company_code == company.code)
6451 .map(|p| PaymentReference {
6452 id: p.header.document_id.clone(),
6453 amount: if p.is_vendor { p.amount } else { -p.amount },
6454 date: p.header.document_date,
6455 reference: p
6456 .check_number
6457 .clone()
6458 .or_else(|| p.wire_reference.clone())
6459 .unwrap_or_else(|| p.header.document_id.clone()),
6460 })
6461 .collect();
6462
6463 if company_payments.is_empty() {
6464 continue;
6465 }
6466
6467 let bank_account_id = format!("{}-MAIN", company.code);
6468
6469 for period in 0..self.config.global.period_months {
6471 let period_start = start_date + chrono::Months::new(period);
6472 let period_end =
6473 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6474
6475 let period_payments: Vec<PaymentReference> = company_payments
6476 .iter()
6477 .filter(|p| p.date >= period_start && p.date <= period_end)
6478 .cloned()
6479 .collect();
6480
6481 let recon = br_gen.generate(
6482 &company.code,
6483 &bank_account_id,
6484 period_start,
6485 period_end,
6486 &company.currency,
6487 &period_payments,
6488 );
6489 bank_reconciliations.push(recon);
6490 }
6491 }
6492 info!(
6493 "Bank reconciliations generated: {} reconciliations",
6494 bank_reconciliations.len()
6495 );
6496 }
6497
6498 stats.bank_reconciliation_count = bank_reconciliations.len();
6499 self.check_resources_with_log("post-financial-reporting")?;
6500
6501 if !trial_balances.is_empty() {
6502 info!(
6503 "Period-close trial balances captured: {} periods",
6504 trial_balances.len()
6505 );
6506 }
6507
6508 let notes_to_financial_statements = Vec::new();
6512
6513 Ok(FinancialReportingSnapshot {
6514 financial_statements,
6515 standalone_statements,
6516 consolidated_statements,
6517 consolidation_schedules,
6518 bank_reconciliations,
6519 trial_balances,
6520 segment_reports,
6521 segment_reconciliations,
6522 notes_to_financial_statements,
6523 })
6524 }
6525
6526 fn generate_notes_to_financial_statements(
6533 &self,
6534 financial_reporting: &mut FinancialReportingSnapshot,
6535 accounting_standards: &AccountingStandardsSnapshot,
6536 tax: &TaxSnapshot,
6537 hr: &HrSnapshot,
6538 audit: &AuditSnapshot,
6539 treasury: &TreasurySnapshot,
6540 ) {
6541 use datasynth_config::schema::AccountingFrameworkConfig;
6542 use datasynth_core::models::StatementType;
6543 use datasynth_generators::period_close::notes_generator::{
6544 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6545 };
6546
6547 let seed = self.seed;
6548 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6549 {
6550 Ok(d) => d,
6551 Err(_) => return,
6552 };
6553
6554 let mut notes_gen = NotesGenerator::new(seed + 4235);
6555
6556 for company in &self.config.companies {
6557 let last_period_end = start_date
6558 + chrono::Months::new(self.config.global.period_months)
6559 - chrono::Days::new(1);
6560 let fiscal_year = last_period_end.year() as u16;
6561
6562 let entity_is = financial_reporting
6564 .standalone_statements
6565 .get(&company.code)
6566 .and_then(|stmts| {
6567 stmts.iter().find(|s| {
6568 s.fiscal_year == fiscal_year
6569 && s.statement_type == StatementType::IncomeStatement
6570 })
6571 });
6572 let entity_bs = financial_reporting
6573 .standalone_statements
6574 .get(&company.code)
6575 .and_then(|stmts| {
6576 stmts.iter().find(|s| {
6577 s.fiscal_year == fiscal_year
6578 && s.statement_type == StatementType::BalanceSheet
6579 })
6580 });
6581
6582 let revenue_amount = entity_is
6584 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6585 .map(|li| li.amount);
6586 let ppe_gross = entity_bs
6587 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6588 .map(|li| li.amount);
6589
6590 let framework = match self
6591 .config
6592 .accounting_standards
6593 .framework
6594 .unwrap_or_default()
6595 {
6596 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6597 "IFRS".to_string()
6598 }
6599 _ => "US GAAP".to_string(),
6600 };
6601
6602 let (entity_dta, entity_dtl) = {
6605 let mut dta = rust_decimal::Decimal::ZERO;
6606 let mut dtl = rust_decimal::Decimal::ZERO;
6607 for rf in &tax.deferred_tax.rollforwards {
6608 if rf.entity_code == company.code {
6609 dta += rf.closing_dta;
6610 dtl += rf.closing_dtl;
6611 }
6612 }
6613 (
6614 if dta > rust_decimal::Decimal::ZERO {
6615 Some(dta)
6616 } else {
6617 None
6618 },
6619 if dtl > rust_decimal::Decimal::ZERO {
6620 Some(dtl)
6621 } else {
6622 None
6623 },
6624 )
6625 };
6626
6627 let entity_provisions: Vec<_> = accounting_standards
6630 .provisions
6631 .iter()
6632 .filter(|p| p.entity_code == company.code)
6633 .collect();
6634 let provision_count = entity_provisions.len();
6635 let total_provisions = if provision_count > 0 {
6636 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6637 } else {
6638 None
6639 };
6640
6641 let entity_pension_plan_count = hr
6643 .pension_plans
6644 .iter()
6645 .filter(|p| p.entity_code == company.code)
6646 .count();
6647 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6648 let sum: rust_decimal::Decimal = hr
6649 .pension_disclosures
6650 .iter()
6651 .filter(|d| {
6652 hr.pension_plans
6653 .iter()
6654 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6655 })
6656 .map(|d| d.net_pension_liability)
6657 .sum();
6658 let plan_assets_sum: rust_decimal::Decimal = hr
6659 .pension_plan_assets
6660 .iter()
6661 .filter(|a| {
6662 hr.pension_plans
6663 .iter()
6664 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6665 })
6666 .map(|a| a.fair_value_closing)
6667 .sum();
6668 if entity_pension_plan_count > 0 {
6669 Some(sum + plan_assets_sum)
6670 } else {
6671 None
6672 }
6673 };
6674 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6675 let sum: rust_decimal::Decimal = hr
6676 .pension_plan_assets
6677 .iter()
6678 .filter(|a| {
6679 hr.pension_plans
6680 .iter()
6681 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6682 })
6683 .map(|a| a.fair_value_closing)
6684 .sum();
6685 if entity_pension_plan_count > 0 {
6686 Some(sum)
6687 } else {
6688 None
6689 }
6690 };
6691
6692 let rp_count = audit.related_party_transactions.len();
6695 let se_count = audit.subsequent_events.len();
6696 let adjusting_count = audit
6697 .subsequent_events
6698 .iter()
6699 .filter(|e| {
6700 matches!(
6701 e.classification,
6702 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6703 )
6704 })
6705 .count();
6706
6707 let ctx = NotesGeneratorContext {
6708 entity_code: company.code.clone(),
6709 framework,
6710 period: format!("FY{}", fiscal_year),
6711 period_end: last_period_end,
6712 currency: company.currency.clone(),
6713 revenue_amount,
6714 total_ppe_gross: ppe_gross,
6715 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6716 deferred_tax_asset: entity_dta,
6718 deferred_tax_liability: entity_dtl,
6719 provision_count,
6721 total_provisions,
6722 pension_plan_count: entity_pension_plan_count,
6724 total_dbo: entity_total_dbo,
6725 total_plan_assets: entity_total_plan_assets,
6726 related_party_transaction_count: rp_count,
6728 subsequent_event_count: se_count,
6729 adjusting_event_count: adjusting_count,
6730 ..NotesGeneratorContext::default()
6731 };
6732
6733 let entity_notes = notes_gen.generate(&ctx);
6734 let standard_note_count = entity_notes.len() as u32;
6735 info!(
6736 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6737 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6738 );
6739 financial_reporting
6740 .notes_to_financial_statements
6741 .extend(entity_notes);
6742
6743 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6745 .debt_instruments
6746 .iter()
6747 .filter(|d| d.entity_id == company.code)
6748 .map(|d| {
6749 (
6750 format!("{:?}", d.instrument_type),
6751 d.principal,
6752 d.maturity_date.to_string(),
6753 )
6754 })
6755 .collect();
6756
6757 let hedge_count = treasury.hedge_relationships.len();
6758 let effective_hedges = treasury
6759 .hedge_relationships
6760 .iter()
6761 .filter(|h| h.is_effective)
6762 .count();
6763 let total_notional: rust_decimal::Decimal = treasury
6764 .hedging_instruments
6765 .iter()
6766 .map(|h| h.notional_amount)
6767 .sum();
6768 let total_fair_value: rust_decimal::Decimal = treasury
6769 .hedging_instruments
6770 .iter()
6771 .map(|h| h.fair_value)
6772 .sum();
6773
6774 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6776 .provisions
6777 .iter()
6778 .filter(|p| p.entity_code == company.code)
6779 .map(|p| p.id.as_str())
6780 .collect();
6781 let provision_movements: Vec<(
6782 String,
6783 rust_decimal::Decimal,
6784 rust_decimal::Decimal,
6785 rust_decimal::Decimal,
6786 )> = accounting_standards
6787 .provision_movements
6788 .iter()
6789 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6790 .map(|m| {
6791 let prov_type = accounting_standards
6792 .provisions
6793 .iter()
6794 .find(|p| p.id == m.provision_id)
6795 .map(|p| format!("{:?}", p.provision_type))
6796 .unwrap_or_else(|| "Unknown".to_string());
6797 (prov_type, m.opening, m.additions, m.closing)
6798 })
6799 .collect();
6800
6801 let enhanced_ctx = EnhancedNotesContext {
6802 entity_code: company.code.clone(),
6803 period: format!("FY{}", fiscal_year),
6804 currency: company.currency.clone(),
6805 finished_goods_value: rust_decimal::Decimal::ZERO,
6807 wip_value: rust_decimal::Decimal::ZERO,
6808 raw_materials_value: rust_decimal::Decimal::ZERO,
6809 debt_instruments,
6810 hedge_count,
6811 effective_hedges,
6812 total_notional,
6813 total_fair_value,
6814 provision_movements,
6815 };
6816
6817 let enhanced_notes =
6818 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6819 if !enhanced_notes.is_empty() {
6820 info!(
6821 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6822 company.code,
6823 enhanced_notes.len(),
6824 enhanced_ctx.debt_instruments.len(),
6825 hedge_count,
6826 enhanced_ctx.provision_movements.len(),
6827 );
6828 financial_reporting
6829 .notes_to_financial_statements
6830 .extend(enhanced_notes);
6831 }
6832 }
6833 }
6834
6835 fn build_trial_balance_from_entries(
6841 journal_entries: &[JournalEntry],
6842 coa: &ChartOfAccounts,
6843 company_code: &str,
6844 fiscal_year: u16,
6845 fiscal_period: u8,
6846 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6847 use rust_decimal::Decimal;
6848
6849 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6851 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6852
6853 for je in journal_entries {
6854 if je.header.company_code != company_code
6856 || je.header.fiscal_year != fiscal_year
6857 || je.header.fiscal_period != fiscal_period
6858 {
6859 continue;
6860 }
6861
6862 for line in &je.lines {
6863 let acct = &line.gl_account;
6864 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6865 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6866 }
6867 }
6868
6869 let mut all_accounts: Vec<&String> = account_debits
6871 .keys()
6872 .chain(account_credits.keys())
6873 .collect::<std::collections::HashSet<_>>()
6874 .into_iter()
6875 .collect();
6876 all_accounts.sort();
6877
6878 let mut entries = Vec::new();
6879
6880 for acct_number in all_accounts {
6881 let debit = account_debits
6882 .get(acct_number)
6883 .copied()
6884 .unwrap_or(Decimal::ZERO);
6885 let credit = account_credits
6886 .get(acct_number)
6887 .copied()
6888 .unwrap_or(Decimal::ZERO);
6889
6890 if debit.is_zero() && credit.is_zero() {
6891 continue;
6892 }
6893
6894 let account_name = coa
6896 .get_account(acct_number)
6897 .map(|gl| gl.short_description.clone())
6898 .unwrap_or_else(|| format!("Account {acct_number}"));
6899
6900 let category = Self::category_from_account_code(acct_number);
6905
6906 entries.push(datasynth_generators::TrialBalanceEntry {
6907 account_code: acct_number.clone(),
6908 account_name,
6909 category,
6910 debit_balance: debit,
6911 credit_balance: credit,
6912 });
6913 }
6914
6915 entries
6916 }
6917
6918 fn build_cumulative_trial_balance(
6925 journal_entries: &[JournalEntry],
6926 coa: &ChartOfAccounts,
6927 company_code: &str,
6928 start_date: NaiveDate,
6929 period_end: NaiveDate,
6930 fiscal_year: u16,
6931 fiscal_period: u8,
6932 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6933 use rust_decimal::Decimal;
6934
6935 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6937 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6938
6939 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6941 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6942
6943 for je in journal_entries {
6944 if je.header.company_code != company_code {
6945 continue;
6946 }
6947
6948 for line in &je.lines {
6949 let acct = &line.gl_account;
6950 let category = Self::category_from_account_code(acct);
6951 let is_bs_account = matches!(
6952 category.as_str(),
6953 "Cash"
6954 | "Receivables"
6955 | "Inventory"
6956 | "FixedAssets"
6957 | "Payables"
6958 | "AccruedLiabilities"
6959 | "LongTermDebt"
6960 | "Equity"
6961 );
6962
6963 if is_bs_account {
6964 if je.header.document_date <= period_end
6966 && je.header.document_date >= start_date
6967 {
6968 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6969 line.debit_amount;
6970 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6971 line.credit_amount;
6972 }
6973 } else {
6974 if je.header.fiscal_year == fiscal_year
6976 && je.header.fiscal_period == fiscal_period
6977 {
6978 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6979 line.debit_amount;
6980 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6981 line.credit_amount;
6982 }
6983 }
6984 }
6985 }
6986
6987 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6989 all_accounts.extend(bs_debits.keys().cloned());
6990 all_accounts.extend(bs_credits.keys().cloned());
6991 all_accounts.extend(is_debits.keys().cloned());
6992 all_accounts.extend(is_credits.keys().cloned());
6993
6994 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6995 sorted_accounts.sort();
6996
6997 let mut entries = Vec::new();
6998
6999 for acct_number in &sorted_accounts {
7000 let category = Self::category_from_account_code(acct_number);
7001 let is_bs_account = matches!(
7002 category.as_str(),
7003 "Cash"
7004 | "Receivables"
7005 | "Inventory"
7006 | "FixedAssets"
7007 | "Payables"
7008 | "AccruedLiabilities"
7009 | "LongTermDebt"
7010 | "Equity"
7011 );
7012
7013 let (debit, credit) = if is_bs_account {
7014 (
7015 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7016 bs_credits
7017 .get(acct_number)
7018 .copied()
7019 .unwrap_or(Decimal::ZERO),
7020 )
7021 } else {
7022 (
7023 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7024 is_credits
7025 .get(acct_number)
7026 .copied()
7027 .unwrap_or(Decimal::ZERO),
7028 )
7029 };
7030
7031 if debit.is_zero() && credit.is_zero() {
7032 continue;
7033 }
7034
7035 let account_name = coa
7036 .get_account(acct_number)
7037 .map(|gl| gl.short_description.clone())
7038 .unwrap_or_else(|| format!("Account {acct_number}"));
7039
7040 entries.push(datasynth_generators::TrialBalanceEntry {
7041 account_code: acct_number.clone(),
7042 account_name,
7043 category,
7044 debit_balance: debit,
7045 credit_balance: credit,
7046 });
7047 }
7048
7049 entries
7050 }
7051
7052 fn build_cash_flow_from_trial_balances(
7057 current_tb: &[datasynth_generators::TrialBalanceEntry],
7058 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7059 net_income: rust_decimal::Decimal,
7060 ) -> Vec<CashFlowItem> {
7061 use rust_decimal::Decimal;
7062
7063 let aggregate =
7065 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7066 let mut map: HashMap<String, Decimal> = HashMap::new();
7067 for entry in tb {
7068 let net = entry.debit_balance - entry.credit_balance;
7069 *map.entry(entry.category.clone()).or_default() += net;
7070 }
7071 map
7072 };
7073
7074 let current = aggregate(current_tb);
7075 let prior = prior_tb.map(aggregate);
7076
7077 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7079 *map.get(key).unwrap_or(&Decimal::ZERO)
7080 };
7081
7082 let change = |key: &str| -> Decimal {
7084 let curr = get(¤t, key);
7085 match &prior {
7086 Some(p) => curr - get(p, key),
7087 None => curr,
7088 }
7089 };
7090
7091 let fixed_asset_change = change("FixedAssets");
7094 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7095 -fixed_asset_change
7096 } else {
7097 Decimal::ZERO
7098 };
7099
7100 let ar_change = change("Receivables");
7102 let inventory_change = change("Inventory");
7103 let ap_change = change("Payables");
7105 let accrued_change = change("AccruedLiabilities");
7106
7107 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7108 + (-ap_change)
7109 + (-accrued_change);
7110
7111 let capex = if fixed_asset_change > Decimal::ZERO {
7113 -fixed_asset_change
7114 } else {
7115 Decimal::ZERO
7116 };
7117 let investing_cf = capex;
7118
7119 let debt_change = -change("LongTermDebt");
7121 let equity_change = -change("Equity");
7122 let financing_cf = debt_change + equity_change;
7123
7124 let net_change = operating_cf + investing_cf + financing_cf;
7125
7126 vec![
7127 CashFlowItem {
7128 item_code: "CF-NI".to_string(),
7129 label: "Net Income".to_string(),
7130 category: CashFlowCategory::Operating,
7131 amount: net_income,
7132 amount_prior: None,
7133 sort_order: 1,
7134 is_total: false,
7135 },
7136 CashFlowItem {
7137 item_code: "CF-DEP".to_string(),
7138 label: "Depreciation & Amortization".to_string(),
7139 category: CashFlowCategory::Operating,
7140 amount: depreciation_addback,
7141 amount_prior: None,
7142 sort_order: 2,
7143 is_total: false,
7144 },
7145 CashFlowItem {
7146 item_code: "CF-AR".to_string(),
7147 label: "Change in Accounts Receivable".to_string(),
7148 category: CashFlowCategory::Operating,
7149 amount: -ar_change,
7150 amount_prior: None,
7151 sort_order: 3,
7152 is_total: false,
7153 },
7154 CashFlowItem {
7155 item_code: "CF-AP".to_string(),
7156 label: "Change in Accounts Payable".to_string(),
7157 category: CashFlowCategory::Operating,
7158 amount: -ap_change,
7159 amount_prior: None,
7160 sort_order: 4,
7161 is_total: false,
7162 },
7163 CashFlowItem {
7164 item_code: "CF-INV".to_string(),
7165 label: "Change in Inventory".to_string(),
7166 category: CashFlowCategory::Operating,
7167 amount: -inventory_change,
7168 amount_prior: None,
7169 sort_order: 5,
7170 is_total: false,
7171 },
7172 CashFlowItem {
7173 item_code: "CF-OP".to_string(),
7174 label: "Net Cash from Operating Activities".to_string(),
7175 category: CashFlowCategory::Operating,
7176 amount: operating_cf,
7177 amount_prior: None,
7178 sort_order: 6,
7179 is_total: true,
7180 },
7181 CashFlowItem {
7182 item_code: "CF-CAPEX".to_string(),
7183 label: "Capital Expenditures".to_string(),
7184 category: CashFlowCategory::Investing,
7185 amount: capex,
7186 amount_prior: None,
7187 sort_order: 7,
7188 is_total: false,
7189 },
7190 CashFlowItem {
7191 item_code: "CF-INV-T".to_string(),
7192 label: "Net Cash from Investing Activities".to_string(),
7193 category: CashFlowCategory::Investing,
7194 amount: investing_cf,
7195 amount_prior: None,
7196 sort_order: 8,
7197 is_total: true,
7198 },
7199 CashFlowItem {
7200 item_code: "CF-DEBT".to_string(),
7201 label: "Net Borrowings / (Repayments)".to_string(),
7202 category: CashFlowCategory::Financing,
7203 amount: debt_change,
7204 amount_prior: None,
7205 sort_order: 9,
7206 is_total: false,
7207 },
7208 CashFlowItem {
7209 item_code: "CF-EQ".to_string(),
7210 label: "Equity Changes".to_string(),
7211 category: CashFlowCategory::Financing,
7212 amount: equity_change,
7213 amount_prior: None,
7214 sort_order: 10,
7215 is_total: false,
7216 },
7217 CashFlowItem {
7218 item_code: "CF-FIN-T".to_string(),
7219 label: "Net Cash from Financing Activities".to_string(),
7220 category: CashFlowCategory::Financing,
7221 amount: financing_cf,
7222 amount_prior: None,
7223 sort_order: 11,
7224 is_total: true,
7225 },
7226 CashFlowItem {
7227 item_code: "CF-NET".to_string(),
7228 label: "Net Change in Cash".to_string(),
7229 category: CashFlowCategory::Operating,
7230 amount: net_change,
7231 amount_prior: None,
7232 sort_order: 12,
7233 is_total: true,
7234 },
7235 ]
7236 }
7237
7238 fn calculate_net_income_from_tb(
7242 tb: &[datasynth_generators::TrialBalanceEntry],
7243 ) -> rust_decimal::Decimal {
7244 use rust_decimal::Decimal;
7245
7246 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7247 for entry in tb {
7248 let net = entry.debit_balance - entry.credit_balance;
7249 *aggregated.entry(entry.category.clone()).or_default() += net;
7250 }
7251
7252 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7253 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7254 let opex = *aggregated
7255 .get("OperatingExpenses")
7256 .unwrap_or(&Decimal::ZERO);
7257 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7258 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7259
7260 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7263 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7265 operating_income - tax
7266 }
7267
/// Maps an account code to a reporting category name based on its first two
/// characters.
///
/// Codes whose prefix is not listed, and codes shorter than two characters,
/// fall back to `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Covers prefixes 60-69 as well as every unmapped or too-short prefix.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
7296
7297 fn phase_hr_data(
7299 &mut self,
7300 stats: &mut EnhancedGenerationStatistics,
7301 ) -> SynthResult<HrSnapshot> {
7302 if !self.phase_config.generate_hr {
7303 debug!("Phase 16: Skipped (HR generation disabled)");
7304 return Ok(HrSnapshot::default());
7305 }
7306
7307 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7308
7309 let seed = self.seed;
7310 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7311 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7312 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7313 let company_code = self
7314 .config
7315 .companies
7316 .first()
7317 .map(|c| c.code.as_str())
7318 .unwrap_or("1000");
7319 let currency = self
7320 .config
7321 .companies
7322 .first()
7323 .map(|c| c.currency.as_str())
7324 .unwrap_or("USD");
7325
7326 let employee_ids: Vec<String> = self
7327 .master_data
7328 .employees
7329 .iter()
7330 .map(|e| e.employee_id.clone())
7331 .collect();
7332
7333 if employee_ids.is_empty() {
7334 debug!("Phase 16: Skipped (no employees available)");
7335 return Ok(HrSnapshot::default());
7336 }
7337
7338 let cost_center_ids: Vec<String> = self
7341 .master_data
7342 .employees
7343 .iter()
7344 .filter_map(|e| e.cost_center.clone())
7345 .collect::<std::collections::HashSet<_>>()
7346 .into_iter()
7347 .collect();
7348
7349 let mut snapshot = HrSnapshot::default();
7350
7351 if self.config.hr.payroll.enabled {
7353 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7354 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7355
7356 let payroll_pack = self.primary_pack();
7358
7359 payroll_gen.set_country_pack(payroll_pack.clone());
7362
7363 let employees_with_salary: Vec<(
7364 String,
7365 rust_decimal::Decimal,
7366 Option<String>,
7367 Option<String>,
7368 )> = self
7369 .master_data
7370 .employees
7371 .iter()
7372 .map(|e| {
7373 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7376 e.base_salary
7377 } else {
7378 rust_decimal::Decimal::from(60_000)
7379 };
7380 (
7381 e.employee_id.clone(),
7382 annual, e.cost_center.clone(),
7384 e.department_id.clone(),
7385 )
7386 })
7387 .collect();
7388
7389 let change_history = &self.master_data.employee_change_history;
7392 let has_changes = !change_history.is_empty();
7393 if has_changes {
7394 debug!(
7395 "Payroll will incorporate {} employee change events",
7396 change_history.len()
7397 );
7398 }
7399
7400 for month in 0..self.config.global.period_months {
7401 let period_start = start_date + chrono::Months::new(month);
7402 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7403 let (run, items) = if has_changes {
7404 payroll_gen.generate_with_changes(
7405 company_code,
7406 &employees_with_salary,
7407 period_start,
7408 period_end,
7409 currency,
7410 change_history,
7411 )
7412 } else {
7413 payroll_gen.generate(
7414 company_code,
7415 &employees_with_salary,
7416 period_start,
7417 period_end,
7418 currency,
7419 )
7420 };
7421 snapshot.payroll_runs.push(run);
7422 snapshot.payroll_run_count += 1;
7423 snapshot.payroll_line_item_count += items.len();
7424 snapshot.payroll_line_items.extend(items);
7425 }
7426 }
7427
7428 if self.config.hr.time_attendance.enabled {
7430 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7431 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7432 if let Some(ctx) = &self.temporal_context {
7436 time_gen.set_temporal_context(Arc::clone(ctx));
7437 }
7438 let entries = time_gen.generate(
7439 &employee_ids,
7440 start_date,
7441 end_date,
7442 &self.config.hr.time_attendance,
7443 );
7444 snapshot.time_entry_count = entries.len();
7445 snapshot.time_entries = entries;
7446 }
7447
7448 if self.config.hr.expenses.enabled {
7450 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7451 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7452 expense_gen.set_country_pack(self.primary_pack().clone());
7453 if let Some(ctx) = &self.temporal_context {
7456 expense_gen.set_temporal_context(Arc::clone(ctx));
7457 }
7458 let company_currency = self
7459 .config
7460 .companies
7461 .first()
7462 .map(|c| c.currency.as_str())
7463 .unwrap_or("USD");
7464 let reports = expense_gen.generate_with_currency(
7465 &employee_ids,
7466 start_date,
7467 end_date,
7468 &self.config.hr.expenses,
7469 company_currency,
7470 );
7471 snapshot.expense_report_count = reports.len();
7472 snapshot.expense_reports = reports;
7473 }
7474
7475 if self.config.hr.payroll.enabled {
7477 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7478 let employee_pairs: Vec<(String, String)> = self
7479 .master_data
7480 .employees
7481 .iter()
7482 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7483 .collect();
7484 let enrollments =
7485 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7486 snapshot.benefit_enrollment_count = enrollments.len();
7487 snapshot.benefit_enrollments = enrollments;
7488 }
7489
7490 if self.phase_config.generate_hr {
7492 let entity_name = self
7493 .config
7494 .companies
7495 .first()
7496 .map(|c| c.name.as_str())
7497 .unwrap_or("Entity");
7498 let period_months = self.config.global.period_months;
7499 let period_label = {
7500 let y = start_date.year();
7501 let m = start_date.month();
7502 if period_months >= 12 {
7503 format!("FY{y}")
7504 } else {
7505 format!("{y}-{m:02}")
7506 }
7507 };
7508 let reporting_date =
7509 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7510
7511 let avg_salary: Option<rust_decimal::Decimal> = {
7516 let employee_count = employee_ids.len();
7517 if self.config.hr.payroll.enabled
7518 && employee_count > 0
7519 && !snapshot.payroll_runs.is_empty()
7520 {
7521 let total_gross: rust_decimal::Decimal = snapshot
7523 .payroll_runs
7524 .iter()
7525 .filter(|r| r.company_code == company_code)
7526 .map(|r| r.total_gross)
7527 .sum();
7528 if total_gross > rust_decimal::Decimal::ZERO {
7529 let annual_total = if period_months > 0 && period_months < 12 {
7531 total_gross * rust_decimal::Decimal::from(12u32)
7532 / rust_decimal::Decimal::from(period_months)
7533 } else {
7534 total_gross
7535 };
7536 Some(
7537 (annual_total / rust_decimal::Decimal::from(employee_count))
7538 .round_dp(2),
7539 )
7540 } else {
7541 None
7542 }
7543 } else {
7544 None
7545 }
7546 };
7547
7548 let mut pension_gen =
7549 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7550 let pension_snap = pension_gen.generate(
7551 company_code,
7552 entity_name,
7553 &period_label,
7554 reporting_date,
7555 employee_ids.len(),
7556 currency,
7557 avg_salary,
7558 period_months,
7559 );
7560 snapshot.pension_plan_count = pension_snap.plans.len();
7561 snapshot.pension_plans = pension_snap.plans;
7562 snapshot.pension_obligations = pension_snap.obligations;
7563 snapshot.pension_plan_assets = pension_snap.plan_assets;
7564 snapshot.pension_disclosures = pension_snap.disclosures;
7565 snapshot.pension_journal_entries = pension_snap.journal_entries;
7570 }
7571
7572 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7574 let period_months = self.config.global.period_months;
7575 let period_label = {
7576 let y = start_date.year();
7577 let m = start_date.month();
7578 if period_months >= 12 {
7579 format!("FY{y}")
7580 } else {
7581 format!("{y}-{m:02}")
7582 }
7583 };
7584 let reporting_date =
7585 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7586
7587 let mut stock_comp_gen =
7588 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7589 let stock_snap = stock_comp_gen.generate(
7590 company_code,
7591 &employee_ids,
7592 start_date,
7593 &period_label,
7594 reporting_date,
7595 currency,
7596 );
7597 snapshot.stock_grant_count = stock_snap.grants.len();
7598 snapshot.stock_grants = stock_snap.grants;
7599 snapshot.stock_comp_expenses = stock_snap.expenses;
7600 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7601 }
7602
7603 stats.payroll_run_count = snapshot.payroll_run_count;
7604 stats.time_entry_count = snapshot.time_entry_count;
7605 stats.expense_report_count = snapshot.expense_report_count;
7606 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7607 stats.pension_plan_count = snapshot.pension_plan_count;
7608 stats.stock_grant_count = snapshot.stock_grant_count;
7609
7610 info!(
7611 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7612 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7613 snapshot.time_entry_count, snapshot.expense_report_count,
7614 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7615 snapshot.stock_grant_count
7616 );
7617 self.check_resources_with_log("post-hr")?;
7618
7619 Ok(snapshot)
7620 }
7621
7622 fn phase_accounting_standards(
7624 &mut self,
7625 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7626 journal_entries: &[JournalEntry],
7627 stats: &mut EnhancedGenerationStatistics,
7628 ) -> SynthResult<AccountingStandardsSnapshot> {
7629 if !self.phase_config.generate_accounting_standards {
7630 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7631 return Ok(AccountingStandardsSnapshot::default());
7632 }
7633 info!("Phase 17: Generating Accounting Standards Data");
7634
7635 let seed = self.seed;
7636 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7637 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7638 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7639 let company_code = self
7640 .config
7641 .companies
7642 .first()
7643 .map(|c| c.code.as_str())
7644 .unwrap_or("1000");
7645 let currency = self
7646 .config
7647 .companies
7648 .first()
7649 .map(|c| c.currency.as_str())
7650 .unwrap_or("USD");
7651
7652 let framework = match self.config.accounting_standards.framework {
7657 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7658 datasynth_standards::framework::AccountingFramework::UsGaap
7659 }
7660 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7661 datasynth_standards::framework::AccountingFramework::Ifrs
7662 }
7663 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7664 datasynth_standards::framework::AccountingFramework::DualReporting
7665 }
7666 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7667 datasynth_standards::framework::AccountingFramework::FrenchGaap
7668 }
7669 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7670 datasynth_standards::framework::AccountingFramework::GermanGaap
7671 }
7672 None => {
7673 let pack = self.primary_pack();
7675 let pack_fw = pack.accounting.framework.as_str();
7676 match pack_fw {
7677 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7678 "dual_reporting" => {
7679 datasynth_standards::framework::AccountingFramework::DualReporting
7680 }
7681 "french_gaap" => {
7682 datasynth_standards::framework::AccountingFramework::FrenchGaap
7683 }
7684 "german_gaap" | "hgb" => {
7685 datasynth_standards::framework::AccountingFramework::GermanGaap
7686 }
7687 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7689 }
7690 }
7691 };
7692
7693 let mut snapshot = AccountingStandardsSnapshot::default();
7694
7695 if self.config.accounting_standards.revenue_recognition.enabled {
7697 let customer_ids: Vec<String> = self
7698 .master_data
7699 .customers
7700 .iter()
7701 .map(|c| c.customer_id.clone())
7702 .collect();
7703
7704 if !customer_ids.is_empty() {
7705 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7706 let contracts = rev_gen.generate(
7707 company_code,
7708 &customer_ids,
7709 start_date,
7710 end_date,
7711 currency,
7712 &self.config.accounting_standards.revenue_recognition,
7713 framework,
7714 );
7715 snapshot.revenue_contract_count = contracts.len();
7716 snapshot.contracts = contracts;
7717 }
7718 }
7719
7720 if self.config.accounting_standards.impairment.enabled {
7722 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7723 .master_data
7724 .assets
7725 .iter()
7726 .map(|a| {
7727 (
7728 a.asset_id.clone(),
7729 a.description.clone(),
7730 a.acquisition_cost,
7731 )
7732 })
7733 .collect();
7734
7735 if !asset_data.is_empty() {
7736 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7737 let tests = imp_gen.generate(
7738 company_code,
7739 &asset_data,
7740 end_date,
7741 &self.config.accounting_standards.impairment,
7742 framework,
7743 );
7744 snapshot.impairment_test_count = tests.len();
7745 snapshot.impairment_tests = tests;
7746 }
7747 }
7748
7749 if self
7751 .config
7752 .accounting_standards
7753 .business_combinations
7754 .enabled
7755 {
7756 let bc_config = &self.config.accounting_standards.business_combinations;
7757 let framework_str = match framework {
7758 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7759 _ => "US_GAAP",
7760 };
7761 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7762 let bc_snap = bc_gen.generate(
7763 company_code,
7764 currency,
7765 start_date,
7766 end_date,
7767 bc_config.acquisition_count,
7768 framework_str,
7769 );
7770 snapshot.business_combination_count = bc_snap.combinations.len();
7771 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7772 snapshot.business_combinations = bc_snap.combinations;
7773 }
7774
7775 if self
7777 .config
7778 .accounting_standards
7779 .expected_credit_loss
7780 .enabled
7781 {
7782 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7783 let framework_str = match framework {
7784 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7785 _ => "ASC_326",
7786 };
7787
7788 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7791
7792 let mut ecl_gen = EclGenerator::new(seed + 43);
7793
7794 let bucket_exposures: Vec<(
7796 datasynth_core::models::subledger::ar::AgingBucket,
7797 rust_decimal::Decimal,
7798 )> = if ar_aging_reports.is_empty() {
7799 use datasynth_core::models::subledger::ar::AgingBucket;
7801 vec![
7802 (
7803 AgingBucket::Current,
7804 rust_decimal::Decimal::from(500_000_u32),
7805 ),
7806 (
7807 AgingBucket::Days1To30,
7808 rust_decimal::Decimal::from(120_000_u32),
7809 ),
7810 (
7811 AgingBucket::Days31To60,
7812 rust_decimal::Decimal::from(45_000_u32),
7813 ),
7814 (
7815 AgingBucket::Days61To90,
7816 rust_decimal::Decimal::from(15_000_u32),
7817 ),
7818 (
7819 AgingBucket::Over90Days,
7820 rust_decimal::Decimal::from(8_000_u32),
7821 ),
7822 ]
7823 } else {
7824 use datasynth_core::models::subledger::ar::AgingBucket;
7825 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7827 std::collections::HashMap::new();
7828 for report in ar_aging_reports {
7829 for (bucket, amount) in &report.bucket_totals {
7830 *totals.entry(*bucket).or_default() += amount;
7831 }
7832 }
7833 AgingBucket::all()
7834 .into_iter()
7835 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7836 .collect()
7837 };
7838
7839 let ecl_snap = ecl_gen.generate(
7840 company_code,
7841 end_date,
7842 &bucket_exposures,
7843 ecl_config,
7844 &period_label,
7845 framework_str,
7846 );
7847
7848 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7849 snapshot.ecl_models = ecl_snap.ecl_models;
7850 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7851 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7852 }
7853
7854 {
7856 let framework_str = match framework {
7857 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7858 _ => "US_GAAP",
7859 };
7860
7861 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7866 .max(rust_decimal::Decimal::from(100_000_u32));
7867
7868 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7869
7870 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7871 let prov_snap = prov_gen.generate(
7872 company_code,
7873 currency,
7874 revenue_proxy,
7875 end_date,
7876 &period_label,
7877 framework_str,
7878 None, );
7880
7881 snapshot.provision_count = prov_snap.provisions.len();
7882 snapshot.provisions = prov_snap.provisions;
7883 snapshot.provision_movements = prov_snap.movements;
7884 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7885 snapshot.provision_journal_entries = prov_snap.journal_entries;
7886 }
7887
7888 {
7892 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7893
7894 let presentation_currency = self
7895 .config
7896 .global
7897 .presentation_currency
7898 .clone()
7899 .unwrap_or_else(|| self.config.global.group_currency.clone());
7900
7901 let mut rate_table = FxRateTable::new(&presentation_currency);
7904
7905 let base_rates = base_rates_usd();
7909 for (ccy, rate) in &base_rates {
7910 rate_table.add_rate(FxRate::new(
7911 ccy,
7912 "USD",
7913 RateType::Closing,
7914 end_date,
7915 *rate,
7916 "SYNTHETIC",
7917 ));
7918 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7921 rate_table.add_rate(FxRate::new(
7922 ccy,
7923 "USD",
7924 RateType::Average,
7925 end_date,
7926 avg,
7927 "SYNTHETIC",
7928 ));
7929 }
7930
7931 let mut translation_results = Vec::new();
7932 for company in &self.config.companies {
7933 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7936 .max(rust_decimal::Decimal::from(100_000_u32));
7937
7938 let func_ccy = company
7939 .functional_currency
7940 .clone()
7941 .unwrap_or_else(|| company.currency.clone());
7942
7943 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7944 &company.code,
7945 &func_ccy,
7946 &presentation_currency,
7947 &ias21_period_label,
7948 end_date,
7949 company_revenue,
7950 &rate_table,
7951 );
7952 translation_results.push(result);
7953 }
7954
7955 snapshot.currency_translation_count = translation_results.len();
7956 snapshot.currency_translation_results = translation_results;
7957 }
7958
7959 stats.revenue_contract_count = snapshot.revenue_contract_count;
7960 stats.impairment_test_count = snapshot.impairment_test_count;
7961 stats.business_combination_count = snapshot.business_combination_count;
7962 stats.ecl_model_count = snapshot.ecl_model_count;
7963 stats.provision_count = snapshot.provision_count;
7964
7965 if self.config.accounting_standards.leases.enabled {
7969 use datasynth_generators::standards::LeaseGenerator;
7970 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7971 .unwrap_or_else(|_| {
7972 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7973 });
7974 let framework =
7975 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7976 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7977 for company in &self.config.companies {
7978 let leases = lease_gen.generate(
7979 &company.code,
7980 start_date,
7981 &self.config.accounting_standards.leases,
7982 framework,
7983 );
7984 snapshot.lease_count += leases.len();
7985 snapshot.leases.extend(leases);
7986 }
7987 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7988 }
7989
7990 if self.config.accounting_standards.fair_value.enabled {
7994 use datasynth_generators::standards::FairValueGenerator;
7995 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7996 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7997 + chrono::Months::new(self.config.global.period_months);
7998 let framework =
7999 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8000 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8001 for company in &self.config.companies {
8002 let measurements = fv_gen.generate(
8003 &company.code,
8004 end_date,
8005 &company.currency,
8006 &self.config.accounting_standards.fair_value,
8007 framework,
8008 );
8009 snapshot.fair_value_measurement_count += measurements.len();
8010 snapshot.fair_value_measurements.extend(measurements);
8011 }
8012 info!(
8013 "v3.3.1 fair value measurements: {}",
8014 snapshot.fair_value_measurement_count
8015 );
8016 }
8017
8018 if self.config.accounting_standards.generate_differences
8022 && matches!(
8023 self.config.accounting_standards.framework,
8024 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8025 )
8026 {
8027 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8028 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8029 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8030 + chrono::Months::new(self.config.global.period_months);
8031 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8032 for company in &self.config.companies {
8033 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8034 snapshot.framework_difference_count += records.len();
8035 snapshot.framework_differences.extend(records);
8036 snapshot.framework_reconciliations.push(reconciliation);
8037 }
8038 info!(
8039 "v3.3.1 framework reconciliation: {} differences across {} entities",
8040 snapshot.framework_difference_count,
8041 snapshot.framework_reconciliations.len()
8042 );
8043 }
8044
8045 info!(
8046 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8047 snapshot.revenue_contract_count,
8048 snapshot.impairment_test_count,
8049 snapshot.business_combination_count,
8050 snapshot.ecl_model_count,
8051 snapshot.provision_count,
8052 snapshot.currency_translation_count,
8053 snapshot.lease_count,
8054 snapshot.fair_value_measurement_count,
8055 snapshot.framework_difference_count,
8056 );
8057 self.check_resources_with_log("post-accounting-standards")?;
8058
8059 Ok(snapshot)
8060 }
8061
8062 fn resolve_accounting_framework(
8066 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8067 ) -> datasynth_standards::framework::AccountingFramework {
8068 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8069 use datasynth_standards::framework::AccountingFramework as Fw;
8070 match cfg {
8071 Some(Cfg::Ifrs) => Fw::Ifrs,
8072 Some(Cfg::DualReporting) => Fw::DualReporting,
8073 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8074 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8075 _ => Fw::UsGaap,
8076 }
8077 }
8078
8079 fn phase_manufacturing(
8081 &mut self,
8082 stats: &mut EnhancedGenerationStatistics,
8083 ) -> SynthResult<ManufacturingSnapshot> {
8084 if !self.phase_config.generate_manufacturing {
8085 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8086 return Ok(ManufacturingSnapshot::default());
8087 }
8088 info!("Phase 18: Generating Manufacturing Data");
8089
8090 let seed = self.seed;
8091 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8092 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8093 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8094 let company_code = self
8095 .config
8096 .companies
8097 .first()
8098 .map(|c| c.code.as_str())
8099 .unwrap_or("1000");
8100
8101 let material_data: Vec<(String, String)> = self
8102 .master_data
8103 .materials
8104 .iter()
8105 .map(|m| (m.material_id.clone(), m.description.clone()))
8106 .collect();
8107
8108 if material_data.is_empty() {
8109 debug!("Phase 18: Skipped (no materials available)");
8110 return Ok(ManufacturingSnapshot::default());
8111 }
8112
8113 let mut snapshot = ManufacturingSnapshot::default();
8114
8115 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8117 if let Some(ctx) = &self.temporal_context {
8119 prod_gen.set_temporal_context(Arc::clone(ctx));
8120 }
8121 let production_orders = prod_gen.generate(
8122 company_code,
8123 &material_data,
8124 start_date,
8125 end_date,
8126 &self.config.manufacturing.production_orders,
8127 &self.config.manufacturing.costing,
8128 &self.config.manufacturing.routing,
8129 );
8130 snapshot.production_order_count = production_orders.len();
8131
8132 let inspection_data: Vec<(String, String, String)> = production_orders
8134 .iter()
8135 .map(|po| {
8136 (
8137 po.order_id.clone(),
8138 po.material_id.clone(),
8139 po.material_description.clone(),
8140 )
8141 })
8142 .collect();
8143
8144 snapshot.production_orders = production_orders;
8145
8146 if !inspection_data.is_empty() {
8147 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8148 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8149 snapshot.quality_inspection_count = inspections.len();
8150 snapshot.quality_inspections = inspections;
8151 }
8152
8153 let storage_locations: Vec<(String, String)> = material_data
8155 .iter()
8156 .enumerate()
8157 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8158 .collect();
8159
8160 let employee_ids: Vec<String> = self
8161 .master_data
8162 .employees
8163 .iter()
8164 .map(|e| e.employee_id.clone())
8165 .collect();
8166 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8167 .with_employee_pool(employee_ids);
8168 let mut cycle_count_total = 0usize;
8169 for month in 0..self.config.global.period_months {
8170 let count_date = start_date + chrono::Months::new(month);
8171 let items_per_count = storage_locations.len().clamp(10, 50);
8172 let cc = cc_gen.generate(
8173 company_code,
8174 &storage_locations,
8175 count_date,
8176 items_per_count,
8177 );
8178 snapshot.cycle_counts.push(cc);
8179 cycle_count_total += 1;
8180 }
8181 snapshot.cycle_count_count = cycle_count_total;
8182
8183 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8185 let bom_components = bom_gen.generate(company_code, &material_data);
8186 snapshot.bom_component_count = bom_components.len();
8187 snapshot.bom_components = bom_components;
8188
8189 let currency = self
8191 .config
8192 .companies
8193 .first()
8194 .map(|c| c.currency.as_str())
8195 .unwrap_or("USD");
8196 let production_order_ids: Vec<String> = snapshot
8197 .production_orders
8198 .iter()
8199 .map(|po| po.order_id.clone())
8200 .collect();
8201 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8202 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8203 company_code,
8204 &material_data,
8205 start_date,
8206 end_date,
8207 2,
8208 currency,
8209 &production_order_ids,
8210 );
8211 snapshot.inventory_movement_count = inventory_movements.len();
8212 snapshot.inventory_movements = inventory_movements;
8213
8214 stats.production_order_count = snapshot.production_order_count;
8215 stats.quality_inspection_count = snapshot.quality_inspection_count;
8216 stats.cycle_count_count = snapshot.cycle_count_count;
8217 stats.bom_component_count = snapshot.bom_component_count;
8218 stats.inventory_movement_count = snapshot.inventory_movement_count;
8219
8220 info!(
8221 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8222 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8223 snapshot.bom_component_count, snapshot.inventory_movement_count
8224 );
8225 self.check_resources_with_log("post-manufacturing")?;
8226
8227 Ok(snapshot)
8228 }
8229
8230 fn phase_sales_kpi_budgets(
8232 &mut self,
8233 coa: &Arc<ChartOfAccounts>,
8234 financial_reporting: &FinancialReportingSnapshot,
8235 stats: &mut EnhancedGenerationStatistics,
8236 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8237 if !self.phase_config.generate_sales_kpi_budgets {
8238 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8239 return Ok(SalesKpiBudgetsSnapshot::default());
8240 }
8241 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8242
8243 let seed = self.seed;
8244 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8245 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8246 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8247 let company_code = self
8248 .config
8249 .companies
8250 .first()
8251 .map(|c| c.code.as_str())
8252 .unwrap_or("1000");
8253
8254 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8255
8256 if self.config.sales_quotes.enabled {
8258 let customer_data: Vec<(String, String)> = self
8259 .master_data
8260 .customers
8261 .iter()
8262 .map(|c| (c.customer_id.clone(), c.name.clone()))
8263 .collect();
8264 let material_data: Vec<(String, String)> = self
8265 .master_data
8266 .materials
8267 .iter()
8268 .map(|m| (m.material_id.clone(), m.description.clone()))
8269 .collect();
8270
8271 if !customer_data.is_empty() && !material_data.is_empty() {
8272 let employee_ids: Vec<String> = self
8273 .master_data
8274 .employees
8275 .iter()
8276 .map(|e| e.employee_id.clone())
8277 .collect();
8278 let customer_ids: Vec<String> = self
8279 .master_data
8280 .customers
8281 .iter()
8282 .map(|c| c.customer_id.clone())
8283 .collect();
8284 let company_currency = self
8285 .config
8286 .companies
8287 .first()
8288 .map(|c| c.currency.as_str())
8289 .unwrap_or("USD");
8290
8291 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8292 .with_pools(employee_ids, customer_ids);
8293 let quotes = quote_gen.generate_with_currency(
8294 company_code,
8295 &customer_data,
8296 &material_data,
8297 start_date,
8298 end_date,
8299 &self.config.sales_quotes,
8300 company_currency,
8301 );
8302 snapshot.sales_quote_count = quotes.len();
8303 snapshot.sales_quotes = quotes;
8304 }
8305 }
8306
8307 if self.config.financial_reporting.management_kpis.enabled {
8309 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8310 let mut kpis = kpi_gen.generate(
8311 company_code,
8312 start_date,
8313 end_date,
8314 &self.config.financial_reporting.management_kpis,
8315 );
8316
8317 {
8319 use rust_decimal::Decimal;
8320
8321 if let Some(income_stmt) =
8322 financial_reporting.financial_statements.iter().find(|fs| {
8323 fs.statement_type == StatementType::IncomeStatement
8324 && fs.company_code == company_code
8325 })
8326 {
8327 let total_revenue: Decimal = income_stmt
8329 .line_items
8330 .iter()
8331 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8332 .map(|li| li.amount)
8333 .sum();
8334 let total_cogs: Decimal = income_stmt
8335 .line_items
8336 .iter()
8337 .filter(|li| {
8338 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8339 && !li.is_total
8340 })
8341 .map(|li| li.amount.abs())
8342 .sum();
8343 let total_opex: Decimal = income_stmt
8344 .line_items
8345 .iter()
8346 .filter(|li| {
8347 li.section.contains("Expense")
8348 && !li.is_total
8349 && !li.section.contains("Cost")
8350 })
8351 .map(|li| li.amount.abs())
8352 .sum();
8353
8354 if total_revenue > Decimal::ZERO {
8355 let hundred = Decimal::from(100);
8356 let gross_margin_pct =
8357 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8358 let operating_income = total_revenue - total_cogs - total_opex;
8359 let op_margin_pct =
8360 (operating_income * hundred / total_revenue).round_dp(2);
8361
8362 for kpi in &mut kpis {
8364 if kpi.name == "Gross Margin" {
8365 kpi.value = gross_margin_pct;
8366 } else if kpi.name == "Operating Margin" {
8367 kpi.value = op_margin_pct;
8368 }
8369 }
8370 }
8371 }
8372
8373 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8375 fs.statement_type == StatementType::BalanceSheet
8376 && fs.company_code == company_code
8377 }) {
8378 let current_assets: Decimal = bs
8379 .line_items
8380 .iter()
8381 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8382 .map(|li| li.amount)
8383 .sum();
8384 let current_liabilities: Decimal = bs
8385 .line_items
8386 .iter()
8387 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8388 .map(|li| li.amount.abs())
8389 .sum();
8390
8391 if current_liabilities > Decimal::ZERO {
8392 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8393 for kpi in &mut kpis {
8394 if kpi.name == "Current Ratio" {
8395 kpi.value = current_ratio;
8396 }
8397 }
8398 }
8399 }
8400 }
8401
8402 snapshot.kpi_count = kpis.len();
8403 snapshot.kpis = kpis;
8404 }
8405
8406 if self.config.financial_reporting.budgets.enabled {
8408 let account_data: Vec<(String, String)> = coa
8409 .accounts
8410 .iter()
8411 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8412 .collect();
8413
8414 if !account_data.is_empty() {
8415 let fiscal_year = start_date.year() as u32;
8416 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8417 let budget = budget_gen.generate(
8418 company_code,
8419 fiscal_year,
8420 &account_data,
8421 &self.config.financial_reporting.budgets,
8422 );
8423 snapshot.budget_line_count = budget.line_items.len();
8424 snapshot.budgets.push(budget);
8425 }
8426 }
8427
8428 stats.sales_quote_count = snapshot.sales_quote_count;
8429 stats.kpi_count = snapshot.kpi_count;
8430 stats.budget_line_count = snapshot.budget_line_count;
8431
8432 info!(
8433 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8434 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8435 );
8436 self.check_resources_with_log("post-sales-kpi-budgets")?;
8437
8438 Ok(snapshot)
8439 }
8440
8441 fn compute_pre_tax_income(
8448 company_code: &str,
8449 journal_entries: &[JournalEntry],
8450 ) -> rust_decimal::Decimal {
8451 use datasynth_core::accounts::AccountCategory;
8452 use rust_decimal::Decimal;
8453
8454 let mut total_revenue = Decimal::ZERO;
8455 let mut total_expenses = Decimal::ZERO;
8456
8457 for je in journal_entries {
8458 if je.header.company_code != company_code {
8459 continue;
8460 }
8461 for line in &je.lines {
8462 let cat = AccountCategory::from_account(&line.gl_account);
8463 match cat {
8464 AccountCategory::Revenue => {
8465 total_revenue += line.credit_amount - line.debit_amount;
8466 }
8467 AccountCategory::Cogs
8468 | AccountCategory::OperatingExpense
8469 | AccountCategory::OtherIncomeExpense => {
8470 total_expenses += line.debit_amount - line.credit_amount;
8471 }
8472 _ => {}
8473 }
8474 }
8475 }
8476
8477 let pti = (total_revenue - total_expenses).round_dp(2);
8478 if pti == rust_decimal::Decimal::ZERO {
8479 rust_decimal::Decimal::from(1_000_000u32)
8482 } else {
8483 pti
8484 }
8485 }
8486
8487 fn phase_tax_generation(
8489 &mut self,
8490 document_flows: &DocumentFlowSnapshot,
8491 journal_entries: &[JournalEntry],
8492 stats: &mut EnhancedGenerationStatistics,
8493 ) -> SynthResult<TaxSnapshot> {
8494 if !self.phase_config.generate_tax {
8495 debug!("Phase 20: Skipped (tax generation disabled)");
8496 return Ok(TaxSnapshot::default());
8497 }
8498 info!("Phase 20: Generating Tax Data");
8499
8500 let seed = self.seed;
8501 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8502 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8503 let fiscal_year = start_date.year();
8504 let company_code = self
8505 .config
8506 .companies
8507 .first()
8508 .map(|c| c.code.as_str())
8509 .unwrap_or("1000");
8510
8511 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8512 seed + 370,
8513 self.config.tax.clone(),
8514 );
8515
8516 let pack = self.primary_pack().clone();
8517 let (jurisdictions, codes) =
8518 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8519
8520 let mut provisions = Vec::new();
8522 if self.config.tax.provisions.enabled {
8523 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8524 for company in &self.config.companies {
8525 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8526 let statutory_rate = rust_decimal::Decimal::new(
8527 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8528 2,
8529 );
8530 let provision = provision_gen.generate(
8531 &company.code,
8532 start_date,
8533 pre_tax_income,
8534 statutory_rate,
8535 );
8536 provisions.push(provision);
8537 }
8538 }
8539
8540 let mut tax_lines = Vec::new();
8542 if !codes.is_empty() {
8543 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8544 datasynth_generators::TaxLineGeneratorConfig::default(),
8545 codes.clone(),
8546 seed + 372,
8547 );
8548
8549 let buyer_country = self
8552 .config
8553 .companies
8554 .first()
8555 .map(|c| c.country.as_str())
8556 .unwrap_or("US");
8557 for vi in &document_flows.vendor_invoices {
8558 let lines = tax_line_gen.generate_for_document(
8559 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8560 &vi.header.document_id,
8561 buyer_country, buyer_country,
8563 vi.payable_amount,
8564 vi.header.document_date,
8565 None,
8566 );
8567 tax_lines.extend(lines);
8568 }
8569
8570 for ci in &document_flows.customer_invoices {
8572 let lines = tax_line_gen.generate_for_document(
8573 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8574 &ci.header.document_id,
8575 buyer_country, buyer_country,
8577 ci.total_gross_amount,
8578 ci.header.document_date,
8579 None,
8580 );
8581 tax_lines.extend(lines);
8582 }
8583 }
8584
8585 let deferred_tax = {
8587 let companies: Vec<(&str, &str)> = self
8588 .config
8589 .companies
8590 .iter()
8591 .map(|c| (c.code.as_str(), c.country.as_str()))
8592 .collect();
8593 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8594 deferred_gen.generate(&companies, start_date, journal_entries)
8595 };
8596
8597 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8600 std::collections::HashMap::new();
8601 for vi in &document_flows.vendor_invoices {
8602 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8603 }
8604 for ci in &document_flows.customer_invoices {
8605 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8606 }
8607
8608 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8611 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8612 &tax_lines,
8613 company_code,
8614 &doc_dates,
8615 end_date,
8616 );
8617 debug!("Generated {} tax posting JEs", jes.len());
8618 jes
8619 } else {
8620 Vec::new()
8621 };
8622
8623 let snapshot = TaxSnapshot {
8624 jurisdiction_count: jurisdictions.len(),
8625 code_count: codes.len(),
8626 jurisdictions,
8627 codes,
8628 tax_provisions: provisions,
8629 tax_lines,
8630 tax_returns: Vec::new(),
8631 withholding_records: Vec::new(),
8632 tax_anomaly_labels: Vec::new(),
8633 deferred_tax,
8634 tax_posting_journal_entries,
8635 };
8636
8637 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8638 stats.tax_code_count = snapshot.code_count;
8639 stats.tax_provision_count = snapshot.tax_provisions.len();
8640 stats.tax_line_count = snapshot.tax_lines.len();
8641
8642 info!(
8643 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8644 snapshot.jurisdiction_count,
8645 snapshot.code_count,
8646 snapshot.tax_provisions.len(),
8647 snapshot.deferred_tax.temporary_differences.len(),
8648 snapshot.deferred_tax.journal_entries.len(),
8649 snapshot.tax_posting_journal_entries.len(),
8650 );
8651 self.check_resources_with_log("post-tax")?;
8652
8653 Ok(snapshot)
8654 }
8655
8656 fn phase_esg_generation(
8658 &mut self,
8659 document_flows: &DocumentFlowSnapshot,
8660 manufacturing: &ManufacturingSnapshot,
8661 stats: &mut EnhancedGenerationStatistics,
8662 ) -> SynthResult<EsgSnapshot> {
8663 if !self.phase_config.generate_esg {
8664 debug!("Phase 21: Skipped (ESG generation disabled)");
8665 return Ok(EsgSnapshot::default());
8666 }
8667 let degradation = self.check_resources()?;
8668 if degradation >= DegradationLevel::Reduced {
8669 debug!(
8670 "Phase skipped due to resource pressure (degradation: {:?})",
8671 degradation
8672 );
8673 return Ok(EsgSnapshot::default());
8674 }
8675 info!("Phase 21: Generating ESG Data");
8676
8677 let seed = self.seed;
8678 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8679 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8680 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8681 let entity_id = self
8682 .config
8683 .companies
8684 .first()
8685 .map(|c| c.code.as_str())
8686 .unwrap_or("1000");
8687
8688 let esg_cfg = &self.config.esg;
8689 let mut snapshot = EsgSnapshot::default();
8690
8691 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8693 esg_cfg.environmental.energy.clone(),
8694 seed + 80,
8695 );
8696 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8697
8698 let facility_count = esg_cfg.environmental.energy.facility_count;
8700 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8701 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8702
8703 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8705 seed + 82,
8706 esg_cfg.environmental.waste.diversion_target,
8707 facility_count,
8708 );
8709 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8710
8711 let mut emission_gen =
8713 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8714
8715 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8717 .iter()
8718 .map(|e| datasynth_generators::EnergyInput {
8719 facility_id: e.facility_id.clone(),
8720 energy_type: match e.energy_source {
8721 EnergySourceType::NaturalGas => {
8722 datasynth_generators::EnergyInputType::NaturalGas
8723 }
8724 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8725 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8726 _ => datasynth_generators::EnergyInputType::Electricity,
8727 },
8728 consumption_kwh: e.consumption_kwh,
8729 period: e.period,
8730 })
8731 .collect();
8732
8733 if !manufacturing.production_orders.is_empty() {
8735 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8736 &manufacturing.production_orders,
8737 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8740 if !mfg_energy.is_empty() {
8741 info!(
8742 "ESG: {} energy inputs derived from {} production orders",
8743 mfg_energy.len(),
8744 manufacturing.production_orders.len(),
8745 );
8746 energy_inputs.extend(mfg_energy);
8747 }
8748 }
8749
8750 let mut emissions = Vec::new();
8751 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8752 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8753
8754 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8756 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8757 for payment in &document_flows.payments {
8758 if payment.is_vendor {
8759 *totals
8760 .entry(payment.business_partner_id.clone())
8761 .or_default() += payment.amount;
8762 }
8763 }
8764 totals
8765 };
8766 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8767 .master_data
8768 .vendors
8769 .iter()
8770 .map(|v| {
8771 let spend = vendor_payment_totals
8772 .get(&v.vendor_id)
8773 .copied()
8774 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8775 datasynth_generators::VendorSpendInput {
8776 vendor_id: v.vendor_id.clone(),
8777 category: format!("{:?}", v.vendor_type).to_lowercase(),
8778 spend,
8779 country: v.country.clone(),
8780 }
8781 })
8782 .collect();
8783 if !vendor_spend.is_empty() {
8784 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8785 entity_id,
8786 &vendor_spend,
8787 start_date,
8788 end_date,
8789 ));
8790 }
8791
8792 let headcount = self.master_data.employees.len() as u32;
8794 if headcount > 0 {
8795 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8796 emissions.extend(emission_gen.generate_scope3_business_travel(
8797 entity_id,
8798 travel_spend,
8799 start_date,
8800 ));
8801 emissions
8802 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8803 }
8804
8805 snapshot.emission_count = emissions.len();
8806 snapshot.emissions = emissions;
8807 snapshot.energy = energy_records;
8808
8809 let mut workforce_gen =
8811 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8812 let total_headcount = headcount.max(100);
8813 snapshot.diversity =
8814 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8815 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8816
8817 if !self.master_data.employees.is_empty() {
8819 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8820 entity_id,
8821 &self.master_data.employees,
8822 end_date,
8823 );
8824 if !hr_diversity.is_empty() {
8825 info!(
8826 "ESG: {} diversity metrics derived from {} actual employees",
8827 hr_diversity.len(),
8828 self.master_data.employees.len(),
8829 );
8830 snapshot.diversity.extend(hr_diversity);
8831 }
8832 }
8833
8834 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8835 entity_id,
8836 facility_count,
8837 start_date,
8838 end_date,
8839 );
8840
8841 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8844 entity_id,
8845 &snapshot.safety_incidents,
8846 total_hours,
8847 start_date,
8848 );
8849 snapshot.safety_metrics = vec![safety_metric];
8850
8851 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8853 seed + 85,
8854 esg_cfg.governance.board_size,
8855 esg_cfg.governance.independence_target,
8856 );
8857 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8858
8859 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8861 esg_cfg.supply_chain_esg.clone(),
8862 seed + 86,
8863 );
8864 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8865 .master_data
8866 .vendors
8867 .iter()
8868 .map(|v| datasynth_generators::VendorInput {
8869 vendor_id: v.vendor_id.clone(),
8870 country: v.country.clone(),
8871 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8872 quality_score: None,
8873 })
8874 .collect();
8875 snapshot.supplier_assessments =
8876 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8877
8878 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8880 seed + 87,
8881 esg_cfg.reporting.clone(),
8882 esg_cfg.climate_scenarios.clone(),
8883 );
8884 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8885 snapshot.disclosures = disclosure_gen.generate_disclosures(
8886 entity_id,
8887 &snapshot.materiality,
8888 start_date,
8889 end_date,
8890 );
8891 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8892 snapshot.disclosure_count = snapshot.disclosures.len();
8893
8894 if esg_cfg.anomaly_rate > 0.0 {
8896 let mut anomaly_injector =
8897 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8898 let mut labels = Vec::new();
8899 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8900 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8901 labels.extend(
8902 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8903 );
8904 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8905 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8906 snapshot.anomaly_labels = labels;
8907 }
8908
8909 stats.esg_emission_count = snapshot.emission_count;
8910 stats.esg_disclosure_count = snapshot.disclosure_count;
8911
8912 info!(
8913 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8914 snapshot.emission_count,
8915 snapshot.disclosure_count,
8916 snapshot.supplier_assessments.len()
8917 );
8918 self.check_resources_with_log("post-esg")?;
8919
8920 Ok(snapshot)
8921 }
8922
8923 fn phase_treasury_data(
8925 &mut self,
8926 document_flows: &DocumentFlowSnapshot,
8927 subledger: &SubledgerSnapshot,
8928 intercompany: &IntercompanySnapshot,
8929 stats: &mut EnhancedGenerationStatistics,
8930 ) -> SynthResult<TreasurySnapshot> {
8931 if !self.phase_config.generate_treasury {
8932 debug!("Phase 22: Skipped (treasury generation disabled)");
8933 return Ok(TreasurySnapshot::default());
8934 }
8935 let degradation = self.check_resources()?;
8936 if degradation >= DegradationLevel::Reduced {
8937 debug!(
8938 "Phase skipped due to resource pressure (degradation: {:?})",
8939 degradation
8940 );
8941 return Ok(TreasurySnapshot::default());
8942 }
8943 info!("Phase 22: Generating Treasury Data");
8944
8945 let seed = self.seed;
8946 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948 let currency = self
8949 .config
8950 .companies
8951 .first()
8952 .map(|c| c.currency.as_str())
8953 .unwrap_or("USD");
8954 let entity_id = self
8955 .config
8956 .companies
8957 .first()
8958 .map(|c| c.code.as_str())
8959 .unwrap_or("1000");
8960
8961 let mut snapshot = TreasurySnapshot::default();
8962
8963 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8965 self.config.treasury.debt.clone(),
8966 seed + 90,
8967 );
8968 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8969
8970 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8972 self.config.treasury.hedging.clone(),
8973 seed + 91,
8974 );
8975 for debt in &snapshot.debt_instruments {
8976 if debt.rate_type == InterestRateType::Variable {
8977 let swap = hedge_gen.generate_ir_swap(
8978 currency,
8979 debt.principal,
8980 debt.origination_date,
8981 debt.maturity_date,
8982 );
8983 snapshot.hedging_instruments.push(swap);
8984 }
8985 }
8986
8987 {
8990 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8991 for payment in &document_flows.payments {
8992 if payment.currency != currency {
8993 let entry = fx_map
8994 .entry(payment.currency.clone())
8995 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8996 entry.0 += payment.amount;
8997 if payment.header.document_date > entry.1 {
8999 entry.1 = payment.header.document_date;
9000 }
9001 }
9002 }
9003 if !fx_map.is_empty() {
9004 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9005 .into_iter()
9006 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9007 datasynth_generators::treasury::FxExposure {
9008 currency_pair: format!("{foreign_ccy}/{currency}"),
9009 foreign_currency: foreign_ccy,
9010 net_amount,
9011 settlement_date,
9012 description: "AP payment FX exposure".to_string(),
9013 }
9014 })
9015 .collect();
9016 let (fx_instruments, fx_relationships) =
9017 hedge_gen.generate(start_date, &fx_exposures);
9018 snapshot.hedging_instruments.extend(fx_instruments);
9019 snapshot.hedge_relationships.extend(fx_relationships);
9020 }
9021 }
9022
9023 if self.config.treasury.anomaly_rate > 0.0 {
9025 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9026 seed + 92,
9027 self.config.treasury.anomaly_rate,
9028 );
9029 let mut labels = Vec::new();
9030 labels.extend(
9031 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9032 );
9033 snapshot.treasury_anomaly_labels = labels;
9034 }
9035
9036 if self.config.treasury.cash_positioning.enabled {
9038 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9039
9040 for payment in &document_flows.payments {
9042 cash_flows.push(datasynth_generators::treasury::CashFlow {
9043 date: payment.header.document_date,
9044 account_id: format!("{entity_id}-MAIN"),
9045 amount: payment.amount,
9046 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9047 });
9048 }
9049
9050 for chain in &document_flows.o2c_chains {
9052 if let Some(ref receipt) = chain.customer_receipt {
9053 cash_flows.push(datasynth_generators::treasury::CashFlow {
9054 date: receipt.header.document_date,
9055 account_id: format!("{entity_id}-MAIN"),
9056 amount: receipt.amount,
9057 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9058 });
9059 }
9060 for receipt in &chain.remainder_receipts {
9062 cash_flows.push(datasynth_generators::treasury::CashFlow {
9063 date: receipt.header.document_date,
9064 account_id: format!("{entity_id}-MAIN"),
9065 amount: receipt.amount,
9066 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9067 });
9068 }
9069 }
9070
9071 if !cash_flows.is_empty() {
9072 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9073 self.config.treasury.cash_positioning.clone(),
9074 seed + 93,
9075 );
9076 let account_id = format!("{entity_id}-MAIN");
9077 snapshot.cash_positions = cash_gen.generate(
9078 entity_id,
9079 &account_id,
9080 currency,
9081 &cash_flows,
9082 start_date,
9083 start_date + chrono::Months::new(self.config.global.period_months),
9084 rust_decimal::Decimal::new(1_000_000, 0), );
9086 }
9087 }
9088
9089 if self.config.treasury.cash_forecasting.enabled {
9091 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9092
9093 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9095 .ar_invoices
9096 .iter()
9097 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9098 .map(|inv| {
9099 let days_past_due = if inv.due_date < end_date {
9100 (end_date - inv.due_date).num_days().max(0) as u32
9101 } else {
9102 0
9103 };
9104 datasynth_generators::treasury::ArAgingItem {
9105 expected_date: inv.due_date,
9106 amount: inv.amount_remaining,
9107 days_past_due,
9108 document_id: inv.invoice_number.clone(),
9109 }
9110 })
9111 .collect();
9112
9113 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9115 .ap_invoices
9116 .iter()
9117 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9118 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9119 payment_date: inv.due_date,
9120 amount: inv.amount_remaining,
9121 document_id: inv.invoice_number.clone(),
9122 })
9123 .collect();
9124
9125 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9126 self.config.treasury.cash_forecasting.clone(),
9127 seed + 94,
9128 );
9129 let forecast = forecast_gen.generate(
9130 entity_id,
9131 currency,
9132 end_date,
9133 &ar_items,
9134 &ap_items,
9135 &[], );
9137 snapshot.cash_forecasts.push(forecast);
9138 }
9139
9140 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9142 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9143 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9144 self.config.treasury.cash_pooling.clone(),
9145 seed + 95,
9146 );
9147
9148 let account_ids: Vec<String> = snapshot
9150 .cash_positions
9151 .iter()
9152 .map(|cp| cp.bank_account_id.clone())
9153 .collect::<std::collections::HashSet<_>>()
9154 .into_iter()
9155 .collect();
9156
9157 if let Some(pool) =
9158 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9159 {
9160 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9162 for cp in &snapshot.cash_positions {
9163 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9164 }
9165
9166 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9167 latest_balances
9168 .into_iter()
9169 .filter(|(id, _)| pool.participant_accounts.contains(id))
9170 .map(
9171 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9172 account_id: id,
9173 balance,
9174 },
9175 )
9176 .collect();
9177
9178 let sweeps =
9179 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9180 snapshot.cash_pool_sweeps = sweeps;
9181 snapshot.cash_pools.push(pool);
9182 }
9183 }
9184
9185 if self.config.treasury.bank_guarantees.enabled {
9187 let vendor_names: Vec<String> = self
9188 .master_data
9189 .vendors
9190 .iter()
9191 .map(|v| v.name.clone())
9192 .collect();
9193 if !vendor_names.is_empty() {
9194 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9195 self.config.treasury.bank_guarantees.clone(),
9196 seed + 96,
9197 );
9198 snapshot.bank_guarantees =
9199 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9200 }
9201 }
9202
9203 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9205 let entity_ids: Vec<String> = self
9206 .config
9207 .companies
9208 .iter()
9209 .map(|c| c.code.clone())
9210 .collect();
9211 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9212 .matched_pairs
9213 .iter()
9214 .map(|mp| {
9215 (
9216 mp.seller_company.clone(),
9217 mp.buyer_company.clone(),
9218 mp.amount,
9219 )
9220 })
9221 .collect();
9222 if entity_ids.len() >= 2 {
9223 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9224 self.config.treasury.netting.clone(),
9225 seed + 97,
9226 );
9227 snapshot.netting_runs = netting_gen.generate(
9228 &entity_ids,
9229 currency,
9230 start_date,
9231 self.config.global.period_months,
9232 &ic_amounts,
9233 );
9234 }
9235 }
9236
9237 {
9239 use datasynth_generators::treasury::TreasuryAccounting;
9240
9241 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9242 let mut treasury_jes = Vec::new();
9243
9244 if !snapshot.debt_instruments.is_empty() {
9246 let debt_jes =
9247 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9248 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9249 treasury_jes.extend(debt_jes);
9250 }
9251
9252 if !snapshot.hedging_instruments.is_empty() {
9254 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9255 &snapshot.hedging_instruments,
9256 &snapshot.hedge_relationships,
9257 end_date,
9258 entity_id,
9259 );
9260 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9261 treasury_jes.extend(hedge_jes);
9262 }
9263
9264 if !snapshot.cash_pool_sweeps.is_empty() {
9266 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9267 &snapshot.cash_pool_sweeps,
9268 entity_id,
9269 );
9270 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9271 treasury_jes.extend(sweep_jes);
9272 }
9273
9274 if !treasury_jes.is_empty() {
9275 debug!("Total treasury journal entries: {}", treasury_jes.len());
9276 }
9277 snapshot.journal_entries = treasury_jes;
9278 }
9279
9280 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9281 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9282 stats.cash_position_count = snapshot.cash_positions.len();
9283 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9284 stats.cash_pool_count = snapshot.cash_pools.len();
9285
9286 info!(
9287 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9288 snapshot.debt_instruments.len(),
9289 snapshot.hedging_instruments.len(),
9290 snapshot.cash_positions.len(),
9291 snapshot.cash_forecasts.len(),
9292 snapshot.cash_pools.len(),
9293 snapshot.bank_guarantees.len(),
9294 snapshot.netting_runs.len(),
9295 snapshot.journal_entries.len(),
9296 );
9297 self.check_resources_with_log("post-treasury")?;
9298
9299 Ok(snapshot)
9300 }
9301
9302 fn phase_project_accounting(
9304 &mut self,
9305 document_flows: &DocumentFlowSnapshot,
9306 hr: &HrSnapshot,
9307 stats: &mut EnhancedGenerationStatistics,
9308 ) -> SynthResult<ProjectAccountingSnapshot> {
9309 if !self.phase_config.generate_project_accounting {
9310 debug!("Phase 23: Skipped (project accounting disabled)");
9311 return Ok(ProjectAccountingSnapshot::default());
9312 }
9313 let degradation = self.check_resources()?;
9314 if degradation >= DegradationLevel::Reduced {
9315 debug!(
9316 "Phase skipped due to resource pressure (degradation: {:?})",
9317 degradation
9318 );
9319 return Ok(ProjectAccountingSnapshot::default());
9320 }
9321 info!("Phase 23: Generating Project Accounting Data");
9322
9323 let seed = self.seed;
9324 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9325 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9326 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9327 let company_code = self
9328 .config
9329 .companies
9330 .first()
9331 .map(|c| c.code.as_str())
9332 .unwrap_or("1000");
9333
9334 let mut snapshot = ProjectAccountingSnapshot::default();
9335
9336 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9338 self.config.project_accounting.clone(),
9339 seed + 95,
9340 );
9341 let pool = project_gen.generate(company_code, start_date, end_date);
9342 snapshot.projects = pool.projects.clone();
9343
9344 {
9346 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9347 Vec::new();
9348
9349 for te in &hr.time_entries {
9351 let total_hours = te.hours_regular + te.hours_overtime;
9352 if total_hours > 0.0 {
9353 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9354 id: te.entry_id.clone(),
9355 entity_id: company_code.to_string(),
9356 date: te.date,
9357 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9358 .unwrap_or(rust_decimal::Decimal::ZERO),
9359 source_type: CostSourceType::TimeEntry,
9360 hours: Some(
9361 rust_decimal::Decimal::from_f64_retain(total_hours)
9362 .unwrap_or(rust_decimal::Decimal::ZERO),
9363 ),
9364 });
9365 }
9366 }
9367
9368 for er in &hr.expense_reports {
9370 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9371 id: er.report_id.clone(),
9372 entity_id: company_code.to_string(),
9373 date: er.submission_date,
9374 amount: er.total_amount,
9375 source_type: CostSourceType::ExpenseReport,
9376 hours: None,
9377 });
9378 }
9379
9380 for po in &document_flows.purchase_orders {
9382 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9383 id: po.header.document_id.clone(),
9384 entity_id: company_code.to_string(),
9385 date: po.header.document_date,
9386 amount: po.total_net_amount,
9387 source_type: CostSourceType::PurchaseOrder,
9388 hours: None,
9389 });
9390 }
9391
9392 for vi in &document_flows.vendor_invoices {
9394 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9395 id: vi.header.document_id.clone(),
9396 entity_id: company_code.to_string(),
9397 date: vi.header.document_date,
9398 amount: vi.payable_amount,
9399 source_type: CostSourceType::VendorInvoice,
9400 hours: None,
9401 });
9402 }
9403
9404 if !source_docs.is_empty() && !pool.projects.is_empty() {
9405 let mut cost_gen =
9406 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9407 self.config.project_accounting.cost_allocation.clone(),
9408 seed + 99,
9409 );
9410 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9411 }
9412 }
9413
9414 if self.config.project_accounting.change_orders.enabled {
9416 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9417 self.config.project_accounting.change_orders.clone(),
9418 seed + 96,
9419 );
9420 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9421 }
9422
9423 if self.config.project_accounting.milestones.enabled {
9425 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9426 self.config.project_accounting.milestones.clone(),
9427 seed + 97,
9428 );
9429 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9430 }
9431
9432 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9434 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9435 self.config.project_accounting.earned_value.clone(),
9436 seed + 98,
9437 );
9438 snapshot.earned_value_metrics =
9439 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9440 }
9441
9442 if self.config.project_accounting.revenue_recognition.enabled
9444 && !snapshot.projects.is_empty()
9445 && !snapshot.cost_lines.is_empty()
9446 {
9447 use datasynth_generators::project_accounting::RevenueGenerator;
9448 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9449 let avg_contract_value =
9450 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9451 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9452
9453 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9456 snapshot
9457 .projects
9458 .iter()
9459 .filter(|p| {
9460 matches!(
9461 p.project_type,
9462 datasynth_core::models::ProjectType::Customer
9463 )
9464 })
9465 .map(|p| {
9466 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9467 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9468 } else {
9470 avg_contract_value
9471 };
9472 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9474 })
9475 .collect();
9476
9477 if !contract_values.is_empty() {
9478 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9479 snapshot.revenue_records = rev_gen.generate(
9480 &snapshot.projects,
9481 &snapshot.cost_lines,
9482 &contract_values,
9483 start_date,
9484 end_date,
9485 );
9486 debug!(
9487 "Generated {} revenue recognition records for {} customer projects",
9488 snapshot.revenue_records.len(),
9489 contract_values.len()
9490 );
9491 }
9492 }
9493
9494 stats.project_count = snapshot.projects.len();
9495 stats.project_change_order_count = snapshot.change_orders.len();
9496 stats.project_cost_line_count = snapshot.cost_lines.len();
9497
9498 info!(
9499 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9500 snapshot.projects.len(),
9501 snapshot.change_orders.len(),
9502 snapshot.milestones.len(),
9503 snapshot.earned_value_metrics.len()
9504 );
9505 self.check_resources_with_log("post-project-accounting")?;
9506
9507 Ok(snapshot)
9508 }
9509
9510 fn phase_evolution_events(
9512 &mut self,
9513 stats: &mut EnhancedGenerationStatistics,
9514 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9515 if !self.phase_config.generate_evolution_events {
9516 debug!("Phase 24: Skipped (evolution events disabled)");
9517 return Ok((Vec::new(), Vec::new()));
9518 }
9519 info!("Phase 24: Generating Process Evolution + Organizational Events");
9520
9521 let seed = self.seed;
9522 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9523 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9524 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9525
9526 let mut proc_gen =
9528 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9529 seed + 100,
9530 );
9531 let process_events = proc_gen.generate_events(start_date, end_date);
9532
9533 let company_codes: Vec<String> = self
9535 .config
9536 .companies
9537 .iter()
9538 .map(|c| c.code.clone())
9539 .collect();
9540 let mut org_gen =
9541 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9542 seed + 101,
9543 );
9544 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9545
9546 stats.process_evolution_event_count = process_events.len();
9547 stats.organizational_event_count = org_events.len();
9548
9549 info!(
9550 "Evolution events generated: {} process evolution, {} organizational",
9551 process_events.len(),
9552 org_events.len()
9553 );
9554 self.check_resources_with_log("post-evolution-events")?;
9555
9556 Ok((process_events, org_events))
9557 }
9558
9559 fn phase_disruption_events(
9562 &self,
9563 stats: &mut EnhancedGenerationStatistics,
9564 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9565 if !self.config.organizational_events.enabled {
9566 debug!("Phase 24b: Skipped (organizational events disabled)");
9567 return Ok(Vec::new());
9568 }
9569 info!("Phase 24b: Generating Disruption Events");
9570
9571 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9572 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9573 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9574
9575 let company_codes: Vec<String> = self
9576 .config
9577 .companies
9578 .iter()
9579 .map(|c| c.code.clone())
9580 .collect();
9581
9582 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9583 let events = gen.generate(start_date, end_date, &company_codes);
9584
9585 stats.disruption_event_count = events.len();
9586 info!("Disruption events generated: {} events", events.len());
9587 self.check_resources_with_log("post-disruption-events")?;
9588
9589 Ok(events)
9590 }
9591
9592 fn phase_counterfactuals(
9599 &self,
9600 journal_entries: &[JournalEntry],
9601 stats: &mut EnhancedGenerationStatistics,
9602 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9603 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9604 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9605 return Ok(Vec::new());
9606 }
9607 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9608
9609 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9610
9611 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9612
9613 let specs = [
9615 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9616 CounterfactualSpec::ShiftDate { days: -14 },
9617 CounterfactualSpec::SelfApprove,
9618 CounterfactualSpec::SplitTransaction { split_count: 3 },
9619 ];
9620
9621 let pairs: Vec<_> = journal_entries
9622 .iter()
9623 .enumerate()
9624 .map(|(i, je)| {
9625 let spec = &specs[i % specs.len()];
9626 gen.generate(je, spec)
9627 })
9628 .collect();
9629
9630 stats.counterfactual_pair_count = pairs.len();
9631 info!(
9632 "Counterfactual pairs generated: {} pairs from {} journal entries",
9633 pairs.len(),
9634 journal_entries.len()
9635 );
9636 self.check_resources_with_log("post-counterfactuals")?;
9637
9638 Ok(pairs)
9639 }
9640
9641 fn phase_red_flags(
9648 &self,
9649 anomaly_labels: &AnomalyLabels,
9650 document_flows: &DocumentFlowSnapshot,
9651 stats: &mut EnhancedGenerationStatistics,
9652 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9653 if !self.config.fraud.enabled {
9654 debug!("Phase 26: Skipped (fraud generation disabled)");
9655 return Ok(Vec::new());
9656 }
9657 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9658
9659 use datasynth_generators::fraud::RedFlagGenerator;
9660
9661 let generator = RedFlagGenerator::new();
9662 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9663
9664 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9666 .labels
9667 .iter()
9668 .filter(|label| label.anomaly_type.is_intentional())
9669 .map(|label| label.document_id.as_str())
9670 .collect();
9671
9672 let mut flags = Vec::new();
9673
9674 for chain in &document_flows.p2p_chains {
9676 let doc_id = &chain.purchase_order.header.document_id;
9677 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9678 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9679 }
9680
9681 for chain in &document_flows.o2c_chains {
9683 let doc_id = &chain.sales_order.header.document_id;
9684 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9685 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9686 }
9687
9688 stats.red_flag_count = flags.len();
9689 info!(
9690 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9691 flags.len(),
9692 document_flows.p2p_chains.len(),
9693 document_flows.o2c_chains.len(),
9694 fraud_doc_ids.len()
9695 );
9696 self.check_resources_with_log("post-red-flags")?;
9697
9698 Ok(flags)
9699 }
9700
9701 fn phase_collusion_rings(
9707 &mut self,
9708 stats: &mut EnhancedGenerationStatistics,
9709 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9710 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9711 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9712 return Ok(Vec::new());
9713 }
9714 info!("Phase 26b: Generating Collusion Rings");
9715
9716 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9717 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9718 let months = self.config.global.period_months;
9719
9720 let employee_ids: Vec<String> = self
9721 .master_data
9722 .employees
9723 .iter()
9724 .map(|e| e.employee_id.clone())
9725 .collect();
9726 let vendor_ids: Vec<String> = self
9727 .master_data
9728 .vendors
9729 .iter()
9730 .map(|v| v.vendor_id.clone())
9731 .collect();
9732
9733 let mut generator =
9734 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9735 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9736
9737 stats.collusion_ring_count = rings.len();
9738 info!(
9739 "Collusion rings generated: {} rings, total members: {}",
9740 rings.len(),
9741 rings
9742 .iter()
9743 .map(datasynth_generators::fraud::CollusionRing::size)
9744 .sum::<usize>()
9745 );
9746 self.check_resources_with_log("post-collusion-rings")?;
9747
9748 Ok(rings)
9749 }
9750
9751 fn phase_temporal_attributes(
9756 &mut self,
9757 stats: &mut EnhancedGenerationStatistics,
9758 ) -> SynthResult<
9759 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9760 > {
9761 if !self.config.temporal_attributes.enabled {
9762 debug!("Phase 27: Skipped (temporal attributes disabled)");
9763 return Ok(Vec::new());
9764 }
9765 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9766
9767 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9768 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9769
9770 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9774 || self.config.temporal_attributes.enabled;
9775 let temporal_config = {
9776 let ta = &self.config.temporal_attributes;
9777 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9778 .enabled(ta.enabled)
9779 .closed_probability(ta.valid_time.closed_probability)
9780 .avg_validity_days(ta.valid_time.avg_validity_days)
9781 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9782 .with_version_chains(if generate_version_chains {
9783 ta.avg_versions_per_entity
9784 } else {
9785 1.0
9786 })
9787 .build()
9788 };
9789 let temporal_config = if self
9791 .config
9792 .temporal_attributes
9793 .transaction_time
9794 .allow_backdating
9795 {
9796 let mut c = temporal_config;
9797 c.transaction_time.allow_backdating = true;
9798 c.transaction_time.backdating_probability = self
9799 .config
9800 .temporal_attributes
9801 .transaction_time
9802 .backdating_probability;
9803 c.transaction_time.max_backdate_days = self
9804 .config
9805 .temporal_attributes
9806 .transaction_time
9807 .max_backdate_days;
9808 c
9809 } else {
9810 temporal_config
9811 };
9812 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9813 temporal_config,
9814 self.seed + 130,
9815 start_date,
9816 );
9817
9818 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9819 self.seed + 130,
9820 datasynth_core::GeneratorType::Vendor,
9821 );
9822
9823 let chains: Vec<_> = self
9824 .master_data
9825 .vendors
9826 .iter()
9827 .map(|vendor| {
9828 let id = uuid_factory.next();
9829 gen.generate_version_chain(vendor.clone(), id)
9830 })
9831 .collect();
9832
9833 stats.temporal_version_chain_count = chains.len();
9834 info!("Temporal version chains generated: {} chains", chains.len());
9835 self.check_resources_with_log("post-temporal-attributes")?;
9836
9837 Ok(chains)
9838 }
9839
9840 fn phase_entity_relationships(
9850 &self,
9851 journal_entries: &[JournalEntry],
9852 document_flows: &DocumentFlowSnapshot,
9853 stats: &mut EnhancedGenerationStatistics,
9854 ) -> SynthResult<(
9855 Option<datasynth_core::models::EntityGraph>,
9856 Vec<datasynth_core::models::CrossProcessLink>,
9857 )> {
9858 use datasynth_generators::relationships::{
9859 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9860 TransactionSummary,
9861 };
9862
9863 let rs_enabled = self.config.relationship_strength.enabled;
9864 let cpl_enabled = self.config.cross_process_links.enabled
9865 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9866
9867 if !rs_enabled && !cpl_enabled {
9868 debug!(
9869 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9870 );
9871 return Ok((None, Vec::new()));
9872 }
9873
9874 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9875
9876 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9877 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9878
9879 let company_code = self
9880 .config
9881 .companies
9882 .first()
9883 .map(|c| c.code.as_str())
9884 .unwrap_or("1000");
9885
9886 let gen_config = EntityGraphConfig {
9888 enabled: rs_enabled,
9889 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9890 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9891 enable_return_flows: false,
9892 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9893 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9894 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9896 1.0
9897 } else {
9898 0.30
9899 },
9900 ..Default::default()
9901 },
9902 strength_config: datasynth_generators::relationships::StrengthConfig {
9903 transaction_volume_weight: self
9904 .config
9905 .relationship_strength
9906 .calculation
9907 .transaction_volume_weight,
9908 transaction_count_weight: self
9909 .config
9910 .relationship_strength
9911 .calculation
9912 .transaction_count_weight,
9913 duration_weight: self
9914 .config
9915 .relationship_strength
9916 .calculation
9917 .relationship_duration_weight,
9918 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9919 mutual_connections_weight: self
9920 .config
9921 .relationship_strength
9922 .calculation
9923 .mutual_connections_weight,
9924 recency_half_life_days: self
9925 .config
9926 .relationship_strength
9927 .calculation
9928 .recency_half_life_days,
9929 },
9930 ..Default::default()
9931 };
9932
9933 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9934
9935 let entity_graph = if rs_enabled {
9937 let vendor_summaries: Vec<EntitySummary> = self
9939 .master_data
9940 .vendors
9941 .iter()
9942 .map(|v| {
9943 EntitySummary::new(
9944 &v.vendor_id,
9945 &v.name,
9946 datasynth_core::models::GraphEntityType::Vendor,
9947 start_date,
9948 )
9949 })
9950 .collect();
9951
9952 let customer_summaries: Vec<EntitySummary> = self
9953 .master_data
9954 .customers
9955 .iter()
9956 .map(|c| {
9957 EntitySummary::new(
9958 &c.customer_id,
9959 &c.name,
9960 datasynth_core::models::GraphEntityType::Customer,
9961 start_date,
9962 )
9963 })
9964 .collect();
9965
9966 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9971 std::collections::HashMap::new();
9972
9973 for je in journal_entries {
9974 let cc = je.header.company_code.clone();
9975 let posting_date = je.header.posting_date;
9976 for line in &je.lines {
9977 if let Some(ref tp) = line.trading_partner {
9978 let amount = if line.debit_amount > line.credit_amount {
9979 line.debit_amount
9980 } else {
9981 line.credit_amount
9982 };
9983 let entry = txn_summaries
9984 .entry((cc.clone(), tp.clone()))
9985 .or_insert_with(|| TransactionSummary {
9986 total_volume: rust_decimal::Decimal::ZERO,
9987 transaction_count: 0,
9988 first_transaction_date: posting_date,
9989 last_transaction_date: posting_date,
9990 related_entities: std::collections::HashSet::new(),
9991 });
9992 entry.total_volume += amount;
9993 entry.transaction_count += 1;
9994 if posting_date < entry.first_transaction_date {
9995 entry.first_transaction_date = posting_date;
9996 }
9997 if posting_date > entry.last_transaction_date {
9998 entry.last_transaction_date = posting_date;
9999 }
10000 entry.related_entities.insert(cc.clone());
10001 }
10002 }
10003 }
10004
10005 for chain in &document_flows.p2p_chains {
10008 let cc = chain.purchase_order.header.company_code.clone();
10009 let vendor_id = chain.purchase_order.vendor_id.clone();
10010 let po_date = chain.purchase_order.header.document_date;
10011 let amount = chain.purchase_order.total_net_amount;
10012
10013 let entry = txn_summaries
10014 .entry((cc.clone(), vendor_id))
10015 .or_insert_with(|| TransactionSummary {
10016 total_volume: rust_decimal::Decimal::ZERO,
10017 transaction_count: 0,
10018 first_transaction_date: po_date,
10019 last_transaction_date: po_date,
10020 related_entities: std::collections::HashSet::new(),
10021 });
10022 entry.total_volume += amount;
10023 entry.transaction_count += 1;
10024 if po_date < entry.first_transaction_date {
10025 entry.first_transaction_date = po_date;
10026 }
10027 if po_date > entry.last_transaction_date {
10028 entry.last_transaction_date = po_date;
10029 }
10030 entry.related_entities.insert(cc);
10031 }
10032
10033 for chain in &document_flows.o2c_chains {
10035 let cc = chain.sales_order.header.company_code.clone();
10036 let customer_id = chain.sales_order.customer_id.clone();
10037 let so_date = chain.sales_order.header.document_date;
10038 let amount = chain.sales_order.total_net_amount;
10039
10040 let entry = txn_summaries
10041 .entry((cc.clone(), customer_id))
10042 .or_insert_with(|| TransactionSummary {
10043 total_volume: rust_decimal::Decimal::ZERO,
10044 transaction_count: 0,
10045 first_transaction_date: so_date,
10046 last_transaction_date: so_date,
10047 related_entities: std::collections::HashSet::new(),
10048 });
10049 entry.total_volume += amount;
10050 entry.transaction_count += 1;
10051 if so_date < entry.first_transaction_date {
10052 entry.first_transaction_date = so_date;
10053 }
10054 if so_date > entry.last_transaction_date {
10055 entry.last_transaction_date = so_date;
10056 }
10057 entry.related_entities.insert(cc);
10058 }
10059
10060 let as_of_date = journal_entries
10061 .last()
10062 .map(|je| je.header.posting_date)
10063 .unwrap_or(start_date);
10064
10065 let graph = gen.generate_entity_graph(
10066 company_code,
10067 as_of_date,
10068 &vendor_summaries,
10069 &customer_summaries,
10070 &txn_summaries,
10071 );
10072
10073 info!(
10074 "Entity relationship graph: {} nodes, {} edges",
10075 graph.nodes.len(),
10076 graph.edges.len()
10077 );
10078 stats.entity_relationship_node_count = graph.nodes.len();
10079 stats.entity_relationship_edge_count = graph.edges.len();
10080 Some(graph)
10081 } else {
10082 None
10083 };
10084
10085 let cross_process_links = if cpl_enabled {
10087 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10089 .p2p_chains
10090 .iter()
10091 .flat_map(|chain| {
10092 let vendor_id = chain.purchase_order.vendor_id.clone();
10093 let cc = chain.purchase_order.header.company_code.clone();
10094 chain.goods_receipts.iter().flat_map(move |gr| {
10095 gr.items.iter().filter_map({
10096 let doc_id = gr.header.document_id.clone();
10097 let v_id = vendor_id.clone();
10098 let company = cc.clone();
10099 let receipt_date = gr.header.document_date;
10100 move |item| {
10101 item.base
10102 .material_id
10103 .as_ref()
10104 .map(|mat_id| GoodsReceiptRef {
10105 document_id: doc_id.clone(),
10106 material_id: mat_id.clone(),
10107 quantity: item.base.quantity,
10108 receipt_date,
10109 vendor_id: v_id.clone(),
10110 company_code: company.clone(),
10111 })
10112 }
10113 })
10114 })
10115 })
10116 .collect();
10117
10118 let del_refs: Vec<DeliveryRef> = document_flows
10120 .o2c_chains
10121 .iter()
10122 .flat_map(|chain| {
10123 let customer_id = chain.sales_order.customer_id.clone();
10124 let cc = chain.sales_order.header.company_code.clone();
10125 chain.deliveries.iter().flat_map(move |del| {
10126 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10127 del.items.iter().filter_map({
10128 let doc_id = del.header.document_id.clone();
10129 let c_id = customer_id.clone();
10130 let company = cc.clone();
10131 move |item| {
10132 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10133 document_id: doc_id.clone(),
10134 material_id: mat_id.clone(),
10135 quantity: item.base.quantity,
10136 delivery_date,
10137 customer_id: c_id.clone(),
10138 company_code: company.clone(),
10139 })
10140 }
10141 })
10142 })
10143 })
10144 .collect();
10145
10146 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10147 info!("Cross-process links generated: {} links", links.len());
10148 stats.cross_process_link_count = links.len();
10149 links
10150 } else {
10151 Vec::new()
10152 };
10153
10154 self.check_resources_with_log("post-entity-relationships")?;
10155 Ok((entity_graph, cross_process_links))
10156 }
10157
10158 fn phase_industry_data(
10160 &self,
10161 stats: &mut EnhancedGenerationStatistics,
10162 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10163 if !self.config.industry_specific.enabled {
10164 return None;
10165 }
10166 info!("Phase 29: Generating industry-specific data");
10167 let output = datasynth_generators::industry::factory::generate_industry_output(
10168 self.config.global.industry,
10169 );
10170 stats.industry_gl_account_count = output.gl_accounts.len();
10171 info!(
10172 "Industry data generated: {} GL accounts for {:?}",
10173 output.gl_accounts.len(),
10174 self.config.global.industry
10175 );
10176 Some(output)
10177 }
10178
10179 fn phase_opening_balances(
10181 &mut self,
10182 coa: &Arc<ChartOfAccounts>,
10183 stats: &mut EnhancedGenerationStatistics,
10184 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10185 if !self.config.balance.generate_opening_balances {
10186 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10187 return Ok(Vec::new());
10188 }
10189 info!("Phase 3b: Generating Opening Balances");
10190
10191 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10192 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10193 let fiscal_year = start_date.year();
10194
10195 if let Some(ctx) = &self.shard_context {
10206 if !ctx.opening_balances.is_empty() {
10207 debug!(
10208 "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10209 ctx.opening_balances.len()
10210 );
10211 let mut results = Vec::new();
10212 for company in &self.config.companies {
10213 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10214 .opening_balances
10215 .iter()
10216 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10217 .collect();
10218 let total_assets = ctx
10219 .opening_balances
10220 .iter()
10221 .filter(|ob| {
10222 matches!(
10223 ob.account_type,
10224 AccountType::Asset | AccountType::ContraAsset
10225 )
10226 })
10227 .map(|ob| ob.net_balance())
10228 .sum::<rust_decimal::Decimal>();
10229 let total_liabilities = ctx
10230 .opening_balances
10231 .iter()
10232 .filter(|ob| {
10233 matches!(
10234 ob.account_type,
10235 AccountType::Liability | AccountType::ContraLiability
10236 )
10237 })
10238 .map(|ob| ob.net_balance())
10239 .sum::<rust_decimal::Decimal>();
10240 let total_equity = ctx
10241 .opening_balances
10242 .iter()
10243 .filter(|ob| {
10244 matches!(
10245 ob.account_type,
10246 AccountType::Equity | AccountType::ContraEquity
10247 )
10248 })
10249 .map(|ob| ob.net_balance())
10250 .sum::<rust_decimal::Decimal>();
10251 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10252 < rust_decimal::Decimal::ONE;
10253 results.push(GeneratedOpeningBalance {
10254 company_code: company.code.clone(),
10255 as_of_date: start_date,
10256 balances,
10257 total_assets,
10258 total_liabilities,
10259 total_equity,
10260 is_balanced,
10261 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10262 current_ratio: None,
10263 quick_ratio: None,
10264 debt_to_equity: None,
10265 working_capital: rust_decimal::Decimal::ZERO,
10266 },
10267 });
10268 }
10269 stats.opening_balance_count = results.len();
10270 info!(
10271 "Phase 3b: opening-balance carryover applied ({} companies)",
10272 results.len()
10273 );
10274 self.check_resources_with_log("post-opening-balances")?;
10275 return Ok(results);
10276 }
10277 }
10278
10279 let industry = match self.config.global.industry {
10280 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10281 IndustrySector::Retail => IndustryType::Retail,
10282 IndustrySector::FinancialServices => IndustryType::Financial,
10283 IndustrySector::Healthcare => IndustryType::Healthcare,
10284 IndustrySector::Technology => IndustryType::Technology,
10285 _ => IndustryType::Manufacturing,
10286 };
10287
10288 let config = datasynth_generators::OpeningBalanceConfig {
10289 industry,
10290 ..Default::default()
10291 };
10292 let mut gen =
10293 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10294
10295 let mut results = Vec::new();
10296 for company in &self.config.companies {
10297 let spec = OpeningBalanceSpec::new(
10298 company.code.clone(),
10299 start_date,
10300 fiscal_year,
10301 company.currency.clone(),
10302 rust_decimal::Decimal::new(10_000_000, 0),
10303 industry,
10304 );
10305 let ob = gen.generate(&spec, coa, start_date, &company.code);
10306 results.push(ob);
10307 }
10308
10309 stats.opening_balance_count = results.len();
10310 info!("Opening balances generated: {} companies", results.len());
10311 self.check_resources_with_log("post-opening-balances")?;
10312
10313 Ok(results)
10314 }
10315
10316 fn phase_subledger_reconciliation(
10318 &mut self,
10319 subledger: &SubledgerSnapshot,
10320 entries: &[JournalEntry],
10321 stats: &mut EnhancedGenerationStatistics,
10322 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10323 if !self.config.balance.reconcile_subledgers {
10324 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10325 return Ok(Vec::new());
10326 }
10327 info!("Phase 9b: Reconciling GL to subledger balances");
10328
10329 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10330 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10331 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10332
10333 let tracker_config = BalanceTrackerConfig {
10335 validate_on_each_entry: false,
10336 track_history: false,
10337 fail_on_validation_error: false,
10338 ..Default::default()
10339 };
10340 let recon_currency = self
10341 .config
10342 .companies
10343 .first()
10344 .map(|c| c.currency.clone())
10345 .unwrap_or_else(|| "USD".to_string());
10346 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10347 let validation_errors = tracker.apply_entries(entries);
10348 if !validation_errors.is_empty() {
10349 warn!(
10350 error_count = validation_errors.len(),
10351 "Balance tracker encountered validation errors during subledger reconciliation"
10352 );
10353 for err in &validation_errors {
10354 debug!("Balance validation error: {:?}", err);
10355 }
10356 }
10357
10358 let mut engine = datasynth_generators::ReconciliationEngine::new(
10359 datasynth_generators::ReconciliationConfig::default(),
10360 );
10361
10362 let mut results = Vec::new();
10363 let company_code = self
10364 .config
10365 .companies
10366 .first()
10367 .map(|c| c.code.as_str())
10368 .unwrap_or("1000");
10369
10370 if !subledger.ar_invoices.is_empty() {
10372 let gl_balance = tracker
10373 .get_account_balance(
10374 company_code,
10375 datasynth_core::accounts::control_accounts::AR_CONTROL,
10376 )
10377 .map(|b| b.closing_balance)
10378 .unwrap_or_default();
10379 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10380 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10381 }
10382
10383 if !subledger.ap_invoices.is_empty() {
10385 let gl_balance = tracker
10386 .get_account_balance(
10387 company_code,
10388 datasynth_core::accounts::control_accounts::AP_CONTROL,
10389 )
10390 .map(|b| b.closing_balance)
10391 .unwrap_or_default();
10392 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10393 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10394 }
10395
10396 if !subledger.fa_records.is_empty() {
10398 let gl_asset_balance = tracker
10399 .get_account_balance(
10400 company_code,
10401 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10402 )
10403 .map(|b| b.closing_balance)
10404 .unwrap_or_default();
10405 let gl_accum_depr_balance = tracker
10406 .get_account_balance(
10407 company_code,
10408 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10409 )
10410 .map(|b| b.closing_balance)
10411 .unwrap_or_default();
10412 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10413 subledger.fa_records.iter().collect();
10414 let (asset_recon, depr_recon) = engine.reconcile_fa(
10415 company_code,
10416 end_date,
10417 gl_asset_balance,
10418 gl_accum_depr_balance,
10419 &fa_refs,
10420 );
10421 results.push(asset_recon);
10422 results.push(depr_recon);
10423 }
10424
10425 if !subledger.inventory_positions.is_empty() {
10427 let gl_balance = tracker
10428 .get_account_balance(
10429 company_code,
10430 datasynth_core::accounts::control_accounts::INVENTORY,
10431 )
10432 .map(|b| b.closing_balance)
10433 .unwrap_or_default();
10434 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10435 subledger.inventory_positions.iter().collect();
10436 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10437 }
10438
10439 stats.subledger_reconciliation_count = results.len();
10440 let passed = results.iter().filter(|r| r.is_balanced()).count();
10441 let failed = results.len() - passed;
10442 info!(
10443 "Subledger reconciliation: {} checks, {} passed, {} failed",
10444 results.len(),
10445 passed,
10446 failed
10447 );
10448 self.check_resources_with_log("post-subledger-reconciliation")?;
10449
10450 Ok(results)
10451 }
10452
10453 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10455 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10456
10457 let coa_framework = self.resolve_coa_framework();
10458
10459 let mut gen = ChartOfAccountsGenerator::new(
10460 self.config.chart_of_accounts.complexity,
10461 self.config.global.industry,
10462 self.seed,
10463 )
10464 .with_coa_framework(coa_framework);
10465
10466 let mut built = gen.generate();
10467 if self.config.accounting_standards.enabled {
10471 use datasynth_config::schema::AccountingFrameworkConfig;
10472 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10473 match f {
10474 AccountingFrameworkConfig::UsGaap => "us_gaap",
10475 AccountingFrameworkConfig::Ifrs => "ifrs",
10476 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10477 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10478 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10479 }
10480 .to_string()
10481 });
10482 }
10483 let coa = Arc::new(built);
10484 self.coa = Some(Arc::clone(&coa));
10485
10486 if let Some(pb) = pb {
10487 pb.finish_with_message("Chart of Accounts complete");
10488 }
10489
10490 Ok(coa)
10491 }
10492
10493 fn generate_master_data(&mut self) -> SynthResult<()> {
10495 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10496 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10497 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10498
10499 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10501
10502 let pack = self.primary_pack().clone();
10504
10505 let vendors_per_company = self.phase_config.vendors_per_company;
10507 let customers_per_company = self.phase_config.customers_per_company;
10508 let materials_per_company = self.phase_config.materials_per_company;
10509 let assets_per_company = self.phase_config.assets_per_company;
10510 let coa_framework = self.resolve_coa_framework();
10511
10512 let per_company_results: Vec<_> = self
10515 .config
10516 .companies
10517 .par_iter()
10518 .enumerate()
10519 .map(|(i, company)| {
10520 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10521 let pack = pack.clone();
10522
10523 let mut vendor_gen = VendorGenerator::new(company_seed);
10525 vendor_gen.set_country_pack(pack.clone());
10526 vendor_gen.set_coa_framework(coa_framework);
10527 vendor_gen.set_counter_offset(i * vendors_per_company);
10528 vendor_gen.set_template_provider(self.template_provider.clone());
10531 if self.config.vendor_network.enabled {
10533 let vn = &self.config.vendor_network;
10534 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10535 enabled: true,
10536 depth: vn.depth,
10537 tier1_count: datasynth_generators::TierCountConfig::new(
10538 vn.tier1.min,
10539 vn.tier1.max,
10540 ),
10541 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10542 vn.tier2_per_parent.min,
10543 vn.tier2_per_parent.max,
10544 ),
10545 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10546 vn.tier3_per_parent.min,
10547 vn.tier3_per_parent.max,
10548 ),
10549 cluster_distribution: datasynth_generators::ClusterDistribution {
10550 reliable_strategic: vn.clusters.reliable_strategic,
10551 standard_operational: vn.clusters.standard_operational,
10552 transactional: vn.clusters.transactional,
10553 problematic: vn.clusters.problematic,
10554 },
10555 concentration_limits: datasynth_generators::ConcentrationLimits {
10556 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10557 max_top5: vn.dependencies.top_5_concentration,
10558 },
10559 ..datasynth_generators::VendorNetworkConfig::default()
10560 });
10561 }
10562 let vendor_pool =
10563 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10564
10565 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10567 customer_gen.set_country_pack(pack.clone());
10568 customer_gen.set_coa_framework(coa_framework);
10569 customer_gen.set_counter_offset(i * customers_per_company);
10570 customer_gen.set_template_provider(self.template_provider.clone());
10572 if self.config.customer_segmentation.enabled {
10574 let cs = &self.config.customer_segmentation;
10575 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10576 enabled: true,
10577 segment_distribution: datasynth_generators::SegmentDistribution {
10578 enterprise: cs.value_segments.enterprise.customer_share,
10579 mid_market: cs.value_segments.mid_market.customer_share,
10580 smb: cs.value_segments.smb.customer_share,
10581 consumer: cs.value_segments.consumer.customer_share,
10582 },
10583 referral_config: datasynth_generators::ReferralConfig {
10584 enabled: cs.networks.referrals.enabled,
10585 referral_rate: cs.networks.referrals.referral_rate,
10586 ..Default::default()
10587 },
10588 hierarchy_config: datasynth_generators::HierarchyConfig {
10589 enabled: cs.networks.corporate_hierarchies.enabled,
10590 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10591 ..Default::default()
10592 },
10593 ..Default::default()
10594 };
10595 customer_gen.set_segmentation_config(seg_cfg);
10596 }
10597 let customer_pool = customer_gen.generate_customer_pool(
10598 customers_per_company,
10599 &company.code,
10600 start_date,
10601 );
10602
10603 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10605 material_gen.set_country_pack(pack.clone());
10606 material_gen.set_counter_offset(i * materials_per_company);
10607 material_gen.set_template_provider(self.template_provider.clone());
10609 let material_pool = material_gen.generate_material_pool(
10610 materials_per_company,
10611 &company.code,
10612 start_date,
10613 );
10614
10615 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10617 asset_gen.set_template_provider(self.template_provider.clone());
10619 let asset_pool = asset_gen.generate_asset_pool(
10620 assets_per_company,
10621 &company.code,
10622 (start_date, end_date),
10623 );
10624
10625 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10627 employee_gen.set_country_pack(pack);
10628 employee_gen.set_template_provider(self.template_provider.clone());
10630 let employee_pool =
10631 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10632
10633 let employee_change_history =
10635 employee_gen.generate_all_change_history(&employee_pool, end_date);
10636
10637 let employee_ids: Vec<String> = employee_pool
10639 .employees
10640 .iter()
10641 .map(|e| e.employee_id.clone())
10642 .collect();
10643 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10644 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10645
10646 let mut pc_gen =
10649 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10650 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10651
10652 (
10653 vendor_pool.vendors,
10654 customer_pool.customers,
10655 material_pool.materials,
10656 asset_pool.assets,
10657 employee_pool.employees,
10658 employee_change_history,
10659 cost_centers,
10660 profit_centers,
10661 )
10662 })
10663 .collect();
10664
10665 for (
10667 vendors,
10668 customers,
10669 materials,
10670 assets,
10671 employees,
10672 change_history,
10673 cost_centers,
10674 profit_centers,
10675 ) in per_company_results
10676 {
10677 self.master_data.vendors.extend(vendors);
10678 self.master_data.customers.extend(customers);
10679 self.master_data.materials.extend(materials);
10680 self.master_data.assets.extend(assets);
10681 self.master_data.employees.extend(employees);
10682 self.master_data.cost_centers.extend(cost_centers);
10683 self.master_data.profit_centers.extend(profit_centers);
10684 self.master_data
10685 .employee_change_history
10686 .extend(change_history);
10687 }
10688
10689 {
10693 use datasynth_core::models::IndustrySector;
10694 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10695 let industry = match self.config.global.industry {
10696 IndustrySector::Manufacturing => "manufacturing",
10697 IndustrySector::Retail => "retail",
10698 IndustrySector::FinancialServices => "financial_services",
10699 IndustrySector::Technology => "technology",
10700 IndustrySector::Healthcare => "healthcare",
10701 _ => "other",
10702 };
10703 for (i, company) in self.config.companies.iter().enumerate() {
10704 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10705 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10706 let profile = profile_gen.generate(&company.code, industry);
10707 self.master_data.organizational_profiles.push(profile);
10708 }
10709 }
10710
10711 if let Some(pb) = &pb {
10712 pb.inc(total);
10713 }
10714 if let Some(pb) = pb {
10715 pb.finish_with_message("Master data generation complete");
10716 }
10717
10718 Ok(())
10719 }
10720
10721 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10723 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10724 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10725
10726 let months = (self.config.global.period_months as usize).max(1);
10729 let p2p_count = self
10730 .phase_config
10731 .p2p_chains
10732 .min(self.master_data.vendors.len() * 2 * months);
10733 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10734
10735 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10737 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10738 p2p_gen.set_country_pack(self.primary_pack().clone());
10739 if let Some(ctx) = &self.temporal_context {
10743 p2p_gen.set_temporal_context(Arc::clone(ctx));
10744 }
10745
10746 for i in 0..p2p_count {
10747 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10748 let materials: Vec<&Material> = self
10749 .master_data
10750 .materials
10751 .iter()
10752 .skip(i % self.master_data.materials.len().max(1))
10753 .take(2.min(self.master_data.materials.len()))
10754 .collect();
10755
10756 if materials.is_empty() {
10757 continue;
10758 }
10759
10760 let company = &self.config.companies[i % self.config.companies.len()];
10761 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10762 let fiscal_period = po_date.month() as u8;
10763 let created_by = if self.master_data.employees.is_empty() {
10764 "SYSTEM"
10765 } else {
10766 self.master_data.employees[i % self.master_data.employees.len()]
10767 .user_id
10768 .as_str()
10769 };
10770
10771 let chain = p2p_gen.generate_chain(
10772 &company.code,
10773 vendor,
10774 &materials,
10775 po_date,
10776 start_date.year() as u16,
10777 fiscal_period,
10778 created_by,
10779 );
10780
10781 flows.purchase_orders.push(chain.purchase_order.clone());
10783 flows.goods_receipts.extend(chain.goods_receipts.clone());
10784 if let Some(vi) = &chain.vendor_invoice {
10785 flows.vendor_invoices.push(vi.clone());
10786 }
10787 if let Some(payment) = &chain.payment {
10788 flows.payments.push(payment.clone());
10789 }
10790 for remainder in &chain.remainder_payments {
10791 flows.payments.push(remainder.clone());
10792 }
10793 flows.p2p_chains.push(chain);
10794
10795 if let Some(pb) = &pb {
10796 pb.inc(1);
10797 }
10798 }
10799
10800 if let Some(pb) = pb {
10801 pb.finish_with_message("P2P document flows complete");
10802 }
10803
10804 let o2c_count = self
10807 .phase_config
10808 .o2c_chains
10809 .min(self.master_data.customers.len() * 2 * months);
10810 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10811
10812 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10814 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10815 o2c_gen.set_country_pack(self.primary_pack().clone());
10816 if let Some(ctx) = &self.temporal_context {
10818 o2c_gen.set_temporal_context(Arc::clone(ctx));
10819 }
10820
10821 for i in 0..o2c_count {
10822 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10823 let materials: Vec<&Material> = self
10824 .master_data
10825 .materials
10826 .iter()
10827 .skip(i % self.master_data.materials.len().max(1))
10828 .take(2.min(self.master_data.materials.len()))
10829 .collect();
10830
10831 if materials.is_empty() {
10832 continue;
10833 }
10834
10835 let company = &self.config.companies[i % self.config.companies.len()];
10836 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10837 let fiscal_period = so_date.month() as u8;
10838 let created_by = if self.master_data.employees.is_empty() {
10839 "SYSTEM"
10840 } else {
10841 self.master_data.employees[i % self.master_data.employees.len()]
10842 .user_id
10843 .as_str()
10844 };
10845
10846 let chain = o2c_gen.generate_chain(
10847 &company.code,
10848 customer,
10849 &materials,
10850 so_date,
10851 start_date.year() as u16,
10852 fiscal_period,
10853 created_by,
10854 );
10855
10856 flows.sales_orders.push(chain.sales_order.clone());
10858 flows.deliveries.extend(chain.deliveries.clone());
10859 if let Some(ci) = &chain.customer_invoice {
10860 flows.customer_invoices.push(ci.clone());
10861 }
10862 if let Some(receipt) = &chain.customer_receipt {
10863 flows.payments.push(receipt.clone());
10864 }
10865 for receipt in &chain.remainder_receipts {
10867 flows.payments.push(receipt.clone());
10868 }
10869 flows.o2c_chains.push(chain);
10870
10871 if let Some(pb) = &pb {
10872 pb.inc(1);
10873 }
10874 }
10875
10876 if let Some(pb) = pb {
10877 pb.finish_with_message("O2C document flows complete");
10878 }
10879
10880 {
10884 let mut refs = Vec::new();
10885 for doc in &flows.purchase_orders {
10886 refs.extend(doc.header.document_references.iter().cloned());
10887 }
10888 for doc in &flows.goods_receipts {
10889 refs.extend(doc.header.document_references.iter().cloned());
10890 }
10891 for doc in &flows.vendor_invoices {
10892 refs.extend(doc.header.document_references.iter().cloned());
10893 }
10894 for doc in &flows.sales_orders {
10895 refs.extend(doc.header.document_references.iter().cloned());
10896 }
10897 for doc in &flows.deliveries {
10898 refs.extend(doc.header.document_references.iter().cloned());
10899 }
10900 for doc in &flows.customer_invoices {
10901 refs.extend(doc.header.document_references.iter().cloned());
10902 }
10903 for doc in &flows.payments {
10904 refs.extend(doc.header.document_references.iter().cloned());
10905 }
10906 debug!(
10907 "Collected {} document cross-references from document headers",
10908 refs.len()
10909 );
10910 flows.document_references = refs;
10911 }
10912
10913 Ok(())
10914 }
10915
10916 fn generate_journal_entries(
10918 &mut self,
10919 coa: &Arc<ChartOfAccounts>,
10920 ) -> SynthResult<Vec<JournalEntry>> {
10921 use datasynth_core::traits::ParallelGenerator;
10922
10923 let total = self.calculate_total_transactions();
10924 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10925
10926 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10927 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10928 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10929
10930 let company_codes: Vec<String> = self
10931 .config
10932 .companies
10933 .iter()
10934 .map(|c| c.code.clone())
10935 .collect();
10936
10937 let mut generator = JournalEntryGenerator::new_with_params(
10938 self.config.transactions.clone(),
10939 Arc::clone(coa),
10940 company_codes,
10941 start_date,
10942 end_date,
10943 self.seed,
10944 );
10945 let bp = &self.config.business_processes;
10948 generator.set_business_process_weights(
10949 bp.o2c_weight,
10950 bp.p2p_weight,
10951 bp.r2r_weight,
10952 bp.h2r_weight,
10953 bp.a2r_weight,
10954 );
10955 generator
10960 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10961 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10962 let generator = generator;
10963
10964 let je_pack = self.primary_pack();
10968
10969 let mut generator = generator
10970 .with_master_data(
10971 &self.master_data.vendors,
10972 &self.master_data.customers,
10973 &self.master_data.materials,
10974 )
10975 .with_country_pack_names(je_pack)
10976 .with_country_pack_temporal(
10977 self.config.temporal_patterns.clone(),
10978 self.seed + 200,
10979 je_pack,
10980 )
10981 .with_persona_errors(true)
10982 .with_fraud_config(self.config.fraud.clone());
10983
10984 let temporal_enabled = self.config.temporal.enabled;
10989 let regimes_enabled = self.config.distributions.regime_changes.enabled;
10990 if temporal_enabled || regimes_enabled {
10991 let mut drift_config = if temporal_enabled {
10992 self.config.temporal.to_core_config()
10993 } else {
10994 datasynth_core::distributions::DriftConfig::default()
10997 };
10998 if regimes_enabled {
10999 self.config
11000 .distributions
11001 .regime_changes
11002 .apply_to(&mut drift_config, start_date);
11003 }
11004 generator = generator.with_drift_config(drift_config, self.seed + 100);
11005 }
11006
11007 self.check_memory_limit()?;
11009
11010 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11012
11013 let entries = if total >= 10_000 && num_threads > 1 {
11017 let sub_generators = generator.split(num_threads);
11020 let entries_per_thread = total as usize / num_threads;
11021 let remainder = total as usize % num_threads;
11022
11023 let batches: Vec<Vec<JournalEntry>> = sub_generators
11024 .into_par_iter()
11025 .enumerate()
11026 .map(|(i, mut gen)| {
11027 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11028 gen.generate_batch(count)
11029 })
11030 .collect();
11031
11032 let entries = JournalEntryGenerator::merge_results(batches);
11034
11035 if let Some(pb) = &pb {
11036 pb.inc(total);
11037 }
11038 entries
11039 } else {
11040 let mut entries = Vec::with_capacity(total as usize);
11042 for _ in 0..total {
11043 let entry = generator.generate();
11044 entries.push(entry);
11045 if let Some(pb) = &pb {
11046 pb.inc(1);
11047 }
11048 }
11049 entries
11050 };
11051
11052 if let Some(pb) = pb {
11053 pb.finish_with_message("Journal entries complete");
11054 }
11055
11056 Ok(entries)
11057 }
11058
11059 fn generate_jes_from_document_flows(
11064 &mut self,
11065 flows: &DocumentFlowSnapshot,
11066 ) -> SynthResult<Vec<JournalEntry>> {
11067 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11068 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11069
11070 let je_config = match self.resolve_coa_framework() {
11071 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11072 CoAFramework::GermanSkr04 => {
11073 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11074 DocumentFlowJeConfig::from(&fa)
11075 }
11076 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11077 };
11078
11079 let populate_fec = je_config.populate_fec_fields;
11080 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11081
11082 if populate_fec {
11086 let mut aux_lookup = std::collections::HashMap::new();
11087 for vendor in &self.master_data.vendors {
11088 if let Some(ref aux) = vendor.auxiliary_gl_account {
11089 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11090 }
11091 }
11092 for customer in &self.master_data.customers {
11093 if let Some(ref aux) = customer.auxiliary_gl_account {
11094 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11095 }
11096 }
11097 if !aux_lookup.is_empty() {
11098 generator.set_auxiliary_account_lookup(aux_lookup);
11099 }
11100 }
11101
11102 let mut entries = Vec::new();
11103
11104 for chain in &flows.p2p_chains {
11106 let chain_entries = generator.generate_from_p2p_chain(chain);
11107 entries.extend(chain_entries);
11108 if let Some(pb) = &pb {
11109 pb.inc(1);
11110 }
11111 }
11112
11113 for chain in &flows.o2c_chains {
11115 let chain_entries = generator.generate_from_o2c_chain(chain);
11116 entries.extend(chain_entries);
11117 if let Some(pb) = &pb {
11118 pb.inc(1);
11119 }
11120 }
11121
11122 if let Some(pb) = pb {
11123 pb.finish_with_message(format!(
11124 "Generated {} JEs from document flows",
11125 entries.len()
11126 ));
11127 }
11128
11129 Ok(entries)
11130 }
11131
11132 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11138 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11139
11140 let mut jes = Vec::with_capacity(payroll_runs.len());
11141
11142 for run in payroll_runs {
11143 let mut je = JournalEntry::new_simple(
11144 format!("JE-PAYROLL-{}", run.payroll_id),
11145 run.company_code.clone(),
11146 run.run_date,
11147 format!("Payroll {}", run.payroll_id),
11148 );
11149
11150 je.add_line(JournalEntryLine {
11152 line_number: 1,
11153 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11154 debit_amount: run.total_gross,
11155 reference: Some(run.payroll_id.clone()),
11156 text: Some(format!(
11157 "Payroll {} ({} employees)",
11158 run.payroll_id, run.employee_count
11159 )),
11160 ..Default::default()
11161 });
11162
11163 je.add_line(JournalEntryLine {
11165 line_number: 2,
11166 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11167 credit_amount: run.total_gross,
11168 reference: Some(run.payroll_id.clone()),
11169 ..Default::default()
11170 });
11171
11172 jes.push(je);
11173 }
11174
11175 jes
11176 }
11177
11178 fn link_document_flows_to_subledgers(
11183 &mut self,
11184 flows: &DocumentFlowSnapshot,
11185 ) -> SynthResult<SubledgerSnapshot> {
11186 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11187 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11188
11189 let vendor_names: std::collections::HashMap<String, String> = self
11191 .master_data
11192 .vendors
11193 .iter()
11194 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11195 .collect();
11196 let customer_names: std::collections::HashMap<String, String> = self
11197 .master_data
11198 .customers
11199 .iter()
11200 .map(|c| (c.customer_id.clone(), c.name.clone()))
11201 .collect();
11202
11203 let mut linker = DocumentFlowLinker::new()
11204 .with_vendor_names(vendor_names)
11205 .with_customer_names(customer_names);
11206
11207 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11209 if let Some(pb) = &pb {
11210 pb.inc(flows.vendor_invoices.len() as u64);
11211 }
11212
11213 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11215 if let Some(pb) = &pb {
11216 pb.inc(flows.customer_invoices.len() as u64);
11217 }
11218
11219 if let Some(pb) = pb {
11220 pb.finish_with_message(format!(
11221 "Linked {} AP and {} AR invoices",
11222 ap_invoices.len(),
11223 ar_invoices.len()
11224 ));
11225 }
11226
11227 Ok(SubledgerSnapshot {
11228 ap_invoices,
11229 ar_invoices,
11230 fa_records: Vec::new(),
11231 inventory_positions: Vec::new(),
11232 inventory_movements: Vec::new(),
11233 ar_aging_reports: Vec::new(),
11235 ap_aging_reports: Vec::new(),
11236 depreciation_runs: Vec::new(),
11238 inventory_valuations: Vec::new(),
11239 dunning_runs: Vec::new(),
11241 dunning_letters: Vec::new(),
11242 })
11243 }
11244
11245 #[allow(clippy::too_many_arguments)]
11250 fn generate_ocpm_events(
11251 &mut self,
11252 flows: &DocumentFlowSnapshot,
11253 sourcing: &SourcingSnapshot,
11254 hr: &HrSnapshot,
11255 manufacturing: &ManufacturingSnapshot,
11256 banking: &BankingSnapshot,
11257 audit: &AuditSnapshot,
11258 financial_reporting: &FinancialReportingSnapshot,
11259 ) -> SynthResult<OcpmSnapshot> {
11260 let total_chains = flows.p2p_chains.len()
11261 + flows.o2c_chains.len()
11262 + sourcing.sourcing_projects.len()
11263 + hr.payroll_runs.len()
11264 + manufacturing.production_orders.len()
11265 + banking.customers.len()
11266 + audit.engagements.len()
11267 + financial_reporting.bank_reconciliations.len();
11268 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11269
11270 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11272 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11273
11274 let ocpm_config = OcpmGeneratorConfig {
11276 generate_p2p: true,
11277 generate_o2c: true,
11278 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11279 generate_h2r: !hr.payroll_runs.is_empty(),
11280 generate_mfg: !manufacturing.production_orders.is_empty(),
11281 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11282 generate_bank: !banking.customers.is_empty(),
11283 generate_audit: !audit.engagements.is_empty(),
11284 happy_path_rate: 0.75,
11285 exception_path_rate: 0.20,
11286 error_path_rate: 0.05,
11287 add_duration_variability: true,
11288 duration_std_dev_factor: 0.3,
11289 };
11290 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11291 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11292
11293 let available_users: Vec<String> = self
11295 .master_data
11296 .employees
11297 .iter()
11298 .take(20)
11299 .map(|e| e.user_id.clone())
11300 .collect();
11301
11302 let fallback_date =
11304 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11305 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11306 .unwrap_or(fallback_date);
11307 let base_midnight = base_date
11308 .and_hms_opt(0, 0, 0)
11309 .expect("midnight is always valid");
11310 let base_datetime =
11311 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11312
11313 let add_result = |event_log: &mut OcpmEventLog,
11315 result: datasynth_ocpm::CaseGenerationResult| {
11316 for event in result.events {
11317 event_log.add_event(event);
11318 }
11319 for object in result.objects {
11320 event_log.add_object(object);
11321 }
11322 for relationship in result.relationships {
11323 event_log.add_relationship(relationship);
11324 }
11325 for corr in result.correlation_events {
11326 event_log.add_correlation_event(corr);
11327 }
11328 event_log.add_case(result.case_trace);
11329 };
11330
11331 for chain in &flows.p2p_chains {
11333 let po = &chain.purchase_order;
11334 let documents = P2pDocuments::new(
11335 &po.header.document_id,
11336 &po.vendor_id,
11337 &po.header.company_code,
11338 po.total_net_amount,
11339 &po.header.currency,
11340 &ocpm_uuid_factory,
11341 )
11342 .with_goods_receipt(
11343 chain
11344 .goods_receipts
11345 .first()
11346 .map(|gr| gr.header.document_id.as_str())
11347 .unwrap_or(""),
11348 &ocpm_uuid_factory,
11349 )
11350 .with_invoice(
11351 chain
11352 .vendor_invoice
11353 .as_ref()
11354 .map(|vi| vi.header.document_id.as_str())
11355 .unwrap_or(""),
11356 &ocpm_uuid_factory,
11357 )
11358 .with_payment(
11359 chain
11360 .payment
11361 .as_ref()
11362 .map(|p| p.header.document_id.as_str())
11363 .unwrap_or(""),
11364 &ocpm_uuid_factory,
11365 );
11366
11367 let start_time =
11368 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11369 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11370 add_result(&mut event_log, result);
11371
11372 if let Some(pb) = &pb {
11373 pb.inc(1);
11374 }
11375 }
11376
11377 for chain in &flows.o2c_chains {
11379 let so = &chain.sales_order;
11380 let documents = O2cDocuments::new(
11381 &so.header.document_id,
11382 &so.customer_id,
11383 &so.header.company_code,
11384 so.total_net_amount,
11385 &so.header.currency,
11386 &ocpm_uuid_factory,
11387 )
11388 .with_delivery(
11389 chain
11390 .deliveries
11391 .first()
11392 .map(|d| d.header.document_id.as_str())
11393 .unwrap_or(""),
11394 &ocpm_uuid_factory,
11395 )
11396 .with_invoice(
11397 chain
11398 .customer_invoice
11399 .as_ref()
11400 .map(|ci| ci.header.document_id.as_str())
11401 .unwrap_or(""),
11402 &ocpm_uuid_factory,
11403 )
11404 .with_receipt(
11405 chain
11406 .customer_receipt
11407 .as_ref()
11408 .map(|r| r.header.document_id.as_str())
11409 .unwrap_or(""),
11410 &ocpm_uuid_factory,
11411 );
11412
11413 let start_time =
11414 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11415 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11416 add_result(&mut event_log, result);
11417
11418 if let Some(pb) = &pb {
11419 pb.inc(1);
11420 }
11421 }
11422
11423 for project in &sourcing.sourcing_projects {
11425 let vendor_id = sourcing
11427 .contracts
11428 .iter()
11429 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11430 .map(|c| c.vendor_id.clone())
11431 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11432 .or_else(|| {
11433 self.master_data
11434 .vendors
11435 .first()
11436 .map(|v| v.vendor_id.clone())
11437 })
11438 .unwrap_or_else(|| "V000".to_string());
11439 let mut docs = S2cDocuments::new(
11440 &project.project_id,
11441 &vendor_id,
11442 &project.company_code,
11443 project.estimated_annual_spend,
11444 &ocpm_uuid_factory,
11445 );
11446 if let Some(rfx) = sourcing
11448 .rfx_events
11449 .iter()
11450 .find(|r| r.sourcing_project_id == project.project_id)
11451 {
11452 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11453 if let Some(bid) = sourcing.bids.iter().find(|b| {
11455 b.rfx_id == rfx.rfx_id
11456 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11457 }) {
11458 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11459 }
11460 }
11461 if let Some(contract) = sourcing
11463 .contracts
11464 .iter()
11465 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11466 {
11467 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11468 }
11469 let start_time = base_datetime - chrono::Duration::days(90);
11470 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11471 add_result(&mut event_log, result);
11472
11473 if let Some(pb) = &pb {
11474 pb.inc(1);
11475 }
11476 }
11477
11478 for run in &hr.payroll_runs {
11480 let employee_id = hr
11482 .payroll_line_items
11483 .iter()
11484 .find(|li| li.payroll_id == run.payroll_id)
11485 .map(|li| li.employee_id.as_str())
11486 .unwrap_or("EMP000");
11487 let docs = H2rDocuments::new(
11488 &run.payroll_id,
11489 employee_id,
11490 &run.company_code,
11491 run.total_gross,
11492 &ocpm_uuid_factory,
11493 )
11494 .with_time_entries(
11495 hr.time_entries
11496 .iter()
11497 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11498 .take(5)
11499 .map(|t| t.entry_id.as_str())
11500 .collect(),
11501 );
11502 let start_time = base_datetime - chrono::Duration::days(30);
11503 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11504 add_result(&mut event_log, result);
11505
11506 if let Some(pb) = &pb {
11507 pb.inc(1);
11508 }
11509 }
11510
11511 for order in &manufacturing.production_orders {
11513 let mut docs = MfgDocuments::new(
11514 &order.order_id,
11515 &order.material_id,
11516 &order.company_code,
11517 order.planned_quantity,
11518 &ocpm_uuid_factory,
11519 )
11520 .with_operations(
11521 order
11522 .operations
11523 .iter()
11524 .map(|o| format!("OP-{:04}", o.operation_number))
11525 .collect::<Vec<_>>()
11526 .iter()
11527 .map(std::string::String::as_str)
11528 .collect(),
11529 );
11530 if let Some(insp) = manufacturing
11532 .quality_inspections
11533 .iter()
11534 .find(|i| i.reference_id == order.order_id)
11535 {
11536 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11537 }
11538 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11540 cc.items
11541 .iter()
11542 .any(|item| item.material_id == order.material_id)
11543 }) {
11544 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11545 }
11546 let start_time = base_datetime - chrono::Duration::days(60);
11547 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11548 add_result(&mut event_log, result);
11549
11550 if let Some(pb) = &pb {
11551 pb.inc(1);
11552 }
11553 }
11554
11555 for customer in &banking.customers {
11557 let customer_id_str = customer.customer_id.to_string();
11558 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11559 if let Some(account) = banking
11561 .accounts
11562 .iter()
11563 .find(|a| a.primary_owner_id == customer.customer_id)
11564 {
11565 let account_id_str = account.account_id.to_string();
11566 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11567 let txn_strs: Vec<String> = banking
11569 .transactions
11570 .iter()
11571 .filter(|t| t.account_id == account.account_id)
11572 .take(10)
11573 .map(|t| t.transaction_id.to_string())
11574 .collect();
11575 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11576 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11577 .transactions
11578 .iter()
11579 .filter(|t| t.account_id == account.account_id)
11580 .take(10)
11581 .map(|t| t.amount)
11582 .collect();
11583 if !txn_ids.is_empty() {
11584 docs = docs.with_transactions(txn_ids, txn_amounts);
11585 }
11586 }
11587 let start_time = base_datetime - chrono::Duration::days(180);
11588 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11589 add_result(&mut event_log, result);
11590
11591 if let Some(pb) = &pb {
11592 pb.inc(1);
11593 }
11594 }
11595
11596 for engagement in &audit.engagements {
11598 let engagement_id_str = engagement.engagement_id.to_string();
11599 let docs = AuditDocuments::new(
11600 &engagement_id_str,
11601 &engagement.client_entity_id,
11602 &ocpm_uuid_factory,
11603 )
11604 .with_workpapers(
11605 audit
11606 .workpapers
11607 .iter()
11608 .filter(|w| w.engagement_id == engagement.engagement_id)
11609 .take(10)
11610 .map(|w| w.workpaper_id.to_string())
11611 .collect::<Vec<_>>()
11612 .iter()
11613 .map(std::string::String::as_str)
11614 .collect(),
11615 )
11616 .with_evidence(
11617 audit
11618 .evidence
11619 .iter()
11620 .filter(|e| e.engagement_id == engagement.engagement_id)
11621 .take(10)
11622 .map(|e| e.evidence_id.to_string())
11623 .collect::<Vec<_>>()
11624 .iter()
11625 .map(std::string::String::as_str)
11626 .collect(),
11627 )
11628 .with_risks(
11629 audit
11630 .risk_assessments
11631 .iter()
11632 .filter(|r| r.engagement_id == engagement.engagement_id)
11633 .take(5)
11634 .map(|r| r.risk_id.to_string())
11635 .collect::<Vec<_>>()
11636 .iter()
11637 .map(std::string::String::as_str)
11638 .collect(),
11639 )
11640 .with_findings(
11641 audit
11642 .findings
11643 .iter()
11644 .filter(|f| f.engagement_id == engagement.engagement_id)
11645 .take(5)
11646 .map(|f| f.finding_id.to_string())
11647 .collect::<Vec<_>>()
11648 .iter()
11649 .map(std::string::String::as_str)
11650 .collect(),
11651 )
11652 .with_judgments(
11653 audit
11654 .judgments
11655 .iter()
11656 .filter(|j| j.engagement_id == engagement.engagement_id)
11657 .take(5)
11658 .map(|j| j.judgment_id.to_string())
11659 .collect::<Vec<_>>()
11660 .iter()
11661 .map(std::string::String::as_str)
11662 .collect(),
11663 );
11664 let start_time = base_datetime - chrono::Duration::days(120);
11665 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11666 add_result(&mut event_log, result);
11667
11668 if let Some(pb) = &pb {
11669 pb.inc(1);
11670 }
11671 }
11672
11673 for recon in &financial_reporting.bank_reconciliations {
11675 let docs = BankReconDocuments::new(
11676 &recon.reconciliation_id,
11677 &recon.bank_account_id,
11678 &recon.company_code,
11679 recon.bank_ending_balance,
11680 &ocpm_uuid_factory,
11681 )
11682 .with_statement_lines(
11683 recon
11684 .statement_lines
11685 .iter()
11686 .take(20)
11687 .map(|l| l.line_id.as_str())
11688 .collect(),
11689 )
11690 .with_reconciling_items(
11691 recon
11692 .reconciling_items
11693 .iter()
11694 .take(10)
11695 .map(|i| i.item_id.as_str())
11696 .collect(),
11697 );
11698 let start_time = base_datetime - chrono::Duration::days(30);
11699 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11700 add_result(&mut event_log, result);
11701
11702 if let Some(pb) = &pb {
11703 pb.inc(1);
11704 }
11705 }
11706
11707 event_log.compute_variants();
11709
11710 let summary = event_log.summary();
11711
11712 if let Some(pb) = pb {
11713 pb.finish_with_message(format!(
11714 "Generated {} OCPM events, {} objects",
11715 summary.event_count, summary.object_count
11716 ));
11717 }
11718
11719 Ok(OcpmSnapshot {
11720 event_count: summary.event_count,
11721 object_count: summary.object_count,
11722 case_count: summary.case_count,
11723 event_log: Some(event_log),
11724 })
11725 }
11726
11727 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11729 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11730
11731 let total_rate = if self.config.anomaly_injection.enabled {
11734 self.config.anomaly_injection.rates.total_rate
11735 } else if self.config.fraud.enabled {
11736 self.config.fraud.fraud_rate
11737 } else {
11738 0.02
11739 };
11740
11741 let fraud_rate = if self.config.anomaly_injection.enabled {
11742 self.config.anomaly_injection.rates.fraud_rate
11743 } else {
11744 AnomalyRateConfig::default().fraud_rate
11745 };
11746
11747 let error_rate = if self.config.anomaly_injection.enabled {
11748 self.config.anomaly_injection.rates.error_rate
11749 } else {
11750 AnomalyRateConfig::default().error_rate
11751 };
11752
11753 let process_issue_rate = if self.config.anomaly_injection.enabled {
11754 self.config.anomaly_injection.rates.process_rate
11755 } else {
11756 AnomalyRateConfig::default().process_issue_rate
11757 };
11758
11759 let anomaly_config = AnomalyInjectorConfig {
11760 rates: AnomalyRateConfig {
11761 total_rate,
11762 fraud_rate,
11763 error_rate,
11764 process_issue_rate,
11765 ..Default::default()
11766 },
11767 seed: self.seed + 5000,
11768 ..Default::default()
11769 };
11770
11771 let mut injector = AnomalyInjector::new(anomaly_config);
11772 let result = injector.process_entries(entries);
11773
11774 if let Some(pb) = &pb {
11775 pb.inc(entries.len() as u64);
11776 pb.finish_with_message("Anomaly injection complete");
11777 }
11778
11779 let mut by_type = HashMap::new();
11780 for label in &result.labels {
11781 *by_type
11782 .entry(format!("{:?}", label.anomaly_type))
11783 .or_insert(0) += 1;
11784 }
11785
11786 Ok(AnomalyLabels {
11787 labels: result.labels,
11788 summary: Some(result.summary),
11789 by_type,
11790 })
11791 }
11792
11793 fn validate_journal_entries(
11802 &mut self,
11803 entries: &[JournalEntry],
11804 ) -> SynthResult<BalanceValidationResult> {
11805 let clean_entries: Vec<&JournalEntry> = entries
11807 .iter()
11808 .filter(|e| {
11809 e.header
11810 .header_text
11811 .as_ref()
11812 .map(|t| !t.contains("[HUMAN_ERROR:"))
11813 .unwrap_or(true)
11814 })
11815 .collect();
11816
11817 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11818
11819 let config = BalanceTrackerConfig {
11821 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11825 };
11826 let validation_currency = self
11827 .config
11828 .companies
11829 .first()
11830 .map(|c| c.currency.clone())
11831 .unwrap_or_else(|| "USD".to_string());
11832
11833 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11834
11835 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11837 let errors = tracker.apply_entries(&clean_refs);
11838
11839 if let Some(pb) = &pb {
11840 pb.inc(entries.len() as u64);
11841 }
11842
11843 let has_unbalanced = tracker
11846 .get_validation_errors()
11847 .iter()
11848 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11849
11850 let mut all_errors = errors;
11853 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11854 let company_codes: Vec<String> = self
11855 .config
11856 .companies
11857 .iter()
11858 .map(|c| c.code.clone())
11859 .collect();
11860
11861 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11862 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11863 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11864
11865 for company_code in &company_codes {
11866 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11867 all_errors.push(e);
11868 }
11869 }
11870
11871 let stats = tracker.get_statistics();
11873
11874 let is_balanced = all_errors.is_empty();
11876
11877 if let Some(pb) = pb {
11878 let msg = if is_balanced {
11879 "Balance validation passed"
11880 } else {
11881 "Balance validation completed with errors"
11882 };
11883 pb.finish_with_message(msg);
11884 }
11885
11886 Ok(BalanceValidationResult {
11887 validated: true,
11888 is_balanced,
11889 entries_processed: stats.entries_processed,
11890 total_debits: stats.total_debits,
11891 total_credits: stats.total_credits,
11892 accounts_tracked: stats.accounts_tracked,
11893 companies_tracked: stats.companies_tracked,
11894 validation_errors: all_errors,
11895 has_unbalanced_entries: has_unbalanced,
11896 })
11897 }
11898
11899 fn inject_data_quality(
11904 &mut self,
11905 entries: &mut [JournalEntry],
11906 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11907 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11908
11909 let config = if self.config.data_quality.enabled {
11912 let dq = &self.config.data_quality;
11913 let field_rates = dq.missing_values.field_rates.clone();
11917 let mut required_fields: std::collections::HashSet<String> =
11918 dq.missing_values.protected_fields.iter().cloned().collect();
11919 for f in [
11922 "document_id",
11923 "company_code",
11924 "posting_date",
11925 "fiscal_year",
11926 "fiscal_period",
11927 "gl_account",
11928 "line_number",
11929 "transaction_id",
11930 ] {
11931 required_fields.insert(f.to_string());
11932 }
11933 DataQualityConfig {
11934 enable_missing_values: dq.missing_values.enabled,
11935 missing_values: datasynth_generators::MissingValueConfig {
11936 global_rate: dq.effective_missing_rate(),
11937 field_rates,
11938 required_fields,
11939 ..Default::default()
11940 },
11941 enable_format_variations: dq.format_variations.enabled,
11942 format_variations: datasynth_generators::FormatVariationConfig {
11943 date_variation_rate: dq.format_variations.dates.rate,
11944 amount_variation_rate: dq.format_variations.amounts.rate,
11945 identifier_variation_rate: dq.format_variations.identifiers.rate,
11946 ..Default::default()
11947 },
11948 enable_duplicates: dq.duplicates.enabled,
11949 duplicates: datasynth_generators::DuplicateConfig {
11950 duplicate_rate: dq.effective_duplicate_rate(),
11951 ..Default::default()
11952 },
11953 enable_typos: dq.typos.enabled,
11954 typos: datasynth_generators::TypoConfig {
11955 char_error_rate: dq.effective_typo_rate(),
11956 ..Default::default()
11957 },
11958 enable_encoding_issues: dq.encoding_issues.enabled,
11959 encoding_issue_rate: dq.encoding_issues.rate,
11960 seed: self.seed.wrapping_add(77), track_statistics: true,
11962 }
11963 } else {
11964 DataQualityConfig::minimal()
11965 };
11966 let mut injector = DataQualityInjector::new(config);
11967
11968 injector.set_country_pack(self.primary_pack().clone());
11970
11971 let context = HashMap::new();
11973
11974 for entry in entries.iter_mut() {
11975 if let Some(text) = &entry.header.header_text {
11977 let processed = injector.process_text_field(
11978 "header_text",
11979 text,
11980 &entry.header.document_id.to_string(),
11981 &context,
11982 );
11983 match processed {
11984 Some(new_text) if new_text != *text => {
11985 entry.header.header_text = Some(new_text);
11986 }
11987 None => {
11988 entry.header.header_text = None; }
11990 _ => {}
11991 }
11992 }
11993
11994 if let Some(ref_text) = &entry.header.reference {
11996 let processed = injector.process_text_field(
11997 "reference",
11998 ref_text,
11999 &entry.header.document_id.to_string(),
12000 &context,
12001 );
12002 match processed {
12003 Some(new_text) if new_text != *ref_text => {
12004 entry.header.reference = Some(new_text);
12005 }
12006 None => {
12007 entry.header.reference = None;
12008 }
12009 _ => {}
12010 }
12011 }
12012
12013 let user_persona = entry.header.user_persona.clone();
12015 if let Some(processed) = injector.process_text_field(
12016 "user_persona",
12017 &user_persona,
12018 &entry.header.document_id.to_string(),
12019 &context,
12020 ) {
12021 if processed != user_persona {
12022 entry.header.user_persona = processed;
12023 }
12024 }
12025
12026 for line in &mut entry.lines {
12028 if let Some(ref text) = line.line_text {
12030 let processed = injector.process_text_field(
12031 "line_text",
12032 text,
12033 &entry.header.document_id.to_string(),
12034 &context,
12035 );
12036 match processed {
12037 Some(new_text) if new_text != *text => {
12038 line.line_text = Some(new_text);
12039 }
12040 None => {
12041 line.line_text = None;
12042 }
12043 _ => {}
12044 }
12045 }
12046
12047 if let Some(cc) = &line.cost_center {
12049 let processed = injector.process_text_field(
12050 "cost_center",
12051 cc,
12052 &entry.header.document_id.to_string(),
12053 &context,
12054 );
12055 match processed {
12056 Some(new_cc) if new_cc != *cc => {
12057 line.cost_center = Some(new_cc);
12058 }
12059 None => {
12060 line.cost_center = None;
12061 }
12062 _ => {}
12063 }
12064 }
12065
12066 macro_rules! process_opt_field {
12074 ($field_name:expr, $opt:expr) => {
12075 if let Some(val) = $opt.as_ref() {
12076 match injector.process_text_field(
12077 $field_name,
12078 val,
12079 &entry.header.document_id.to_string(),
12080 &context,
12081 ) {
12082 Some(new_val) if new_val != *val => {
12083 *$opt = Some(new_val);
12084 }
12085 None => {
12086 *$opt = None;
12087 }
12088 _ => {}
12089 }
12090 }
12091 };
12092 }
12093
12094 process_opt_field!("profit_center", &mut line.profit_center);
12095 process_opt_field!("assignment", &mut line.assignment);
12096 process_opt_field!("tax_code", &mut line.tax_code);
12097 process_opt_field!("account_description", &mut line.account_description);
12098 process_opt_field!(
12099 "auxiliary_account_number",
12100 &mut line.auxiliary_account_number
12101 );
12102 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12103 process_opt_field!("lettrage", &mut line.lettrage);
12104 }
12105
12106 if let Some(pb) = &pb {
12107 pb.inc(1);
12108 }
12109 }
12110
12111 if let Some(pb) = pb {
12112 pb.finish_with_message("Data quality injection complete");
12113 }
12114
12115 let quality_issues = injector.issues().to_vec();
12116 Ok((injector.stats().clone(), quality_issues))
12117 }
12118
12119 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12130 let use_fsm = self
12132 .config
12133 .audit
12134 .fsm
12135 .as_ref()
12136 .map(|f| f.enabled)
12137 .unwrap_or(false);
12138
12139 if use_fsm {
12140 return self.generate_audit_data_with_fsm(entries);
12141 }
12142
12143 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12145 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12146 let fiscal_year = start_date.year() as u16;
12147 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12148
12149 let total_revenue: rust_decimal::Decimal = entries
12151 .iter()
12152 .flat_map(|e| e.lines.iter())
12153 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12154 .map(|l| l.credit_amount)
12155 .sum();
12156
12157 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12159
12160 let mut snapshot = AuditSnapshot::default();
12161
12162 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12164 engagement_gen.set_team_config(&self.config.audit.team);
12167
12168 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12169 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12173 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12174 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12175 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12176 finding_gen.set_template_provider(self.template_provider.clone());
12178 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12179 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12180 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12181 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12182 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12183 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12184 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12185
12186 let accounts: Vec<String> = self
12188 .coa
12189 .as_ref()
12190 .map(|coa| {
12191 coa.get_postable_accounts()
12192 .iter()
12193 .map(|acc| acc.account_code().to_string())
12194 .collect()
12195 })
12196 .unwrap_or_default();
12197
12198 for (i, company) in self.config.companies.iter().enumerate() {
12200 let company_revenue = total_revenue
12202 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12203
12204 let engagements_for_company =
12206 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12207 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12208 1
12209 } else {
12210 0
12211 };
12212
12213 for _eng_idx in 0..(engagements_for_company + extra) {
12214 let eng_type =
12219 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12220
12221 let mut engagement = engagement_gen.generate_engagement(
12223 &company.code,
12224 &company.name,
12225 fiscal_year,
12226 period_end,
12227 company_revenue,
12228 Some(eng_type),
12229 );
12230
12231 if !self.master_data.employees.is_empty() {
12233 let emp_count = self.master_data.employees.len();
12234 let base = (i * 10 + _eng_idx) % emp_count;
12236 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12237 .employee_id
12238 .clone();
12239 engagement.engagement_manager_id = self.master_data.employees
12240 [(base + 1) % emp_count]
12241 .employee_id
12242 .clone();
12243 let real_team: Vec<String> = engagement
12244 .team_member_ids
12245 .iter()
12246 .enumerate()
12247 .map(|(j, _)| {
12248 self.master_data.employees[(base + 2 + j) % emp_count]
12249 .employee_id
12250 .clone()
12251 })
12252 .collect();
12253 engagement.team_member_ids = real_team;
12254 }
12255
12256 if let Some(pb) = &pb {
12257 pb.inc(1);
12258 }
12259
12260 let team_members: Vec<String> = engagement.team_member_ids.clone();
12262
12263 let workpapers = if self.config.audit.generate_workpapers {
12269 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12270 } else {
12271 Vec::new()
12272 };
12273
12274 for wp in &workpapers {
12275 if let Some(pb) = &pb {
12276 pb.inc(1);
12277 }
12278
12279 let evidence = evidence_gen.generate_evidence_for_workpaper(
12281 wp,
12282 &team_members,
12283 wp.preparer_date,
12284 );
12285
12286 for _ in &evidence {
12287 if let Some(pb) = &pb {
12288 pb.inc(1);
12289 }
12290 }
12291
12292 snapshot.evidence.extend(evidence);
12293 }
12294
12295 let risks =
12297 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12298
12299 for _ in &risks {
12300 if let Some(pb) = &pb {
12301 pb.inc(1);
12302 }
12303 }
12304 snapshot.risk_assessments.extend(risks);
12305
12306 let findings = finding_gen.generate_findings_for_engagement(
12308 &engagement,
12309 &workpapers,
12310 &team_members,
12311 );
12312
12313 for _ in &findings {
12314 if let Some(pb) = &pb {
12315 pb.inc(1);
12316 }
12317 }
12318 snapshot.findings.extend(findings);
12319
12320 let judgments =
12322 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12323
12324 for _ in &judgments {
12325 if let Some(pb) = &pb {
12326 pb.inc(1);
12327 }
12328 }
12329 snapshot.judgments.extend(judgments);
12330
12331 let (confs, resps) =
12333 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12334 snapshot.confirmations.extend(confs);
12335 snapshot.confirmation_responses.extend(resps);
12336
12337 let team_pairs: Vec<(String, String)> = team_members
12339 .iter()
12340 .map(|id| {
12341 let name = self
12342 .master_data
12343 .employees
12344 .iter()
12345 .find(|e| e.employee_id == *id)
12346 .map(|e| e.display_name.clone())
12347 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12348 (id.clone(), name)
12349 })
12350 .collect();
12351 for wp in &workpapers {
12352 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12353 snapshot.procedure_steps.extend(steps);
12354 }
12355
12356 for wp in &workpapers {
12358 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12359 snapshot.samples.push(sample);
12360 }
12361 }
12362
12363 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12365 snapshot.analytical_results.extend(analytical);
12366
12367 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12369 snapshot.ia_functions.push(ia_func);
12370 snapshot.ia_reports.extend(ia_reports);
12371
12372 let vendor_names: Vec<String> = self
12374 .master_data
12375 .vendors
12376 .iter()
12377 .map(|v| v.name.clone())
12378 .collect();
12379 let customer_names: Vec<String> = self
12380 .master_data
12381 .customers
12382 .iter()
12383 .map(|c| c.name.clone())
12384 .collect();
12385 let (parties, rp_txns) =
12386 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12387 snapshot.related_parties.extend(parties);
12388 snapshot.related_party_transactions.extend(rp_txns);
12389
12390 snapshot.workpapers.extend(workpapers);
12392
12393 {
12395 let scope_id = format!(
12396 "SCOPE-{}-{}",
12397 engagement.engagement_id.simple(),
12398 &engagement.client_entity_id
12399 );
12400 let scope = datasynth_core::models::audit::AuditScope::new(
12401 scope_id.clone(),
12402 engagement.engagement_id.to_string(),
12403 engagement.client_entity_id.clone(),
12404 engagement.materiality,
12405 );
12406 let mut eng = engagement;
12408 eng.scope_id = Some(scope_id);
12409 snapshot.audit_scopes.push(scope);
12410 snapshot.engagements.push(eng);
12411 }
12412 }
12413 }
12414
12415 if self.config.companies.len() > 1 {
12419 let group_materiality = snapshot
12422 .engagements
12423 .first()
12424 .map(|e| e.materiality)
12425 .unwrap_or_else(|| {
12426 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12427 total_revenue * pct
12428 });
12429
12430 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12431 let group_engagement_id = snapshot
12432 .engagements
12433 .first()
12434 .map(|e| e.engagement_id.to_string())
12435 .unwrap_or_else(|| "GROUP-ENG".to_string());
12436
12437 let component_snapshot = component_gen.generate(
12438 &self.config.companies,
12439 group_materiality,
12440 &group_engagement_id,
12441 period_end,
12442 );
12443
12444 snapshot.component_auditors = component_snapshot.component_auditors;
12445 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12446 snapshot.component_instructions = component_snapshot.component_instructions;
12447 snapshot.component_reports = component_snapshot.component_reports;
12448
12449 info!(
12450 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12451 snapshot.component_auditors.len(),
12452 snapshot.component_instructions.len(),
12453 snapshot.component_reports.len(),
12454 );
12455 }
12456
12457 {
12461 let applicable_framework = self
12462 .config
12463 .accounting_standards
12464 .framework
12465 .as_ref()
12466 .map(|f| format!("{f:?}"))
12467 .unwrap_or_else(|| "IFRS".to_string());
12468
12469 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12470 let entity_count = self.config.companies.len();
12471
12472 for engagement in &snapshot.engagements {
12473 let company = self
12474 .config
12475 .companies
12476 .iter()
12477 .find(|c| c.code == engagement.client_entity_id);
12478 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12479 let letter_date = engagement.planning_start;
12480 let letter = letter_gen.generate(
12481 &engagement.engagement_id.to_string(),
12482 &engagement.client_name,
12483 entity_count,
12484 engagement.period_end_date,
12485 currency,
12486 &applicable_framework,
12487 letter_date,
12488 );
12489 snapshot.engagement_letters.push(letter);
12490 }
12491
12492 info!(
12493 "ISA 210 engagement letters: {} generated",
12494 snapshot.engagement_letters.len()
12495 );
12496 }
12497
12498 if self.phase_config.generate_legal_documents {
12502 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12503 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12504 for engagement in &snapshot.engagements {
12505 let employee_names: Vec<String> = self
12509 .master_data
12510 .employees
12511 .iter()
12512 .filter(|e| e.company_code == engagement.client_entity_id)
12513 .map(|e| e.display_name.clone())
12514 .collect();
12515 let names_to_use = if !employee_names.is_empty() {
12516 employee_names
12517 } else {
12518 self.master_data
12519 .employees
12520 .iter()
12521 .take(10)
12522 .map(|e| e.display_name.clone())
12523 .collect()
12524 };
12525 let docs = legal_gen.generate(
12526 &engagement.client_entity_id,
12527 engagement.fiscal_year as i32,
12528 &names_to_use,
12529 );
12530 snapshot.legal_documents.extend(docs);
12531 }
12532 info!(
12533 "v3.3.0 legal documents: {} emitted across {} engagements",
12534 snapshot.legal_documents.len(),
12535 snapshot.engagements.len()
12536 );
12537 }
12538
12539 if self.phase_config.generate_it_controls {
12549 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12550 use std::collections::HashMap;
12551 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12552
12553 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12556 HashMap::new();
12557 for engagement in &snapshot.engagements {
12558 let entry = by_company
12559 .entry(engagement.client_entity_id.clone())
12560 .or_insert((engagement.planning_start, engagement.period_end_date));
12561 if engagement.planning_start < entry.0 {
12562 entry.0 = engagement.planning_start;
12563 }
12564 if engagement.period_end_date > entry.1 {
12565 entry.1 = engagement.period_end_date;
12566 }
12567 }
12568
12569 let systems: Vec<String> = vec![
12573 "SAP ECC",
12574 "SAP S/4 HANA",
12575 "Oracle EBS",
12576 "Workday",
12577 "NetSuite",
12578 "Active Directory",
12579 "SharePoint",
12580 "Salesforce",
12581 "ServiceNow",
12582 "Jira",
12583 "GitHub Enterprise",
12584 "AWS Console",
12585 "Okta",
12586 ]
12587 .into_iter()
12588 .map(String::from)
12589 .collect();
12590
12591 for (company_code, (start, end)) in by_company {
12592 let emps: Vec<(String, String)> = self
12593 .master_data
12594 .employees
12595 .iter()
12596 .filter(|e| e.company_code == company_code)
12597 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12598 .collect();
12599 if emps.is_empty() {
12600 continue;
12601 }
12602 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12605 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12606 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12607 snapshot.it_controls_access_logs.extend(access_logs);
12608 snapshot.it_controls_change_records.extend(change_records);
12609 }
12610
12611 info!(
12612 "v3.3.0 IT controls: {} access logs, {} change records",
12613 snapshot.it_controls_access_logs.len(),
12614 snapshot.it_controls_change_records.len()
12615 );
12616 }
12617
12618 {
12622 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12623 let entity_codes: Vec<String> = self
12624 .config
12625 .companies
12626 .iter()
12627 .map(|c| c.code.clone())
12628 .collect();
12629 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12630 info!(
12631 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12632 subsequent.len(),
12633 subsequent
12634 .iter()
12635 .filter(|e| matches!(
12636 e.classification,
12637 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12638 ))
12639 .count(),
12640 subsequent
12641 .iter()
12642 .filter(|e| matches!(
12643 e.classification,
12644 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12645 ))
12646 .count(),
12647 );
12648 snapshot.subsequent_events = subsequent;
12649 }
12650
12651 {
12655 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12656 let entity_codes: Vec<String> = self
12657 .config
12658 .companies
12659 .iter()
12660 .map(|c| c.code.clone())
12661 .collect();
12662 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12663 info!(
12664 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12665 soc_snapshot.service_organizations.len(),
12666 soc_snapshot.soc_reports.len(),
12667 soc_snapshot.user_entity_controls.len(),
12668 );
12669 snapshot.service_organizations = soc_snapshot.service_organizations;
12670 snapshot.soc_reports = soc_snapshot.soc_reports;
12671 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12672 }
12673
// ISA 570: going-concern assessments. Financial indicators are derived per
// company from the journal entries and passed to the generator.
{
    use datasynth_generators::audit::going_concern_generator::{
        GoingConcernGenerator, GoingConcernInput,
    };
    let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
    let entity_codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|c| c.code.clone())
        .collect();
    // Assessment is dated 75 days after period end.
    let assessment_date = period_end + chrono::Duration::days(75);
    let period_label = format!("FY{}", period_end.year());

    // One input record per configured company, built from that company's entries.
    let gc_inputs: Vec<GoingConcernInput> = self
        .config
        .companies
        .iter()
        .map(|company| {
            let code = &company.code;
            let mut revenue = rust_decimal::Decimal::ZERO;
            let mut expenses = rust_decimal::Decimal::ZERO;
            let mut current_assets = rust_decimal::Decimal::ZERO;
            let mut current_liabs = rust_decimal::Decimal::ZERO;
            let mut total_debt = rust_decimal::Decimal::ZERO;

            for je in entries.iter().filter(|je| &je.header.company_code == code) {
                for line in &je.lines {
                    let acct = line.gl_account.as_str();
                    // Debit-positive net movement of the line.
                    let net = line.debit_amount - line.credit_amount;
                    if acct.starts_with('4') {
                        // Accumulates credit minus debit for class-4 accounts.
                        revenue -= net;
                    } else if acct.starts_with('6') {
                        // NOTE(review): only class-6 accounts count as expenses
                        // here; class 5 is not included — confirm intended.
                        expenses += net;
                    }
                    if acct.starts_with('1') {
                        // Accounts that do not parse as an integer are ignored.
                        if let Ok(n) = acct.parse::<u32>() {
                            if (1000..=1499).contains(&n) {
                                current_assets += net;
                            }
                        }
                    } else if acct.starts_with('2') {
                        if let Ok(n) = acct.parse::<u32>() {
                            if (2000..=2499).contains(&n) {
                                // 2000-2499 feed current liabilities (credit-positive).
                                current_liabs -= net;
                            } else if (2500..=2999).contains(&n) {
                                // 2500-2999 feed total debt (credit-positive).
                                total_debt -= net;
                            }
                        }
                    }
                }
            }

            let net_income = revenue - expenses;
            let working_capital = current_assets - current_liabs;
            // Operating cash flow is simply set equal to net income.
            let operating_cash_flow = net_income;

            GoingConcernInput {
                entity_code: code.clone(),
                net_income,
                working_capital,
                operating_cash_flow,
                // A net-debit balance is reported as zero debt.
                total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
                assessment_date,
            }
        })
        .collect();

    // `gc_inputs` has one element per company, so it is empty only when no
    // companies are configured; in that case the input-free variant is called.
    let assessments = if gc_inputs.is_empty() {
        gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
    } else {
        gc_gen.generate_for_entities_with_inputs(
            &entity_codes,
            &gc_inputs,
            assessment_date,
            &period_label,
        )
    };
    info!(
        "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
        assessments.len(),
        assessments.iter().filter(|a| matches!(
            a.auditor_conclusion,
            datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
        )).count(),
        assessments.iter().filter(|a| matches!(
            a.auditor_conclusion,
            datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
        )).count(),
        assessments.iter().filter(|a| matches!(
            a.auditor_conclusion,
            datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
        )).count(),
    );
    snapshot.going_concern_assessments = assessments;
}
12792
12793 {
12797 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12798 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12799 let entity_codes: Vec<String> = self
12800 .config
12801 .companies
12802 .iter()
12803 .map(|c| c.code.clone())
12804 .collect();
12805 let estimates = est_gen.generate_for_entities(&entity_codes);
12806 info!(
12807 "ISA 540 accounting estimates: {} estimates across {} entities \
12808 ({} with retrospective reviews, {} with auditor point estimates)",
12809 estimates.len(),
12810 entity_codes.len(),
12811 estimates
12812 .iter()
12813 .filter(|e| e.retrospective_review.is_some())
12814 .count(),
12815 estimates
12816 .iter()
12817 .filter(|e| e.auditor_point_estimate.is_some())
12818 .count(),
12819 );
12820 snapshot.accounting_estimates = estimates;
12821 }
12822
12823 {
12827 use datasynth_generators::audit::audit_opinion_generator::{
12828 AuditOpinionGenerator, AuditOpinionInput,
12829 };
12830
12831 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12832
12833 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12835 .engagements
12836 .iter()
12837 .map(|eng| {
12838 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12840 .findings
12841 .iter()
12842 .filter(|f| f.engagement_id == eng.engagement_id)
12843 .cloned()
12844 .collect();
12845
12846 let gc = snapshot
12848 .going_concern_assessments
12849 .iter()
12850 .find(|g| g.entity_code == eng.client_entity_id)
12851 .cloned();
12852
12853 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12855 snapshot.component_reports.clone();
12856
12857 let auditor = self
12858 .master_data
12859 .employees
12860 .first()
12861 .map(|e| e.display_name.clone())
12862 .unwrap_or_else(|| "Global Audit LLP".into());
12863
12864 let partner = self
12865 .master_data
12866 .employees
12867 .get(1)
12868 .map(|e| e.display_name.clone())
12869 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12870
12871 AuditOpinionInput {
12872 entity_code: eng.client_entity_id.clone(),
12873 entity_name: eng.client_name.clone(),
12874 engagement_id: eng.engagement_id,
12875 period_end: eng.period_end_date,
12876 findings: eng_findings,
12877 going_concern: gc,
12878 component_reports: comp_reports,
12879 is_us_listed: {
12881 let fw = &self.config.audit_standards.isa_compliance.framework;
12882 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12883 },
12884 auditor_name: auditor,
12885 engagement_partner: partner,
12886 }
12887 })
12888 .collect();
12889
12890 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12891
12892 for go in &generated_opinions {
12893 snapshot
12894 .key_audit_matters
12895 .extend(go.key_audit_matters.clone());
12896 }
12897 snapshot.audit_opinions = generated_opinions
12898 .into_iter()
12899 .map(|go| go.opinion)
12900 .collect();
12901
12902 info!(
12903 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12904 snapshot.audit_opinions.len(),
12905 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12906 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12907 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12908 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12909 );
12910 }
12911
12912 {
12916 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12917
12918 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12919
12920 for (i, company) in self.config.companies.iter().enumerate() {
12921 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12923 .engagements
12924 .iter()
12925 .filter(|e| e.client_entity_id == company.code)
12926 .map(|e| e.engagement_id)
12927 .collect();
12928
12929 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12930 .findings
12931 .iter()
12932 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12933 .cloned()
12934 .collect();
12935
12936 let emp_count = self.master_data.employees.len();
12938 let ceo_name = if emp_count > 0 {
12939 self.master_data.employees[i % emp_count]
12940 .display_name
12941 .clone()
12942 } else {
12943 format!("CEO of {}", company.name)
12944 };
12945 let cfo_name = if emp_count > 1 {
12946 self.master_data.employees[(i + 1) % emp_count]
12947 .display_name
12948 .clone()
12949 } else {
12950 format!("CFO of {}", company.name)
12951 };
12952
12953 let materiality = snapshot
12955 .engagements
12956 .iter()
12957 .find(|e| e.client_entity_id == company.code)
12958 .map(|e| e.materiality)
12959 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12960
12961 let input = SoxGeneratorInput {
12962 company_code: company.code.clone(),
12963 company_name: company.name.clone(),
12964 fiscal_year,
12965 period_end,
12966 findings: company_findings,
12967 ceo_name,
12968 cfo_name,
12969 materiality_threshold: materiality,
12970 revenue_percent: rust_decimal::Decimal::from(100),
12971 assets_percent: rust_decimal::Decimal::from(100),
12972 significant_accounts: vec![
12973 "Revenue".into(),
12974 "Accounts Receivable".into(),
12975 "Inventory".into(),
12976 "Fixed Assets".into(),
12977 "Accounts Payable".into(),
12978 ],
12979 };
12980
12981 let (certs, assessment) = sox_gen.generate(&input);
12982 snapshot.sox_302_certifications.extend(certs);
12983 snapshot.sox_404_assessments.push(assessment);
12984 }
12985
12986 info!(
12987 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12988 snapshot.sox_302_certifications.len(),
12989 snapshot.sox_404_assessments.len(),
12990 snapshot
12991 .sox_404_assessments
12992 .iter()
12993 .filter(|a| a.icfr_effective)
12994 .count(),
12995 snapshot
12996 .sox_404_assessments
12997 .iter()
12998 .filter(|a| !a.icfr_effective)
12999 .count(),
13000 );
13001 }
13002
13003 {
13007 use datasynth_generators::audit::materiality_generator::{
13008 MaterialityGenerator, MaterialityInput,
13009 };
13010
13011 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13012
13013 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13017
13018 for company in &self.config.companies {
13019 let company_code = company.code.clone();
13020
13021 let company_revenue: rust_decimal::Decimal = entries
13023 .iter()
13024 .filter(|e| e.company_code() == company_code)
13025 .flat_map(|e| e.lines.iter())
13026 .filter(|l| l.account_code.starts_with('4'))
13027 .map(|l| l.credit_amount)
13028 .sum();
13029
13030 let total_assets: rust_decimal::Decimal = entries
13032 .iter()
13033 .filter(|e| e.company_code() == company_code)
13034 .flat_map(|e| e.lines.iter())
13035 .filter(|l| l.account_code.starts_with('1'))
13036 .map(|l| l.debit_amount)
13037 .sum();
13038
13039 let total_expenses: rust_decimal::Decimal = entries
13041 .iter()
13042 .filter(|e| e.company_code() == company_code)
13043 .flat_map(|e| e.lines.iter())
13044 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13045 .map(|l| l.debit_amount)
13046 .sum();
13047
13048 let equity: rust_decimal::Decimal = entries
13050 .iter()
13051 .filter(|e| e.company_code() == company_code)
13052 .flat_map(|e| e.lines.iter())
13053 .filter(|l| l.account_code.starts_with('3'))
13054 .map(|l| l.credit_amount)
13055 .sum();
13056
13057 let pretax_income = company_revenue - total_expenses;
13058
13059 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13061 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13062 .unwrap_or(rust_decimal::Decimal::ONE);
13063 (
13064 total_revenue * w,
13065 total_revenue * w * rust_decimal::Decimal::from(3),
13066 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13067 total_revenue * w * rust_decimal::Decimal::from(2),
13068 )
13069 } else {
13070 (company_revenue, total_assets, pretax_income, equity)
13071 };
13072
13073 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13076 entity_code: company_code,
13077 period: format!("FY{}", fiscal_year),
13078 revenue: rev,
13079 pretax_income: pti,
13080 total_assets: assets,
13081 equity: eq,
13082 gross_profit,
13083 });
13084 }
13085
13086 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13087
13088 info!(
13089 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13090 {} total assets, {} equity benchmarks)",
13091 snapshot.materiality_calculations.len(),
13092 snapshot
13093 .materiality_calculations
13094 .iter()
13095 .filter(|m| matches!(
13096 m.benchmark,
13097 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13098 ))
13099 .count(),
13100 snapshot
13101 .materiality_calculations
13102 .iter()
13103 .filter(|m| matches!(
13104 m.benchmark,
13105 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13106 ))
13107 .count(),
13108 snapshot
13109 .materiality_calculations
13110 .iter()
13111 .filter(|m| matches!(
13112 m.benchmark,
13113 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13114 ))
13115 .count(),
13116 snapshot
13117 .materiality_calculations
13118 .iter()
13119 .filter(|m| matches!(
13120 m.benchmark,
13121 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13122 ))
13123 .count(),
13124 );
13125 }
13126
13127 {
13131 use datasynth_generators::audit::cra_generator::CraGenerator;
13132
13133 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13134
13135 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13137 .audit_scopes
13138 .iter()
13139 .map(|s| (s.entity_code.clone(), s.id.clone()))
13140 .collect();
13141
13142 for company in &self.config.companies {
13143 let cras = cra_gen.generate_for_entity(&company.code, None);
13144 let scope_id = entity_scope_map.get(&company.code).cloned();
13145 let cras_with_scope: Vec<_> = cras
13146 .into_iter()
13147 .map(|mut cra| {
13148 cra.scope_id = scope_id.clone();
13149 cra
13150 })
13151 .collect();
13152 snapshot.combined_risk_assessments.extend(cras_with_scope);
13153 }
13154
13155 let significant_count = snapshot
13156 .combined_risk_assessments
13157 .iter()
13158 .filter(|c| c.significant_risk)
13159 .count();
13160 let high_cra_count = snapshot
13161 .combined_risk_assessments
13162 .iter()
13163 .filter(|c| {
13164 matches!(
13165 c.combined_risk,
13166 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13167 )
13168 })
13169 .count();
13170
13171 info!(
13172 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13173 snapshot.combined_risk_assessments.len(),
13174 significant_count,
13175 high_cra_count,
13176 );
13177 }
13178
13179 {
13183 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13184
13185 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13186
13187 for company in &self.config.companies {
13189 let entity_code = company.code.clone();
13190
13191 let tolerable_error = snapshot
13193 .materiality_calculations
13194 .iter()
13195 .find(|m| m.entity_code == entity_code)
13196 .map(|m| m.tolerable_error);
13197
13198 let entity_cras: Vec<_> = snapshot
13200 .combined_risk_assessments
13201 .iter()
13202 .filter(|c| c.entity_code == entity_code)
13203 .cloned()
13204 .collect();
13205
13206 if !entity_cras.is_empty() {
13207 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13208 snapshot.sampling_plans.extend(plans);
13209 snapshot.sampled_items.extend(items);
13210 }
13211 }
13212
13213 let misstatement_count = snapshot
13214 .sampled_items
13215 .iter()
13216 .filter(|i| i.misstatement_found)
13217 .count();
13218
13219 info!(
13220 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13221 snapshot.sampling_plans.len(),
13222 snapshot.sampled_items.len(),
13223 misstatement_count,
13224 );
13225 }
13226
13227 {
13231 use datasynth_generators::audit::scots_generator::{
13232 ScotsGenerator, ScotsGeneratorConfig,
13233 };
13234
13235 let ic_enabled = self.config.intercompany.enabled;
13236
13237 let config = ScotsGeneratorConfig {
13238 intercompany_enabled: ic_enabled,
13239 ..ScotsGeneratorConfig::default()
13240 };
13241 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13242
13243 for company in &self.config.companies {
13244 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13245 snapshot
13246 .significant_transaction_classes
13247 .extend(entity_scots);
13248 }
13249
13250 let estimation_count = snapshot
13251 .significant_transaction_classes
13252 .iter()
13253 .filter(|s| {
13254 matches!(
13255 s.transaction_type,
13256 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13257 )
13258 })
13259 .count();
13260
13261 info!(
13262 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13263 snapshot.significant_transaction_classes.len(),
13264 estimation_count,
13265 );
13266 }
13267
13268 {
13272 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13273
13274 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13275 let entity_codes: Vec<String> = self
13276 .config
13277 .companies
13278 .iter()
13279 .map(|c| c.code.clone())
13280 .collect();
13281 let unusual_flags =
13282 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13283 info!(
13284 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13285 unusual_flags.len(),
13286 unusual_flags
13287 .iter()
13288 .filter(|f| matches!(
13289 f.severity,
13290 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13291 ))
13292 .count(),
13293 unusual_flags
13294 .iter()
13295 .filter(|f| matches!(
13296 f.severity,
13297 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13298 ))
13299 .count(),
13300 unusual_flags
13301 .iter()
13302 .filter(|f| matches!(
13303 f.severity,
13304 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13305 ))
13306 .count(),
13307 );
13308 snapshot.unusual_items = unusual_flags;
13309 }
13310
13311 {
13315 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13316
13317 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13318 let entity_codes: Vec<String> = self
13319 .config
13320 .companies
13321 .iter()
13322 .map(|c| c.code.clone())
13323 .collect();
13324 let current_period_label = format!("FY{fiscal_year}");
13325 let prior_period_label = format!("FY{}", fiscal_year - 1);
13326 let analytical_rels = ar_gen.generate_for_entities(
13327 &entity_codes,
13328 entries,
13329 ¤t_period_label,
13330 &prior_period_label,
13331 );
13332 let out_of_range = analytical_rels
13333 .iter()
13334 .filter(|r| !r.within_expected_range)
13335 .count();
13336 info!(
13337 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13338 analytical_rels.len(),
13339 out_of_range,
13340 );
13341 snapshot.analytical_relationships = analytical_rels;
13342 }
13343
13344 if let Some(pb) = pb {
13345 pb.finish_with_message(format!(
13346 "Audit data: {} engagements, {} workpapers, {} evidence, \
13347 {} confirmations, {} procedure steps, {} samples, \
13348 {} analytical, {} IA funcs, {} related parties, \
13349 {} component auditors, {} letters, {} subsequent events, \
13350 {} service orgs, {} going concern, {} accounting estimates, \
13351 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13352 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13353 {} unusual items, {} analytical relationships",
13354 snapshot.engagements.len(),
13355 snapshot.workpapers.len(),
13356 snapshot.evidence.len(),
13357 snapshot.confirmations.len(),
13358 snapshot.procedure_steps.len(),
13359 snapshot.samples.len(),
13360 snapshot.analytical_results.len(),
13361 snapshot.ia_functions.len(),
13362 snapshot.related_parties.len(),
13363 snapshot.component_auditors.len(),
13364 snapshot.engagement_letters.len(),
13365 snapshot.subsequent_events.len(),
13366 snapshot.service_organizations.len(),
13367 snapshot.going_concern_assessments.len(),
13368 snapshot.accounting_estimates.len(),
13369 snapshot.audit_opinions.len(),
13370 snapshot.key_audit_matters.len(),
13371 snapshot.sox_302_certifications.len(),
13372 snapshot.sox_404_assessments.len(),
13373 snapshot.materiality_calculations.len(),
13374 snapshot.combined_risk_assessments.len(),
13375 snapshot.sampling_plans.len(),
13376 snapshot.significant_transaction_classes.len(),
13377 snapshot.unusual_items.len(),
13378 snapshot.analytical_relationships.len(),
13379 ));
13380 }
13381
13382 {
13389 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13390 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13391 debug!(
13392 "PCAOB-ISA mappings generated: {} mappings",
13393 snapshot.isa_pcaob_mappings.len()
13394 );
13395 }
13396
13397 {
13404 use datasynth_standards::audit::isa_reference::IsaStandard;
13405 snapshot.isa_mappings = IsaStandard::standard_entries();
13406 debug!(
13407 "ISA standard entries generated: {} standards",
13408 snapshot.isa_mappings.len()
13409 );
13410 }
13411
13412 {
13415 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13416 .engagements
13417 .iter()
13418 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13419 .collect();
13420
13421 for rpt in &mut snapshot.related_party_transactions {
13422 if rpt.journal_entry_id.is_some() {
13423 continue; }
13425 let entity = engagement_by_id
13426 .get(&rpt.engagement_id.to_string())
13427 .copied()
13428 .unwrap_or("");
13429
13430 let best_je = entries
13432 .iter()
13433 .filter(|je| je.header.company_code == entity)
13434 .min_by_key(|je| {
13435 (je.header.posting_date - rpt.transaction_date)
13436 .num_days()
13437 .abs()
13438 });
13439
13440 if let Some(je) = best_je {
13441 rpt.journal_entry_id = Some(je.header.document_id.to_string());
13442 }
13443 }
13444
13445 let linked = snapshot
13446 .related_party_transactions
13447 .iter()
13448 .filter(|t| t.journal_entry_id.is_some())
13449 .count();
13450 debug!(
13451 "Linked {}/{} related party transactions to journal entries",
13452 linked,
13453 snapshot.related_party_transactions.len()
13454 );
13455 }
13456
13457 if !snapshot.engagements.is_empty() {
13463 use datasynth_generators::audit_opinion_generator::{
13464 AuditOpinionGenerator, AuditOpinionInput,
13465 };
13466
13467 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13468 let inputs: Vec<AuditOpinionInput> = snapshot
13469 .engagements
13470 .iter()
13471 .map(|eng| {
13472 let findings = snapshot
13473 .findings
13474 .iter()
13475 .filter(|f| f.engagement_id == eng.engagement_id)
13476 .cloned()
13477 .collect();
13478 let going_concern = snapshot
13479 .going_concern_assessments
13480 .iter()
13481 .find(|gc| gc.entity_code == eng.client_entity_id)
13482 .cloned();
13483 let component_reports = snapshot
13486 .component_reports
13487 .iter()
13488 .filter(|r| r.entity_code == eng.client_entity_id)
13489 .cloned()
13490 .collect();
13491
13492 AuditOpinionInput {
13493 entity_code: eng.client_entity_id.clone(),
13494 entity_name: eng.client_name.clone(),
13495 engagement_id: eng.engagement_id,
13496 period_end: eng.period_end_date,
13497 findings,
13498 going_concern,
13499 component_reports,
13500 is_us_listed: matches!(
13501 eng.engagement_type,
13502 datasynth_core::audit::EngagementType::IntegratedAudit
13503 | datasynth_core::audit::EngagementType::Sox404
13504 ),
13505 auditor_name: "DataSynth Audit LLP".to_string(),
13506 engagement_partner: "Engagement Partner".to_string(),
13507 }
13508 })
13509 .collect();
13510
13511 let generated = opinion_gen.generate_batch(&inputs);
13512 for g in generated {
13513 snapshot.key_audit_matters.extend(g.key_audit_matters);
13514 snapshot.audit_opinions.push(g.opinion);
13515 }
13516 debug!(
13517 "Generated {} audit opinions with {} key audit matters",
13518 snapshot.audit_opinions.len(),
13519 snapshot.key_audit_matters.len()
13520 );
13521 }
13522
13523 Ok(snapshot)
13524 }
13525
13526 fn generate_audit_data_with_fsm(
13533 &mut self,
13534 entries: &[JournalEntry],
13535 ) -> SynthResult<AuditSnapshot> {
13536 use datasynth_audit_fsm::{
13537 context::EngagementContext,
13538 engine::AuditFsmEngine,
13539 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13540 };
13541 use rand::SeedableRng;
13542 use rand_chacha::ChaCha8Rng;
13543
13544 info!("Audit FSM: generating audit data via FSM engine");
13545
13546 let fsm_config = self
13547 .config
13548 .audit
13549 .fsm
13550 .as_ref()
13551 .expect("FSM config must be present when FSM is enabled");
13552
13553 let bwp = match fsm_config.blueprint.as_str() {
13555 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13556 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13557 _ => {
13558 warn!(
13559 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13560 fsm_config.blueprint
13561 );
13562 BlueprintWithPreconditions::load_builtin_fsa()
13563 }
13564 }
13565 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13566
13567 let overlay = match fsm_config.overlay.as_str() {
13569 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13570 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13571 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13572 _ => {
13573 warn!(
13574 "Unknown FSM overlay '{}', falling back to builtin:default",
13575 fsm_config.overlay
13576 );
13577 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13578 }
13579 }
13580 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13581
13582 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13584 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13585 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13586
13587 let company = self.config.companies.first();
13589 let company_code = company
13590 .map(|c| c.code.clone())
13591 .unwrap_or_else(|| "UNKNOWN".to_string());
13592 let company_name = company
13593 .map(|c| c.name.clone())
13594 .unwrap_or_else(|| "Unknown Company".to_string());
13595 let currency = company
13596 .map(|c| c.currency.clone())
13597 .unwrap_or_else(|| "USD".to_string());
13598
13599 let entity_entries: Vec<_> = entries
13601 .iter()
13602 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13603 .cloned()
13604 .collect();
13605 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
13609 .iter()
13610 .flat_map(|e| e.lines.iter())
13611 .filter(|l| l.account_code.starts_with('4'))
13612 .map(|l| l.credit_amount - l.debit_amount)
13613 .sum();
13614
13615 let total_assets: rust_decimal::Decimal = entries
13616 .iter()
13617 .flat_map(|e| e.lines.iter())
13618 .filter(|l| l.account_code.starts_with('1'))
13619 .map(|l| l.debit_amount - l.credit_amount)
13620 .sum();
13621
13622 let total_expenses: rust_decimal::Decimal = entries
13623 .iter()
13624 .flat_map(|e| e.lines.iter())
13625 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13626 .map(|l| l.debit_amount)
13627 .sum();
13628
13629 let equity: rust_decimal::Decimal = entries
13630 .iter()
13631 .flat_map(|e| e.lines.iter())
13632 .filter(|l| l.account_code.starts_with('3'))
13633 .map(|l| l.credit_amount - l.debit_amount)
13634 .sum();
13635
13636 let total_debt: rust_decimal::Decimal = entries
13637 .iter()
13638 .flat_map(|e| e.lines.iter())
13639 .filter(|l| l.account_code.starts_with('2'))
13640 .map(|l| l.credit_amount - l.debit_amount)
13641 .sum();
13642
13643 let pretax_income = total_revenue - total_expenses;
13644
13645 let cogs: rust_decimal::Decimal = entries
13646 .iter()
13647 .flat_map(|e| e.lines.iter())
13648 .filter(|l| l.account_code.starts_with('5'))
13649 .map(|l| l.debit_amount)
13650 .sum();
13651 let gross_profit = total_revenue - cogs;
13652
13653 let current_assets: rust_decimal::Decimal = entries
13654 .iter()
13655 .flat_map(|e| e.lines.iter())
13656 .filter(|l| {
13657 l.account_code.starts_with("10")
13658 || l.account_code.starts_with("11")
13659 || l.account_code.starts_with("12")
13660 || l.account_code.starts_with("13")
13661 })
13662 .map(|l| l.debit_amount - l.credit_amount)
13663 .sum();
13664 let current_liabilities: rust_decimal::Decimal = entries
13665 .iter()
13666 .flat_map(|e| e.lines.iter())
13667 .filter(|l| {
13668 l.account_code.starts_with("20")
13669 || l.account_code.starts_with("21")
13670 || l.account_code.starts_with("22")
13671 })
13672 .map(|l| l.credit_amount - l.debit_amount)
13673 .sum();
13674 let working_capital = current_assets - current_liabilities;
13675
13676 let depreciation: rust_decimal::Decimal = entries
13677 .iter()
13678 .flat_map(|e| e.lines.iter())
13679 .filter(|l| l.account_code.starts_with("60"))
13680 .map(|l| l.debit_amount)
13681 .sum();
13682 let operating_cash_flow = pretax_income + depreciation;
13683
13684 let accounts: Vec<String> = self
13686 .coa
13687 .as_ref()
13688 .map(|coa| {
13689 coa.get_postable_accounts()
13690 .iter()
13691 .map(|acc| acc.account_code().to_string())
13692 .collect()
13693 })
13694 .unwrap_or_default();
13695
13696 let team_member_ids: Vec<String> = self
13698 .master_data
13699 .employees
13700 .iter()
13701 .take(8) .map(|e| e.employee_id.clone())
13703 .collect();
13704 let team_member_pairs: Vec<(String, String)> = self
13705 .master_data
13706 .employees
13707 .iter()
13708 .take(8)
13709 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13710 .collect();
13711
13712 let vendor_names: Vec<String> = self
13713 .master_data
13714 .vendors
13715 .iter()
13716 .map(|v| v.name.clone())
13717 .collect();
13718 let customer_names: Vec<String> = self
13719 .master_data
13720 .customers
13721 .iter()
13722 .map(|c| c.name.clone())
13723 .collect();
13724
13725 let entity_codes: Vec<String> = self
13726 .config
13727 .companies
13728 .iter()
13729 .map(|c| c.code.clone())
13730 .collect();
13731
13732 let journal_entry_ids: Vec<String> = entries
13734 .iter()
13735 .take(50)
13736 .map(|e| e.header.document_id.to_string())
13737 .collect();
13738
13739 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13741 for entry in entries {
13742 for line in &entry.lines {
13743 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13744 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13745 *account_balances
13746 .entry(line.account_code.clone())
13747 .or_insert(0.0) += debit_f64 - credit_f64;
13748 }
13749 }
13750
13751 let control_ids: Vec<String> = Vec::new();
13756 let anomaly_refs: Vec<String> = Vec::new();
13757
13758 let mut context = EngagementContext {
13759 company_code,
13760 company_name,
13761 fiscal_year: start_date.year(),
13762 currency,
13763 total_revenue,
13764 total_assets,
13765 engagement_start: start_date,
13766 report_date: period_end,
13767 pretax_income,
13768 equity,
13769 gross_profit,
13770 working_capital,
13771 operating_cash_flow,
13772 total_debt,
13773 team_member_ids,
13774 team_member_pairs,
13775 accounts,
13776 vendor_names,
13777 customer_names,
13778 journal_entry_ids,
13779 account_balances,
13780 control_ids,
13781 anomaly_refs,
13782 journal_entries: entries.to_vec(),
13783 is_us_listed: false,
13784 entity_codes,
13785 auditor_firm_name: "DataSynth Audit LLP".into(),
13786 accounting_framework: self
13787 .config
13788 .accounting_standards
13789 .framework
13790 .map(|f| match f {
13791 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13792 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13793 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13794 "French GAAP"
13795 }
13796 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13797 "German GAAP"
13798 }
13799 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13800 "Dual Reporting"
13801 }
13802 })
13803 .unwrap_or("IFRS")
13804 .into(),
13805 };
13806
13807 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13809 let rng = ChaCha8Rng::seed_from_u64(seed);
13810 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13811
13812 let mut result = engine
13813 .run_engagement(&context)
13814 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13815
13816 info!(
13817 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13818 {} phases completed, duration {:.1}h",
13819 result.event_log.len(),
13820 result.artifacts.total_artifacts(),
13821 result.anomalies.len(),
13822 result.phases_completed.len(),
13823 result.total_duration_hours,
13824 );
13825
13826 let tb_entity = context.company_code.clone();
13828 let tb_fy = context.fiscal_year;
13829 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13830 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13831 entries,
13832 &tb_entity,
13833 tb_fy,
13834 self.coa.as_ref().map(|c| c.as_ref()),
13835 );
13836
13837 let bag = result.artifacts;
13839 let mut snapshot = AuditSnapshot {
13840 engagements: bag.engagements,
13841 engagement_letters: bag.engagement_letters,
13842 materiality_calculations: bag.materiality_calculations,
13843 risk_assessments: bag.risk_assessments,
13844 combined_risk_assessments: bag.combined_risk_assessments,
13845 workpapers: bag.workpapers,
13846 evidence: bag.evidence,
13847 findings: bag.findings,
13848 judgments: bag.judgments,
13849 sampling_plans: bag.sampling_plans,
13850 sampled_items: bag.sampled_items,
13851 analytical_results: bag.analytical_results,
13852 going_concern_assessments: bag.going_concern_assessments,
13853 subsequent_events: bag.subsequent_events,
13854 audit_opinions: bag.audit_opinions,
13855 key_audit_matters: bag.key_audit_matters,
13856 procedure_steps: bag.procedure_steps,
13857 samples: bag.samples,
13858 confirmations: bag.confirmations,
13859 confirmation_responses: bag.confirmation_responses,
13860 fsm_event_trail: Some(result.event_log),
13862 ..Default::default()
13864 };
13865
13866 {
13868 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13869 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13870 }
13871 {
13872 use datasynth_standards::audit::isa_reference::IsaStandard;
13873 snapshot.isa_mappings = IsaStandard::standard_entries();
13874 }
13875
13876 info!(
13877 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13878 {} risk assessments, {} findings, {} materiality calcs",
13879 snapshot.engagements.len(),
13880 snapshot.workpapers.len(),
13881 snapshot.evidence.len(),
13882 snapshot.risk_assessments.len(),
13883 snapshot.findings.len(),
13884 snapshot.materiality_calculations.len(),
13885 );
13886
13887 Ok(snapshot)
13888 }
13889
13890 fn export_graphs(
13897 &mut self,
13898 entries: &[JournalEntry],
13899 _coa: &Arc<ChartOfAccounts>,
13900 stats: &mut EnhancedGenerationStatistics,
13901 ) -> SynthResult<GraphExportSnapshot> {
13902 let pb = self.create_progress_bar(100, "Exporting Graphs");
13903
13904 let mut snapshot = GraphExportSnapshot::default();
13905
13906 let output_dir = self
13908 .output_path
13909 .clone()
13910 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13911 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13912
13913 for graph_type in &self.config.graph_export.graph_types {
13915 if let Some(pb) = &pb {
13916 pb.inc(10);
13917 }
13918
13919 let graph_config = TransactionGraphConfig {
13921 include_vendors: false,
13922 include_customers: false,
13923 create_debit_credit_edges: true,
13924 include_document_nodes: graph_type.include_document_nodes,
13925 min_edge_weight: graph_type.min_edge_weight,
13926 aggregate_parallel_edges: graph_type.aggregate_edges,
13927 framework: None,
13928 };
13929
13930 let mut builder = TransactionGraphBuilder::new(graph_config);
13931 builder.add_journal_entries(entries);
13932 let graph = builder.build();
13933
13934 stats.graph_node_count += graph.node_count();
13936 stats.graph_edge_count += graph.edge_count();
13937
13938 if let Some(pb) = &pb {
13939 pb.inc(40);
13940 }
13941
13942 for format in &self.config.graph_export.formats {
13944 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13945
13946 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13948 warn!("Failed to create graph output directory: {}", e);
13949 continue;
13950 }
13951
13952 match format {
13953 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13954 let pyg_config = PyGExportConfig {
13955 common: datasynth_graph::CommonExportConfig {
13956 export_node_features: true,
13957 export_edge_features: true,
13958 export_node_labels: true,
13959 export_edge_labels: true,
13960 export_masks: true,
13961 train_ratio: self.config.graph_export.train_ratio,
13962 val_ratio: self.config.graph_export.validation_ratio,
13963 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13964 },
13965 one_hot_categoricals: false,
13966 };
13967
13968 let exporter = PyGExporter::new(pyg_config);
13969 match exporter.export(&graph, &format_dir) {
13970 Ok(metadata) => {
13971 snapshot.exports.insert(
13972 format!("{}_{}", graph_type.name, "pytorch_geometric"),
13973 GraphExportInfo {
13974 name: graph_type.name.clone(),
13975 format: "pytorch_geometric".to_string(),
13976 output_path: format_dir.clone(),
13977 node_count: metadata.num_nodes,
13978 edge_count: metadata.num_edges,
13979 },
13980 );
13981 snapshot.graph_count += 1;
13982 }
13983 Err(e) => {
13984 warn!("Failed to export PyTorch Geometric graph: {}", e);
13985 }
13986 }
13987 }
13988 datasynth_config::schema::GraphExportFormat::Neo4j => {
13989 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13990
13991 let neo4j_config = Neo4jExportConfig {
13992 export_node_properties: true,
13993 export_edge_properties: true,
13994 export_features: true,
13995 generate_cypher: true,
13996 generate_admin_import: true,
13997 database_name: "synth".to_string(),
13998 cypher_batch_size: 1000,
13999 };
14000
14001 let exporter = Neo4jExporter::new(neo4j_config);
14002 match exporter.export(&graph, &format_dir) {
14003 Ok(metadata) => {
14004 snapshot.exports.insert(
14005 format!("{}_{}", graph_type.name, "neo4j"),
14006 GraphExportInfo {
14007 name: graph_type.name.clone(),
14008 format: "neo4j".to_string(),
14009 output_path: format_dir.clone(),
14010 node_count: metadata.num_nodes,
14011 edge_count: metadata.num_edges,
14012 },
14013 );
14014 snapshot.graph_count += 1;
14015 }
14016 Err(e) => {
14017 warn!("Failed to export Neo4j graph: {}", e);
14018 }
14019 }
14020 }
14021 datasynth_config::schema::GraphExportFormat::Dgl => {
14022 use datasynth_graph::{DGLExportConfig, DGLExporter};
14023
14024 let dgl_config = DGLExportConfig {
14025 common: datasynth_graph::CommonExportConfig {
14026 export_node_features: true,
14027 export_edge_features: true,
14028 export_node_labels: true,
14029 export_edge_labels: true,
14030 export_masks: true,
14031 train_ratio: self.config.graph_export.train_ratio,
14032 val_ratio: self.config.graph_export.validation_ratio,
14033 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14034 },
14035 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14036 include_pickle_script: true, };
14038
14039 let exporter = DGLExporter::new(dgl_config);
14040 match exporter.export(&graph, &format_dir) {
14041 Ok(metadata) => {
14042 snapshot.exports.insert(
14043 format!("{}_{}", graph_type.name, "dgl"),
14044 GraphExportInfo {
14045 name: graph_type.name.clone(),
14046 format: "dgl".to_string(),
14047 output_path: format_dir.clone(),
14048 node_count: metadata.common.num_nodes,
14049 edge_count: metadata.common.num_edges,
14050 },
14051 );
14052 snapshot.graph_count += 1;
14053 }
14054 Err(e) => {
14055 warn!("Failed to export DGL graph: {}", e);
14056 }
14057 }
14058 }
14059 datasynth_config::schema::GraphExportFormat::RustGraph => {
14060 use datasynth_graph::{
14061 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14062 };
14063
14064 let rustgraph_config = RustGraphExportConfig {
14065 include_features: true,
14066 include_temporal: true,
14067 include_labels: true,
14068 source_name: "datasynth".to_string(),
14069 batch_id: None,
14070 output_format: RustGraphOutputFormat::JsonLines,
14071 export_node_properties: true,
14072 export_edge_properties: true,
14073 pretty_print: false,
14074 };
14075
14076 let exporter = RustGraphExporter::new(rustgraph_config);
14077 match exporter.export(&graph, &format_dir) {
14078 Ok(metadata) => {
14079 snapshot.exports.insert(
14080 format!("{}_{}", graph_type.name, "rustgraph"),
14081 GraphExportInfo {
14082 name: graph_type.name.clone(),
14083 format: "rustgraph".to_string(),
14084 output_path: format_dir.clone(),
14085 node_count: metadata.num_nodes,
14086 edge_count: metadata.num_edges,
14087 },
14088 );
14089 snapshot.graph_count += 1;
14090 }
14091 Err(e) => {
14092 warn!("Failed to export RustGraph: {}", e);
14093 }
14094 }
14095 }
14096 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14097 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14099 }
14100 }
14101 }
14102
14103 if let Some(pb) = &pb {
14104 pb.inc(40);
14105 }
14106 }
14107
14108 stats.graph_export_count = snapshot.graph_count;
14109 snapshot.exported = snapshot.graph_count > 0;
14110
14111 if let Some(pb) = pb {
14112 pb.finish_with_message(format!(
14113 "Graphs exported: {} graphs ({} nodes, {} edges)",
14114 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14115 ));
14116 }
14117
14118 Ok(snapshot)
14119 }
14120
14121 fn build_additional_graphs(
14126 &self,
14127 banking: &BankingSnapshot,
14128 intercompany: &IntercompanySnapshot,
14129 entries: &[JournalEntry],
14130 stats: &mut EnhancedGenerationStatistics,
14131 ) {
14132 let output_dir = self
14133 .output_path
14134 .clone()
14135 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14136 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14137
14138 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14140 info!("Phase 10c: Building banking network graph");
14141 let config = BankingGraphConfig::default();
14142 let mut builder = BankingGraphBuilder::new(config);
14143 builder.add_customers(&banking.customers);
14144 builder.add_accounts(&banking.accounts, &banking.customers);
14145 builder.add_transactions(&banking.transactions);
14146 let graph = builder.build();
14147
14148 let node_count = graph.node_count();
14149 let edge_count = graph.edge_count();
14150 stats.graph_node_count += node_count;
14151 stats.graph_edge_count += edge_count;
14152
14153 for format in &self.config.graph_export.formats {
14155 if matches!(
14156 format,
14157 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14158 ) {
14159 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14160 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14161 warn!("Failed to create banking graph output dir: {}", e);
14162 continue;
14163 }
14164 let pyg_config = PyGExportConfig::default();
14165 let exporter = PyGExporter::new(pyg_config);
14166 if let Err(e) = exporter.export(&graph, &format_dir) {
14167 warn!("Failed to export banking graph as PyG: {}", e);
14168 } else {
14169 info!(
14170 "Banking network graph exported: {} nodes, {} edges",
14171 node_count, edge_count
14172 );
14173 }
14174 }
14175 }
14176 }
14177
14178 let approval_entries: Vec<_> = entries
14180 .iter()
14181 .filter(|je| je.header.approval_workflow.is_some())
14182 .collect();
14183
14184 if !approval_entries.is_empty() {
14185 info!(
14186 "Phase 10c: Building approval network graph ({} entries with approvals)",
14187 approval_entries.len()
14188 );
14189 let config = ApprovalGraphConfig::default();
14190 let mut builder = ApprovalGraphBuilder::new(config);
14191
14192 for je in &approval_entries {
14193 if let Some(ref wf) = je.header.approval_workflow {
14194 for action in &wf.actions {
14195 let record = datasynth_core::models::ApprovalRecord {
14196 approval_id: format!(
14197 "APR-{}-{}",
14198 je.header.document_id, action.approval_level
14199 ),
14200 document_number: je.header.document_id.to_string(),
14201 document_type: "JE".to_string(),
14202 company_code: je.company_code().to_string(),
14203 requester_id: wf.preparer_id.clone(),
14204 requester_name: Some(wf.preparer_name.clone()),
14205 approver_id: action.actor_id.clone(),
14206 approver_name: action.actor_name.clone(),
14207 approval_date: je.posting_date(),
14208 action: format!("{:?}", action.action),
14209 amount: wf.amount,
14210 approval_limit: None,
14211 comments: action.comments.clone(),
14212 delegation_from: None,
14213 is_auto_approved: false,
14214 };
14215 builder.add_approval(&record);
14216 }
14217 }
14218 }
14219
14220 let graph = builder.build();
14221 let node_count = graph.node_count();
14222 let edge_count = graph.edge_count();
14223 stats.graph_node_count += node_count;
14224 stats.graph_edge_count += edge_count;
14225
14226 for format in &self.config.graph_export.formats {
14228 if matches!(
14229 format,
14230 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14231 ) {
14232 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14233 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14234 warn!("Failed to create approval graph output dir: {}", e);
14235 continue;
14236 }
14237 let pyg_config = PyGExportConfig::default();
14238 let exporter = PyGExporter::new(pyg_config);
14239 if let Err(e) = exporter.export(&graph, &format_dir) {
14240 warn!("Failed to export approval graph as PyG: {}", e);
14241 } else {
14242 info!(
14243 "Approval network graph exported: {} nodes, {} edges",
14244 node_count, edge_count
14245 );
14246 }
14247 }
14248 }
14249 }
14250
14251 if self.config.companies.len() >= 2 {
14253 info!(
14254 "Phase 10c: Building entity relationship graph ({} companies)",
14255 self.config.companies.len()
14256 );
14257
14258 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14259 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14260
14261 let parent_code = &self.config.companies[0].code;
14263 let mut companies: Vec<datasynth_core::models::Company> =
14264 Vec::with_capacity(self.config.companies.len());
14265
14266 let first = &self.config.companies[0];
14268 companies.push(datasynth_core::models::Company::parent(
14269 &first.code,
14270 &first.name,
14271 &first.country,
14272 &first.currency,
14273 ));
14274
14275 for cc in self.config.companies.iter().skip(1) {
14277 companies.push(datasynth_core::models::Company::subsidiary(
14278 &cc.code,
14279 &cc.name,
14280 &cc.country,
14281 &cc.currency,
14282 parent_code,
14283 rust_decimal::Decimal::from(100),
14284 ));
14285 }
14286
14287 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14289 self.config
14290 .companies
14291 .iter()
14292 .skip(1)
14293 .enumerate()
14294 .map(|(i, cc)| {
14295 let mut rel =
14296 datasynth_core::models::intercompany::IntercompanyRelationship::new(
14297 format!("REL{:03}", i + 1),
14298 parent_code.clone(),
14299 cc.code.clone(),
14300 rust_decimal::Decimal::from(100),
14301 start_date,
14302 );
14303 rel.functional_currency = cc.currency.clone();
14304 rel
14305 })
14306 .collect();
14307
14308 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14309 builder.add_companies(&companies);
14310 builder.add_ownership_relationships(&relationships);
14311
14312 for pair in &intercompany.matched_pairs {
14314 builder.add_intercompany_edge(
14315 &pair.seller_company,
14316 &pair.buyer_company,
14317 pair.amount,
14318 &format!("{:?}", pair.transaction_type),
14319 );
14320 }
14321
14322 let graph = builder.build();
14323 let node_count = graph.node_count();
14324 let edge_count = graph.edge_count();
14325 stats.graph_node_count += node_count;
14326 stats.graph_edge_count += edge_count;
14327
14328 for format in &self.config.graph_export.formats {
14330 if matches!(
14331 format,
14332 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14333 ) {
14334 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14335 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14336 warn!("Failed to create entity graph output dir: {}", e);
14337 continue;
14338 }
14339 let pyg_config = PyGExportConfig::default();
14340 let exporter = PyGExporter::new(pyg_config);
14341 if let Err(e) = exporter.export(&graph, &format_dir) {
14342 warn!("Failed to export entity graph as PyG: {}", e);
14343 } else {
14344 info!(
14345 "Entity relationship graph exported: {} nodes, {} edges",
14346 node_count, edge_count
14347 );
14348 }
14349 }
14350 }
14351 } else {
14352 debug!(
14353 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14354 self.config.companies.len()
14355 );
14356 }
14357 }
14358
14359 #[allow(clippy::too_many_arguments)]
14366 fn export_hypergraph(
14367 &self,
14368 coa: &Arc<ChartOfAccounts>,
14369 entries: &[JournalEntry],
14370 document_flows: &DocumentFlowSnapshot,
14371 sourcing: &SourcingSnapshot,
14372 hr: &HrSnapshot,
14373 manufacturing: &ManufacturingSnapshot,
14374 banking: &BankingSnapshot,
14375 audit: &AuditSnapshot,
14376 financial_reporting: &FinancialReportingSnapshot,
14377 ocpm: &OcpmSnapshot,
14378 compliance: &ComplianceRegulationsSnapshot,
14379 stats: &mut EnhancedGenerationStatistics,
14380 ) -> SynthResult<HypergraphExportInfo> {
14381 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14382 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14383 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14384 use datasynth_graph::models::hypergraph::AggregationStrategy;
14385
14386 let hg_settings = &self.config.graph_export.hypergraph;
14387
14388 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14390 "truncate" => AggregationStrategy::Truncate,
14391 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14392 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14393 "importance_sample" => AggregationStrategy::ImportanceSample,
14394 _ => AggregationStrategy::PoolByCounterparty,
14395 };
14396
14397 let builder_config = HypergraphConfig {
14398 max_nodes: hg_settings.max_nodes,
14399 aggregation_strategy,
14400 include_coso: hg_settings.governance_layer.include_coso,
14401 include_controls: hg_settings.governance_layer.include_controls,
14402 include_sox: hg_settings.governance_layer.include_sox,
14403 include_vendors: hg_settings.governance_layer.include_vendors,
14404 include_customers: hg_settings.governance_layer.include_customers,
14405 include_employees: hg_settings.governance_layer.include_employees,
14406 include_p2p: hg_settings.process_layer.include_p2p,
14407 include_o2c: hg_settings.process_layer.include_o2c,
14408 include_s2c: hg_settings.process_layer.include_s2c,
14409 include_h2r: hg_settings.process_layer.include_h2r,
14410 include_mfg: hg_settings.process_layer.include_mfg,
14411 include_bank: hg_settings.process_layer.include_bank,
14412 include_audit: hg_settings.process_layer.include_audit,
14413 include_r2r: hg_settings.process_layer.include_r2r,
14414 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14415 docs_per_counterparty_threshold: hg_settings
14416 .process_layer
14417 .docs_per_counterparty_threshold,
14418 include_accounts: hg_settings.accounting_layer.include_accounts,
14419 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14420 include_cross_layer_edges: hg_settings.cross_layer.enabled,
14421 include_compliance: self.config.compliance_regulations.enabled,
14422 include_tax: true,
14423 include_treasury: true,
14424 include_esg: true,
14425 include_project: true,
14426 include_intercompany: true,
14427 include_temporal_events: true,
14428 };
14429
14430 let mut builder = HypergraphBuilder::new(builder_config);
14431
14432 builder.add_coso_framework();
14434
14435 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14438 let controls = InternalControl::standard_controls();
14439 builder.add_controls(&controls);
14440 }
14441
14442 builder.add_vendors(&self.master_data.vendors);
14444 builder.add_customers(&self.master_data.customers);
14445 builder.add_employees(&self.master_data.employees);
14446
14447 builder.add_p2p_documents(
14449 &document_flows.purchase_orders,
14450 &document_flows.goods_receipts,
14451 &document_flows.vendor_invoices,
14452 &document_flows.payments,
14453 );
14454 builder.add_o2c_documents(
14455 &document_flows.sales_orders,
14456 &document_flows.deliveries,
14457 &document_flows.customer_invoices,
14458 );
14459 builder.add_s2c_documents(
14460 &sourcing.sourcing_projects,
14461 &sourcing.qualifications,
14462 &sourcing.rfx_events,
14463 &sourcing.bids,
14464 &sourcing.bid_evaluations,
14465 &sourcing.contracts,
14466 );
14467 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14468 builder.add_mfg_documents(
14469 &manufacturing.production_orders,
14470 &manufacturing.quality_inspections,
14471 &manufacturing.cycle_counts,
14472 );
14473 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14474 builder.add_audit_documents(
14475 &audit.engagements,
14476 &audit.workpapers,
14477 &audit.findings,
14478 &audit.evidence,
14479 &audit.risk_assessments,
14480 &audit.judgments,
14481 &audit.materiality_calculations,
14482 &audit.audit_opinions,
14483 &audit.going_concern_assessments,
14484 );
14485 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14486
14487 if let Some(ref event_log) = ocpm.event_log {
14489 builder.add_ocpm_events(event_log);
14490 }
14491
14492 if self.config.compliance_regulations.enabled
14494 && hg_settings.governance_layer.include_controls
14495 {
14496 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14498 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14499 .standard_records
14500 .iter()
14501 .filter_map(|r| {
14502 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14503 registry.get(&sid).cloned()
14504 })
14505 .collect();
14506
14507 builder.add_compliance_regulations(
14508 &standards,
14509 &compliance.findings,
14510 &compliance.filings,
14511 );
14512 }
14513
14514 builder.add_accounts(coa);
14516 builder.add_journal_entries_as_hyperedges(entries);
14517
14518 let hypergraph = builder.build();
14520
14521 let output_dir = self
14523 .output_path
14524 .clone()
14525 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14526 let hg_dir = output_dir
14527 .join(&self.config.graph_export.output_subdirectory)
14528 .join(&hg_settings.output_subdirectory);
14529
14530 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14532 "unified" => {
14533 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14534 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14535 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14536 })?;
14537 (
14538 metadata.num_nodes,
14539 metadata.num_edges,
14540 metadata.num_hyperedges,
14541 )
14542 }
14543 _ => {
14544 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14546 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14547 SynthError::generation(format!("Hypergraph export failed: {e}"))
14548 })?;
14549 (
14550 metadata.num_nodes,
14551 metadata.num_edges,
14552 metadata.num_hyperedges,
14553 )
14554 }
14555 };
14556
14557 #[cfg(feature = "streaming")]
14559 if let Some(ref target_url) = hg_settings.stream_target {
14560 use crate::stream_client::{StreamClient, StreamConfig};
14561 use std::io::Write as _;
14562
14563 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14564 let stream_config = StreamConfig {
14565 target_url: target_url.clone(),
14566 batch_size: hg_settings.stream_batch_size,
14567 api_key,
14568 ..StreamConfig::default()
14569 };
14570
14571 match StreamClient::new(stream_config) {
14572 Ok(mut client) => {
14573 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14574 match exporter.export_to_writer(&hypergraph, &mut client) {
14575 Ok(_) => {
14576 if let Err(e) = client.flush() {
14577 warn!("Failed to flush stream client: {}", e);
14578 } else {
14579 info!("Streamed {} records to {}", client.total_sent(), target_url);
14580 }
14581 }
14582 Err(e) => {
14583 warn!("Streaming export failed: {}", e);
14584 }
14585 }
14586 }
14587 Err(e) => {
14588 warn!("Failed to create stream client: {}", e);
14589 }
14590 }
14591 }
14592
14593 stats.graph_node_count += num_nodes;
14595 stats.graph_edge_count += num_edges;
14596 stats.graph_export_count += 1;
14597
14598 Ok(HypergraphExportInfo {
14599 node_count: num_nodes,
14600 edge_count: num_edges,
14601 hyperedge_count: num_hyperedges,
14602 output_path: hg_dir,
14603 })
14604 }
14605
14606 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14611 let pb = self.create_progress_bar(100, "Generating Banking Data");
14612
14613 let orchestrator = BankingOrchestratorBuilder::new()
14615 .config(self.config.banking.clone())
14616 .seed(self.seed + 9000)
14617 .country_pack(self.primary_pack().clone())
14618 .build();
14619
14620 if let Some(pb) = &pb {
14621 pb.inc(10);
14622 }
14623
14624 let result = orchestrator.generate();
14626
14627 if let Some(pb) = &pb {
14628 pb.inc(90);
14629 pb.finish_with_message(format!(
14630 "Banking: {} customers, {} transactions",
14631 result.customers.len(),
14632 result.transactions.len()
14633 ));
14634 }
14635
14636 let mut banking_customers = result.customers;
14641 let core_customers = &self.master_data.customers;
14642 if !core_customers.is_empty() {
14643 for (i, bc) in banking_customers.iter_mut().enumerate() {
14644 let core = &core_customers[i % core_customers.len()];
14645 bc.name = CustomerName::business(&core.name);
14646 bc.residence_country = core.country.clone();
14647 bc.enterprise_customer_id = Some(core.customer_id.clone());
14648 }
14649 debug!(
14650 "Cross-referenced {} banking customers with {} core customers",
14651 banking_customers.len(),
14652 core_customers.len()
14653 );
14654 }
14655
14656 Ok(BankingSnapshot {
14657 customers: banking_customers,
14658 accounts: result.accounts,
14659 transactions: result.transactions,
14660 transaction_labels: result.transaction_labels,
14661 customer_labels: result.customer_labels,
14662 account_labels: result.account_labels,
14663 relationship_labels: result.relationship_labels,
14664 narratives: result.narratives,
14665 suspicious_count: result.stats.suspicious_count,
14666 scenario_count: result.scenarios.len(),
14667 })
14668 }
14669
14670 fn calculate_total_transactions(&self) -> u64 {
14672 let months = self.config.global.period_months as f64;
14673 self.config
14674 .companies
14675 .iter()
14676 .map(|c| {
14677 let annual = c.annual_transaction_volume.count() as f64;
14678 let weighted = annual * c.volume_weight;
14679 (weighted * months / 12.0) as u64
14680 })
14681 .sum()
14682 }
14683
14684 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14686 if !self.phase_config.show_progress {
14687 return None;
14688 }
14689
14690 let pb = if let Some(mp) = &self.multi_progress {
14691 mp.add(ProgressBar::new(total))
14692 } else {
14693 ProgressBar::new(total)
14694 };
14695
14696 pb.set_style(
14697 ProgressStyle::default_bar()
14698 .template(&format!(
14699 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14700 ))
14701 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14702 .progress_chars("#>-"),
14703 );
14704
14705 Some(pb)
14706 }
14707
14708 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14710 self.coa.clone()
14711 }
14712
14713 pub fn get_master_data(&self) -> &MasterDataSnapshot {
14715 &self.master_data
14716 }
14717
14718 fn phase_compliance_regulations(
14720 &mut self,
14721 _stats: &mut EnhancedGenerationStatistics,
14722 ) -> SynthResult<ComplianceRegulationsSnapshot> {
14723 if !self.phase_config.generate_compliance_regulations {
14724 return Ok(ComplianceRegulationsSnapshot::default());
14725 }
14726
14727 info!("Phase: Generating Compliance Regulations Data");
14728
14729 let cr_config = &self.config.compliance_regulations;
14730
14731 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14733 self.config
14734 .companies
14735 .iter()
14736 .map(|c| c.country.clone())
14737 .collect::<std::collections::HashSet<_>>()
14738 .into_iter()
14739 .collect()
14740 } else {
14741 cr_config.jurisdictions.clone()
14742 };
14743
14744 let fallback_date =
14746 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14747 let reference_date = cr_config
14748 .reference_date
14749 .as_ref()
14750 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14751 .unwrap_or_else(|| {
14752 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14753 .unwrap_or(fallback_date)
14754 });
14755
14756 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14758 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14759 let cross_reference_records = reg_gen.generate_cross_reference_records();
14760 let jurisdiction_records =
14761 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14762
14763 info!(
14764 " Standards: {} records, {} cross-references, {} jurisdictions",
14765 standard_records.len(),
14766 cross_reference_records.len(),
14767 jurisdiction_records.len()
14768 );
14769
14770 let audit_procedures = if cr_config.audit_procedures.enabled {
14772 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14773 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14774 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14775 confidence_level: cr_config.audit_procedures.confidence_level,
14776 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14777 };
14778 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14779 self.seed + 9000,
14780 proc_config,
14781 );
14782 let registry = reg_gen.registry();
14783 let mut all_procs = Vec::new();
14784 for jurisdiction in &jurisdictions {
14785 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14786 all_procs.extend(procs);
14787 }
14788 info!(" Audit procedures: {}", all_procs.len());
14789 all_procs
14790 } else {
14791 Vec::new()
14792 };
14793
14794 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14796 let finding_config =
14797 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14798 finding_rate: cr_config.findings.finding_rate,
14799 material_weakness_rate: cr_config.findings.material_weakness_rate,
14800 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14801 generate_remediation: cr_config.findings.generate_remediation,
14802 };
14803 let mut finding_gen =
14804 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14805 self.seed + 9100,
14806 finding_config,
14807 );
14808 let mut all_findings = Vec::new();
14809 for company in &self.config.companies {
14810 let company_findings =
14811 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14812 all_findings.extend(company_findings);
14813 }
14814 info!(" Compliance findings: {}", all_findings.len());
14815 all_findings
14816 } else {
14817 Vec::new()
14818 };
14819
14820 let filings = if cr_config.filings.enabled {
14822 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14823 filing_types: cr_config.filings.filing_types.clone(),
14824 generate_status_progression: cr_config.filings.generate_status_progression,
14825 };
14826 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14827 self.seed + 9200,
14828 filing_config,
14829 );
14830 let company_codes: Vec<String> = self
14831 .config
14832 .companies
14833 .iter()
14834 .map(|c| c.code.clone())
14835 .collect();
14836 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14837 .unwrap_or(fallback_date);
14838 let filings = filing_gen.generate_filings(
14839 &company_codes,
14840 &jurisdictions,
14841 start_date,
14842 self.config.global.period_months,
14843 );
14844 info!(" Regulatory filings: {}", filings.len());
14845 filings
14846 } else {
14847 Vec::new()
14848 };
14849
14850 let compliance_graph = if cr_config.graph.enabled {
14852 let graph_config = datasynth_graph::ComplianceGraphConfig {
14853 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14854 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14855 include_cross_references: cr_config.graph.include_cross_references,
14856 include_supersession_edges: cr_config.graph.include_supersession_edges,
14857 include_account_links: cr_config.graph.include_account_links,
14858 include_control_links: cr_config.graph.include_control_links,
14859 include_company_links: cr_config.graph.include_company_links,
14860 };
14861 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14862
14863 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14865 .iter()
14866 .map(|r| datasynth_graph::StandardNodeInput {
14867 standard_id: r.standard_id.clone(),
14868 title: r.title.clone(),
14869 category: r.category.clone(),
14870 domain: r.domain.clone(),
14871 is_active: r.is_active,
14872 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14873 applicable_account_types: r.applicable_account_types.clone(),
14874 applicable_processes: r.applicable_processes.clone(),
14875 })
14876 .collect();
14877 builder.add_standards(&standard_inputs);
14878
14879 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14881 jurisdiction_records
14882 .iter()
14883 .map(|r| datasynth_graph::JurisdictionNodeInput {
14884 country_code: r.country_code.clone(),
14885 country_name: r.country_name.clone(),
14886 framework: r.accounting_framework.clone(),
14887 standard_count: r.standard_count,
14888 tax_rate: r.statutory_tax_rate,
14889 })
14890 .collect();
14891 builder.add_jurisdictions(&jurisdiction_inputs);
14892
14893 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14895 cross_reference_records
14896 .iter()
14897 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14898 from_standard: r.from_standard.clone(),
14899 to_standard: r.to_standard.clone(),
14900 relationship: r.relationship.clone(),
14901 convergence_level: r.convergence_level,
14902 })
14903 .collect();
14904 builder.add_cross_references(&xref_inputs);
14905
14906 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14908 .iter()
14909 .map(|r| datasynth_graph::JurisdictionMappingInput {
14910 country_code: r.jurisdiction.clone(),
14911 standard_id: r.standard_id.clone(),
14912 })
14913 .collect();
14914 builder.add_jurisdiction_mappings(&mapping_inputs);
14915
14916 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14918 .iter()
14919 .map(|p| datasynth_graph::ProcedureNodeInput {
14920 procedure_id: p.procedure_id.clone(),
14921 standard_id: p.standard_id.clone(),
14922 procedure_type: p.procedure_type.clone(),
14923 sample_size: p.sample_size,
14924 confidence_level: p.confidence_level,
14925 })
14926 .collect();
14927 builder.add_procedures(&proc_inputs);
14928
14929 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14931 .iter()
14932 .map(|f| datasynth_graph::FindingNodeInput {
14933 finding_id: f.finding_id.to_string(),
14934 standard_id: f
14935 .related_standards
14936 .first()
14937 .map(|s| s.as_str().to_string())
14938 .unwrap_or_default(),
14939 severity: f.severity.to_string(),
14940 deficiency_level: f.deficiency_level.to_string(),
14941 severity_score: f.deficiency_level.severity_score(),
14942 control_id: f.control_id.clone(),
14943 affected_accounts: f.affected_accounts.clone(),
14944 })
14945 .collect();
14946 builder.add_findings(&finding_inputs);
14947
14948 if cr_config.graph.include_account_links {
14950 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14951 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14952 for std_record in &standard_records {
14953 if let Some(std_obj) =
14954 registry.get(&datasynth_core::models::compliance::StandardId::parse(
14955 &std_record.standard_id,
14956 ))
14957 {
14958 for acct_type in &std_obj.applicable_account_types {
14959 account_links.push(datasynth_graph::AccountLinkInput {
14960 standard_id: std_record.standard_id.clone(),
14961 account_code: acct_type.clone(),
14962 account_name: acct_type.clone(),
14963 });
14964 }
14965 }
14966 }
14967 builder.add_account_links(&account_links);
14968 }
14969
14970 if cr_config.graph.include_control_links {
14972 let mut control_links = Vec::new();
14973 let sox_like_ids: Vec<String> = standard_records
14975 .iter()
14976 .filter(|r| {
14977 r.standard_id.starts_with("SOX")
14978 || r.standard_id.starts_with("PCAOB-AS-2201")
14979 })
14980 .map(|r| r.standard_id.clone())
14981 .collect();
14982 let control_ids = [
14984 ("C001", "Cash Controls"),
14985 ("C002", "Large Transaction Approval"),
14986 ("C010", "PO Approval"),
14987 ("C011", "Three-Way Match"),
14988 ("C020", "Revenue Recognition"),
14989 ("C021", "Credit Check"),
14990 ("C030", "Manual JE Approval"),
14991 ("C031", "Period Close Review"),
14992 ("C032", "Account Reconciliation"),
14993 ("C040", "Payroll Processing"),
14994 ("C050", "Fixed Asset Capitalization"),
14995 ("C060", "Intercompany Elimination"),
14996 ];
14997 for sox_id in &sox_like_ids {
14998 for (ctrl_id, ctrl_name) in &control_ids {
14999 control_links.push(datasynth_graph::ControlLinkInput {
15000 standard_id: sox_id.clone(),
15001 control_id: ctrl_id.to_string(),
15002 control_name: ctrl_name.to_string(),
15003 });
15004 }
15005 }
15006 builder.add_control_links(&control_links);
15007 }
15008
15009 if cr_config.graph.include_company_links {
15011 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15012 .iter()
15013 .enumerate()
15014 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15015 filing_id: format!("F{:04}", i + 1),
15016 filing_type: f.filing_type.to_string(),
15017 company_code: f.company_code.clone(),
15018 jurisdiction: f.jurisdiction.clone(),
15019 status: format!("{:?}", f.status),
15020 })
15021 .collect();
15022 builder.add_filings(&filing_inputs);
15023 }
15024
15025 let graph = builder.build();
15026 info!(
15027 " Compliance graph: {} nodes, {} edges",
15028 graph.nodes.len(),
15029 graph.edges.len()
15030 );
15031 Some(graph)
15032 } else {
15033 None
15034 };
15035
15036 self.check_resources_with_log("post-compliance-regulations")?;
15037
15038 Ok(ComplianceRegulationsSnapshot {
15039 standard_records,
15040 cross_reference_records,
15041 jurisdiction_records,
15042 audit_procedures,
15043 findings,
15044 filings,
15045 compliance_graph,
15046 })
15047 }
15048
15049 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15051 use super::lineage::LineageGraphBuilder;
15052
15053 let mut builder = LineageGraphBuilder::new();
15054
15055 builder.add_config_section("config:global", "Global Config");
15057 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15058 builder.add_config_section("config:transactions", "Transaction Config");
15059
15060 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15062 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15063
15064 builder.configured_by("phase:coa", "config:chart_of_accounts");
15066 builder.configured_by("phase:je", "config:transactions");
15067
15068 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15070 builder.produced_by("output:je", "phase:je");
15071
15072 if self.phase_config.generate_master_data {
15074 builder.add_config_section("config:master_data", "Master Data Config");
15075 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15076 builder.configured_by("phase:master_data", "config:master_data");
15077 builder.input_to("phase:master_data", "phase:je");
15078 }
15079
15080 if self.phase_config.generate_document_flows {
15081 builder.add_config_section("config:document_flows", "Document Flow Config");
15082 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15083 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15084 builder.configured_by("phase:p2p", "config:document_flows");
15085 builder.configured_by("phase:o2c", "config:document_flows");
15086
15087 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15088 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15089 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15090 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15091 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15092
15093 builder.produced_by("output:po", "phase:p2p");
15094 builder.produced_by("output:gr", "phase:p2p");
15095 builder.produced_by("output:vi", "phase:p2p");
15096 builder.produced_by("output:so", "phase:o2c");
15097 builder.produced_by("output:ci", "phase:o2c");
15098 }
15099
15100 if self.phase_config.inject_anomalies {
15101 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15102 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15103 builder.configured_by("phase:anomaly", "config:fraud");
15104 builder.add_output_file(
15105 "output:labels",
15106 "Anomaly Labels",
15107 "labels/anomaly_labels.csv",
15108 );
15109 builder.produced_by("output:labels", "phase:anomaly");
15110 }
15111
15112 if self.phase_config.generate_audit {
15113 builder.add_config_section("config:audit", "Audit Config");
15114 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15115 builder.configured_by("phase:audit", "config:audit");
15116 }
15117
15118 if self.phase_config.generate_banking {
15119 builder.add_config_section("config:banking", "Banking Config");
15120 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15121 builder.configured_by("phase:banking", "config:banking");
15122 }
15123
15124 if self.config.llm.enabled {
15125 builder.add_config_section("config:llm", "LLM Enrichment Config");
15126 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15127 builder.configured_by("phase:llm_enrichment", "config:llm");
15128 }
15129
15130 if self.config.diffusion.enabled {
15131 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15132 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15133 builder.configured_by("phase:diffusion", "config:diffusion");
15134 }
15135
15136 if self.config.causal.enabled {
15137 builder.add_config_section("config:causal", "Causal Generation Config");
15138 builder.add_generator_phase("phase:causal", "Causal Overlay");
15139 builder.configured_by("phase:causal", "config:causal");
15140 }
15141
15142 builder.build()
15143 }
15144
15145 fn compute_company_revenue(
15154 entries: &[JournalEntry],
15155 company_code: &str,
15156 ) -> rust_decimal::Decimal {
15157 use rust_decimal::Decimal;
15158 let mut revenue = Decimal::ZERO;
15159 for je in entries {
15160 if je.header.company_code != company_code {
15161 continue;
15162 }
15163 for line in &je.lines {
15164 if line.gl_account.starts_with('4') {
15165 revenue += line.credit_amount - line.debit_amount;
15167 }
15168 }
15169 }
15170 revenue.max(Decimal::ZERO)
15171 }
15172
15173 fn compute_entity_net_assets(
15177 entries: &[JournalEntry],
15178 entity_code: &str,
15179 ) -> rust_decimal::Decimal {
15180 use rust_decimal::Decimal;
15181 let mut asset_net = Decimal::ZERO;
15182 let mut liability_net = Decimal::ZERO;
15183 for je in entries {
15184 if je.header.company_code != entity_code {
15185 continue;
15186 }
15187 for line in &je.lines {
15188 if line.gl_account.starts_with('1') {
15189 asset_net += line.debit_amount - line.credit_amount;
15190 } else if line.gl_account.starts_with('2') {
15191 liability_net += line.credit_amount - line.debit_amount;
15192 }
15193 }
15194 }
15195 asset_net - liability_net
15196 }
15197
15198 fn phase_statistical_validation(
15209 &self,
15210 entries: &[JournalEntry],
15211 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15212 use datasynth_config::schema::StatisticalTestConfig;
15213 use datasynth_core::distributions::{
15214 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15215 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15216 };
15217 use rust_decimal::prelude::ToPrimitive;
15218
15219 let cfg = &self.config.distributions.validation;
15220 if !cfg.enabled {
15221 return Ok(None);
15222 }
15223
15224 let amounts: Vec<rust_decimal::Decimal> = entries
15227 .iter()
15228 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15229 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15230 .collect();
15231
15232 let paired_amount_linecount: Vec<(f64, f64)> = entries
15236 .iter()
15237 .filter_map(|je| {
15238 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15239 if amt > rust_decimal::Decimal::ZERO {
15240 amt.to_f64().map(|a| (a, je.lines.len() as f64))
15241 } else {
15242 None
15243 }
15244 })
15245 .collect();
15246
15247 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15248 for test_cfg in &cfg.tests {
15249 match test_cfg {
15250 StatisticalTestConfig::BenfordFirstDigit {
15251 threshold_mad,
15252 warning_mad,
15253 } => {
15254 results.push(run_benford_first_digit(
15255 &amounts,
15256 *threshold_mad,
15257 *warning_mad,
15258 ));
15259 }
15260 StatisticalTestConfig::ChiSquared { bins, significance } => {
15261 results.push(run_chi_squared(&amounts, *bins, *significance));
15262 }
15263 StatisticalTestConfig::DistributionFit {
15264 target: _,
15265 ks_significance,
15266 method: _,
15267 } => {
15268 results.push(run_ks_uniform_log(&amounts, *ks_significance));
15271 }
15272 StatisticalTestConfig::AndersonDarling {
15273 target: _,
15274 significance,
15275 } => {
15276 results.push(run_anderson_darling(&amounts, *significance));
15279 }
15280 StatisticalTestConfig::CorrelationCheck {
15281 expected_correlations,
15282 } => {
15283 if expected_correlations.is_empty() {
15287 results.push(StatisticalTestResult {
15288 name: "correlation_check".to_string(),
15289 outcome: TestOutcome::Skipped,
15290 statistic: 0.0,
15291 threshold: 0.0,
15292 message: "no expected correlations declared".to_string(),
15293 });
15294 } else {
15295 for ec in expected_correlations {
15296 let pair_key = format!("{}_{}", ec.field1, ec.field2);
15297 let is_amount_linecount = (ec.field1 == "amount"
15298 && ec.field2 == "line_count")
15299 || (ec.field1 == "line_count" && ec.field2 == "amount");
15300 if is_amount_linecount {
15301 let xs: Vec<f64> =
15302 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15303 let ys: Vec<f64> =
15304 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15305 results.push(run_correlation_check(
15306 &pair_key,
15307 &xs,
15308 &ys,
15309 ec.expected_r,
15310 ec.tolerance,
15311 ));
15312 } else {
15313 results.push(StatisticalTestResult {
15314 name: format!("correlation_check_{pair_key}"),
15315 outcome: TestOutcome::Skipped,
15316 statistic: 0.0,
15317 threshold: ec.tolerance,
15318 message: format!(
15319 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15320 ec.field1, ec.field2
15321 ),
15322 });
15323 }
15324 }
15325 }
15326 }
15327 }
15328 }
15329
15330 let report = StatisticalValidationReport {
15331 sample_count: amounts.len(),
15332 results,
15333 };
15334
15335 if cfg.reporting.fail_on_error && !report.all_passed() {
15336 let failed = report.failed_names().join(", ");
15337 return Err(SynthError::validation(format!(
15338 "statistical validation failed: {failed}"
15339 )));
15340 }
15341
15342 Ok(Some(report))
15343 }
15344
15345 fn phase_analytics_metadata(
15358 &mut self,
15359 entries: &[JournalEntry],
15360 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15361 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15362 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15363 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15364 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15365 use std::collections::BTreeMap;
15366
15367 let mut snap = AnalyticsMetadataSnapshot::default();
15368
15369 if !self.phase_config.generate_analytics_metadata {
15370 return Ok(snap);
15371 }
15372
15373 let cfg = &self.config.analytics_metadata;
15374 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15375 .map(|d| d.year())
15376 .unwrap_or(2025);
15377
15378 if cfg.prior_year {
15380 let mut gen = PriorYearGenerator::new(self.seed + 9100);
15381 for company in &self.config.companies {
15382 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15385 BTreeMap::new();
15386 for je in entries {
15387 if je.header.company_code != company.code {
15388 continue;
15389 }
15390 for line in &je.lines {
15391 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15392 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15393 });
15394 entry.1 += line.debit_amount - line.credit_amount;
15395 }
15396 }
15397 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15398 .into_iter()
15399 .filter(|(_, (_, bal))| !bal.is_zero())
15400 .map(|(code, (name, bal))| (code, name, bal))
15401 .collect();
15402 if !current.is_empty() {
15403 let comparatives =
15404 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
15405 snap.prior_year_comparatives.extend(comparatives);
15406 }
15407 }
15408 info!(
15409 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15410 snap.prior_year_comparatives.len(),
15411 self.config.companies.len()
15412 );
15413 }
15414
15415 if cfg.industry_benchmark {
15417 use datasynth_core::models::IndustrySector;
15418 let industry = match self.config.global.industry {
15419 IndustrySector::Manufacturing => "manufacturing",
15420 IndustrySector::Retail => "retail",
15421 IndustrySector::FinancialServices => "financial_services",
15422 IndustrySector::Technology => "technology",
15423 IndustrySector::Healthcare => "healthcare",
15424 _ => "other",
15425 };
15426 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15427 let benchmarks = gen.generate(industry, fiscal_year);
15428 info!(
15429 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15430 benchmarks.len()
15431 );
15432 snap.industry_benchmarks = benchmarks;
15433 }
15434
15435 if cfg.management_reports {
15437 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15438 let period_months = self.config.global.period_months;
15439 for company in &self.config.companies {
15440 let reports =
15441 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15442 snap.management_reports.extend(reports);
15443 }
15444 info!(
15445 "v3.3.0 analytics: {} management reports across {} companies",
15446 snap.management_reports.len(),
15447 self.config.companies.len()
15448 );
15449 }
15450
15451 if cfg.drift_events {
15453 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15454 .expect("hardcoded NaiveDate 2025-01-01 is valid");
15455 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15456 .unwrap_or(fallback_start);
15457 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15458 let mut gen = DriftEventGenerator::new(self.seed + 9400);
15459 let drifts = gen.generate_standalone_drifts(start_date, end_date);
15460 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15461 snap.drift_events = drifts;
15462 }
15463 let _ = entries;
15465
15466 Ok(snap)
15467 }
15468}
15469
15470fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15472 match format {
15473 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15474 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15475 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15476 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15477 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15478 }
15479}
15480
15481fn compute_trial_balance_entries(
15486 entries: &[JournalEntry],
15487 entity_code: &str,
15488 fiscal_year: i32,
15489 coa: Option<&ChartOfAccounts>,
15490) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15491 use std::collections::BTreeMap;
15492
15493 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15494 BTreeMap::new();
15495
15496 for je in entries {
15497 for line in &je.lines {
15498 let entry = balances.entry(line.account_code.clone()).or_default();
15499 entry.0 += line.debit_amount;
15500 entry.1 += line.credit_amount;
15501 }
15502 }
15503
15504 balances
15505 .into_iter()
15506 .map(
15507 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15508 account_description: coa
15509 .and_then(|c| c.get_account(&account_code))
15510 .map(|a| a.description().to_string())
15511 .unwrap_or_else(|| account_code.clone()),
15512 account_code,
15513 debit_balance: debit,
15514 credit_balance: credit,
15515 net_balance: debit - credit,
15516 entity_code: entity_code.to_string(),
15517 period: format!("FY{}", fiscal_year),
15518 },
15519 )
15520 .collect()
15521}
15522
15523#[cfg(test)]
15524#[allow(clippy::unwrap_used)]
15525mod tests {
15526 use super::*;
15527 use datasynth_config::schema::*;
15528
15529 fn create_test_config() -> GeneratorConfig {
15530 GeneratorConfig {
15531 global: GlobalConfig {
15532 industry: IndustrySector::Manufacturing,
15533 start_date: "2024-01-01".to_string(),
15534 period_months: 1,
15535 seed: Some(42),
15536 parallel: false,
15537 group_currency: "USD".to_string(),
15538 presentation_currency: None,
15539 worker_threads: 0,
15540 memory_limit_mb: 0,
15541 fiscal_year_months: None,
15542 },
15543 companies: vec![CompanyConfig {
15544 code: "1000".to_string(),
15545 name: "Test Company".to_string(),
15546 currency: "USD".to_string(),
15547 functional_currency: None,
15548 country: "US".to_string(),
15549 annual_transaction_volume: TransactionVolume::TenK,
15550 volume_weight: 1.0,
15551 fiscal_year_variant: "K4".to_string(),
15552 }],
15553 chart_of_accounts: ChartOfAccountsConfig {
15554 complexity: CoAComplexity::Small,
15555 industry_specific: true,
15556 custom_accounts: None,
15557 min_hierarchy_depth: 2,
15558 max_hierarchy_depth: 4,
15559 },
15560 transactions: TransactionConfig::default(),
15561 output: OutputConfig::default(),
15562 fraud: FraudConfig::default(),
15563 internal_controls: InternalControlsConfig::default(),
15564 business_processes: BusinessProcessConfig::default(),
15565 user_personas: UserPersonaConfig::default(),
15566 templates: TemplateConfig::default(),
15567 approval: ApprovalConfig::default(),
15568 departments: DepartmentConfig::default(),
15569 master_data: MasterDataConfig::default(),
15570 document_flows: DocumentFlowConfig::default(),
15571 intercompany: IntercompanyConfig::default(),
15572 balance: BalanceConfig::default(),
15573 ocpm: OcpmConfig::default(),
15574 audit: AuditGenerationConfig::default(),
15575 banking: datasynth_banking::BankingConfig::default(),
15576 data_quality: DataQualitySchemaConfig::default(),
15577 scenario: ScenarioConfig::default(),
15578 temporal: TemporalDriftConfig::default(),
15579 graph_export: GraphExportConfig::default(),
15580 streaming: StreamingSchemaConfig::default(),
15581 rate_limit: RateLimitSchemaConfig::default(),
15582 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15583 relationships: RelationshipSchemaConfig::default(),
15584 accounting_standards: AccountingStandardsConfig::default(),
15585 audit_standards: AuditStandardsConfig::default(),
15586 distributions: Default::default(),
15587 temporal_patterns: Default::default(),
15588 vendor_network: VendorNetworkSchemaConfig::default(),
15589 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15590 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15591 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15592 organizational_events: OrganizationalEventsSchemaConfig::default(),
15593 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15594 market_drift: MarketDriftSchemaConfig::default(),
15595 drift_labeling: DriftLabelingSchemaConfig::default(),
15596 anomaly_injection: Default::default(),
15597 industry_specific: Default::default(),
15598 fingerprint_privacy: Default::default(),
15599 quality_gates: Default::default(),
15600 compliance: Default::default(),
15601 webhooks: Default::default(),
15602 llm: Default::default(),
15603 diffusion: Default::default(),
15604 causal: Default::default(),
15605 source_to_pay: Default::default(),
15606 financial_reporting: Default::default(),
15607 hr: Default::default(),
15608 manufacturing: Default::default(),
15609 sales_quotes: Default::default(),
15610 tax: Default::default(),
15611 treasury: Default::default(),
15612 project_accounting: Default::default(),
15613 esg: Default::default(),
15614 country_packs: None,
15615 scenarios: Default::default(),
15616 session: Default::default(),
15617 compliance_regulations: Default::default(),
15618 analytics_metadata: Default::default(),
15619 }
15620 }
15621
15622 #[test]
15623 fn test_enhanced_orchestrator_creation() {
15624 let config = create_test_config();
15625 let orchestrator = EnhancedOrchestrator::with_defaults(config);
15626 assert!(orchestrator.is_ok());
15627 }
15628
15629 #[test]
15630 fn test_minimal_generation() {
15631 let config = create_test_config();
15632 let phase_config = PhaseConfig {
15633 generate_master_data: false,
15634 generate_document_flows: false,
15635 generate_journal_entries: true,
15636 inject_anomalies: false,
15637 show_progress: false,
15638 ..Default::default()
15639 };
15640
15641 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15642 let result = orchestrator.generate();
15643
15644 assert!(result.is_ok());
15645 let result = result.unwrap();
15646 assert!(!result.journal_entries.is_empty());
15647 }
15648
15649 #[test]
15650 fn test_master_data_generation() {
15651 let config = create_test_config();
15652 let phase_config = PhaseConfig {
15653 generate_master_data: true,
15654 generate_document_flows: false,
15655 generate_journal_entries: false,
15656 inject_anomalies: false,
15657 show_progress: false,
15658 vendors_per_company: 5,
15659 customers_per_company: 5,
15660 materials_per_company: 10,
15661 assets_per_company: 5,
15662 employees_per_company: 10,
15663 ..Default::default()
15664 };
15665
15666 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15667 let result = orchestrator.generate().unwrap();
15668
15669 assert!(!result.master_data.vendors.is_empty());
15670 assert!(!result.master_data.customers.is_empty());
15671 assert!(!result.master_data.materials.is_empty());
15672 }
15673
15674 #[test]
15675 fn test_document_flow_generation() {
15676 let config = create_test_config();
15677 let phase_config = PhaseConfig {
15678 generate_master_data: true,
15679 generate_document_flows: true,
15680 generate_journal_entries: false,
15681 inject_anomalies: false,
15682 inject_data_quality: false,
15683 validate_balances: false,
15684 validate_coa_coverage_strict: false,
15685 generate_ocpm_events: false,
15686 show_progress: false,
15687 vendors_per_company: 5,
15688 customers_per_company: 5,
15689 materials_per_company: 10,
15690 assets_per_company: 5,
15691 employees_per_company: 10,
15692 p2p_chains: 5,
15693 o2c_chains: 5,
15694 ..Default::default()
15695 };
15696
15697 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15698 let result = orchestrator.generate().unwrap();
15699
15700 assert!(!result.document_flows.p2p_chains.is_empty());
15702 assert!(!result.document_flows.o2c_chains.is_empty());
15703
15704 assert!(!result.document_flows.purchase_orders.is_empty());
15706 assert!(!result.document_flows.sales_orders.is_empty());
15707 }
15708
/// Generates journal entries with anomaly injection switched on and checks
/// that entries exist and an anomaly-label summary was produced.
#[test]
fn test_anomaly_injection() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Journal entries are the input the injector works on.
    assert!(!output.journal_entries.is_empty());

    // A summary must be attached to the anomaly labels.
    assert!(output.anomaly_labels.summary.is_some());
}
15731
/// Exercises master data, document flows, journal entries and balance
/// validation in a single run and checks that each stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        // Enabled phases.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        // Disabled phases.
        inject_anomalies: false,
        inject_data_quality: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Requested entity and chain counts.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Master data, document flows and journal entries were all produced.
    let master = &output.master_data;
    let flows = &output.document_flows;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices exist on both the AP and AR side.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and processed at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
15774
/// Checks that subledger invoices mirror the invoices found in the document
/// flows one-to-one, and that the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        // Phase toggles: master data plus document flows, everything else off.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        show_progress: false,
        // Requested entity and chain counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the actual subledger sizes.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
15831
/// Generates journal entries with balance validation enabled and checks that
/// validation ran, found no unbalanced entries, and that debits equal credits.
#[test]
fn test_balance_validation() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        validate_coa_coverage_strict: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation was performed over a non-empty set of entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No individual entry is out of balance.
    assert!(!validation.has_unbalanced_entries);

    // Totals must match across all processed entries.
    assert_eq!(validation.total_debits, validation.total_credits);
}
15862
/// Checks that the reported statistics match the sizes of the collections
/// actually returned by the orchestrator.
#[test]
fn test_statistics_accuracy() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Distinct counts per entity type.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    // Master-data counters.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());

    // Journal-entry counter (cast to `usize` for comparison with `len()`).
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
15901
/// Pins the default `PhaseConfig`: core phases, balance validation and
/// progress display are on, anomaly injection is off, and the per-company
/// vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Phases that are on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is opt-in.
    assert!(!defaults.inject_anomalies);

    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default volumes are non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
15914
/// A freshly constructed orchestrator exposes no chart of accounts until
/// `generate` has been called.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // Nothing has been generated yet, so there is no CoA to return.
    assert!(engine.get_coa().is_none());
}
15923
/// After a journal-entry-only run, the orchestrator exposes a chart of
/// accounts through `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    // Only the side effect on the orchestrator matters; discard the result immediately.
    drop(engine.generate().unwrap());

    assert!(engine.get_coa().is_some());
}
15942
/// Runs a master-data-only generation and checks that vendors are present in
/// the returned master data.
#[test]
fn test_get_master_data() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Five of each entity type per company.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
15966
/// `with_progress(false)` must turn off `show_progress` in the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let generator_config = create_test_config();
    let base = EnhancedOrchestrator::with_defaults(generator_config).unwrap();
    let quiet = base.with_progress(false);

    // The builder call is reflected in the stored phase configuration.
    assert!(!quiet.phase_config.show_progress);
}
15977
/// Adds a second company to the test configuration and checks that master
/// data is generated for both companies and that the company count is
/// reported as two.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    // Append a subsidiary; the final `companies_count == 2` assertion implies the
    // base test config is expected to contain exactly one company.
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 vendors and 5 customers are requested per company, so two companies
    // must yield at least 10 of each.
    assert!(
        result.statistics.vendor_count >= 10,
        "expected at least 10 vendors across 2 companies, got {}",
        result.statistics.vendor_count
    );
    assert!(
        result.statistics.customer_count >= 10,
        "expected at least 10 customers across 2 companies, got {}",
        result.statistics.customer_count
    );

    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
16014
/// With master data disabled but document flows requested, generation still
/// succeeds and produces no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        // No master data is generated...
        generate_master_data: false,
        // ...although document flows are requested.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Both chain collections stay empty.
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
16034
/// The `total_line_items` statistic must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the line-item total independently of the statistics.
    let expected_line_items = output
        .journal_entries
        .iter()
        .fold(0u64, |running, entry| running + entry.line_count() as u64);

    assert_eq!(output.statistics.total_line_items, expected_line_items);
}
16058
/// Runs audit generation for two engagements and checks that the audit
/// artifacts are produced and that every audit statistic matches the length of
/// its corresponding collection.
#[test]
fn test_audit_generation() {
    let generator_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_audit: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Audit volume settings.
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts: exactly the requested engagements, and non-empty
    // collections for everything hanging off them.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every audit statistic must agree with the size of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
16178
/// With the test configuration's LLM, diffusion and causal sections disabled,
/// a journal-entry run must leave all of their statistics at zero/`None` and
/// produce no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let generator_config = create_test_config();

    // Precondition: the test config does not enable any of the new phases.
    assert!(!generator_config.llm.enabled);
    assert!(!generator_config.diffusion.enabled);
    assert!(!generator_config.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Neither did diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Nor the causal overlay.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // Counterfactuals are absent from both the statistics and the result.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16210
/// With counterfactual generation enabled (and period close disabled), every
/// journal entry must have exactly one counterfactual pair, the statistics
/// must report the same count, and all pair ids must be unique.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // NOTE(review): when no journal entries are produced this test passes
    // without checking anything; the guard is kept to preserve that behaviour.
    if !result.journal_entries.is_empty() {
        assert_eq!(
            result.counterfactual_pairs.len(),
            result.journal_entries.len(),
            "expected one counterfactual pair per journal entry"
        );
        assert_eq!(
            result.statistics.counterfactual_pair_count,
            result.journal_entries.len(),
            "counterfactual_pair_count statistic should match the journal entry count"
        );

        // Borrow the ids instead of cloning them: the set is only used to
        // detect duplicates and never outlives `result`.
        let ids: std::collections::HashSet<_> = result
            .counterfactual_pairs
            .iter()
            .map(|p| &p.pair_id)
            .collect();
        assert_eq!(
            ids.len(),
            result.counterfactual_pairs.len(),
            "counterfactual pair ids must be unique"
        );
    }
}
16247
/// With LLM enrichment enabled and capped at three vendors, at least one and
/// at most three vendors must be reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut generator_config = create_test_config();
    generator_config.llm.enabled = true;
    generator_config.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // More vendors requested (5) than the enrichment cap (3).
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Enrichment happened...
    assert!(stats.llm_vendors_enriched > 0);
    // ...and respected the configured cap.
    assert!(stats.llm_vendors_enriched <= 3);
}
16275
/// With diffusion enabled and a sample size of 20, the statistics must report
/// exactly 20 generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut generator_config = create_test_config();
    generator_config.diffusion.enabled = true;
    generator_config.diffusion.n_steps = 50;
    generator_config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // The sample count equals the configured `sample_size`.
    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
16298
/// With the causal overlay enabled on the `fraud_detection` template and
/// validation requested, 100 samples must be generated and a validation
/// outcome must be recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut generator_config = create_test_config();
    generator_config.causal.enabled = true;
    generator_config.causal.template = String::from("fraud_detection");
    generator_config.causal.sample_size = 100;
    generator_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The sample count equals the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 100);

    // Validation was requested, so a pass/fail outcome must be present.
    assert!(stats.causal_validation_passed.is_some());
}
16324
/// With the causal overlay enabled on the `revenue_cycle` template and
/// validation turned off, 50 samples must be generated and no validation
/// outcome must be recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut generator_config = create_test_config();
    generator_config.causal.enabled = true;
    generator_config.causal.template = String::from("revenue_cycle");
    generator_config.causal.sample_size = 50;
    generator_config.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The sample count equals the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 50);

    // Validation was not requested, so no outcome is stored.
    assert!(stats.causal_validation_passed.is_none());
}
16350
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in the
/// same run and checks that each one reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut generator_config = create_test_config();

    // LLM enrichment.
    generator_config.llm.enabled = true;
    generator_config.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    generator_config.diffusion.enabled = true;
    generator_config.diffusion.n_steps = 20;
    generator_config.diffusion.sample_size = 10;

    // Causal overlay with validation.
    generator_config.causal.enabled = true;
    generator_config.causal.sample_size = 50;
    generator_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(generator_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Each phase reports the expected figures.
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
16386
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that the
/// LLM, diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        // LLM fields.
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        // Diffusion fields.
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        // Causal fields.
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let round_tripped: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(round_tripped.llm_enrichment_ms, 42);
    assert_eq!(round_tripped.llm_vendors_enriched, 10);
    assert_eq!(round_tripped.diffusion_enhancement_ms, 100);
    assert_eq!(round_tripped.diffusion_samples_generated, 50);
    assert_eq!(round_tripped.causal_generation_ms, 200);
    assert_eq!(round_tripped.causal_samples_generated, 100);
    assert_eq!(round_tripped.causal_validation_passed, Some(true));
}
16413
/// Deserializes a statistics JSON document that lacks the LLM, diffusion and
/// causal fields and checks that those fields fall back to zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // JSON payload without any of the llm_*, diffusion_* or causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Missing fields must take their default values.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
16463}