1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195 #[allow(clippy::field_reassign_with_default)]
196 {
197 let mut s = DataQualityStats::default();
198 s.total_records = n_entries;
199 s.missing_values.total_records = n_entries;
200 s.format_variations.total_processed = n_entries;
201 s.duplicates.total_processed = n_entries;
202 s
203 }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207 let payment_behavior = &schema_config.payment_behavior;
208 let late_dist = &payment_behavior.late_payment_days_distribution;
209
210 P2PGeneratorConfig {
211 three_way_match_rate: schema_config.three_way_match_rate,
212 partial_delivery_rate: schema_config.partial_delivery_rate,
213 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214 price_variance_rate: schema_config.price_variance_rate,
215 max_price_variance_percent: schema_config.max_price_variance_percent,
216 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219 payment_method_distribution: vec![
220 (PaymentMethod::BankTransfer, 0.60),
221 (PaymentMethod::Check, 0.25),
222 (PaymentMethod::Wire, 0.10),
223 (PaymentMethod::CreditCard, 0.05),
224 ],
225 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226 payment_behavior: P2PPaymentBehavior {
227 late_payment_rate: payment_behavior.late_payment_rate,
228 late_payment_distribution: LatePaymentDistribution {
229 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230 late_8_to_14: late_dist.late_8_to_14,
231 very_late_15_to_30: late_dist.very_late_15_to_30,
232 severely_late_31_to_60: late_dist.severely_late_31_to_60,
233 extremely_late_over_60: late_dist.extremely_late_over_60,
234 },
235 partial_payment_rate: payment_behavior.partial_payment_rate,
236 payment_correction_rate: payment_behavior.payment_correction_rate,
237 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238 },
239 }
240}
241
242fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244 let payment_behavior = &schema_config.payment_behavior;
245
246 O2CGeneratorConfig {
247 credit_check_failure_rate: schema_config.credit_check_failure_rate,
248 partial_shipment_rate: schema_config.partial_shipment_rate,
249 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253 bad_debt_rate: schema_config.bad_debt_rate,
254 returns_rate: schema_config.return_rate,
255 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256 payment_method_distribution: vec![
257 (PaymentMethod::BankTransfer, 0.50),
258 (PaymentMethod::Check, 0.30),
259 (PaymentMethod::Wire, 0.15),
260 (PaymentMethod::CreditCard, 0.05),
261 ],
262 payment_behavior: O2CPaymentBehavior {
263 partial_payment_rate: payment_behavior.partial_payments.rate,
264 short_payment_rate: payment_behavior.short_payments.rate,
265 max_short_percent: payment_behavior.short_payments.max_short_percent,
266 on_account_rate: payment_behavior.on_account_payments.rate,
267 payment_correction_rate: payment_behavior.payment_corrections.rate,
268 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269 },
270 }
271}
272
/// Switches and volume knobs describing which generation phases are enabled
/// and how much data each should produce.
///
/// The "Default" notes on each field reflect the `Default` impl in this file.
/// `PhaseConfig::from_config` derives most feature flags from a
/// `GeneratorConfig` instead. How each flag is consumed is outside this type.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Enable the master-data phase. Default: `true`.
    pub generate_master_data: bool,
    /// Enable the document-flow phase. Default: `true`.
    pub generate_document_flows: bool,
    /// Enable OCPM event generation. Default: `false`.
    pub generate_ocpm_events: bool,
    /// Enable journal-entry generation. Default: `true`.
    pub generate_journal_entries: bool,
    /// Enable anomaly injection. Default: `false`.
    pub inject_anomalies: bool,
    /// Enable data-quality injection. Default: `false`.
    pub inject_data_quality: bool,
    /// Enable balance validation. Default: `true`.
    pub validate_balances: bool,
    /// Strict chart-of-accounts coverage validation toggle; exact semantics are
    /// defined by the consumer. Default: `false`.
    pub validate_coa_coverage_strict: bool,
    /// Whether progress output is shown. Default: `true`.
    pub show_progress: bool,
    /// Vendors per company. Default: `50`.
    pub vendors_per_company: usize,
    /// Customers per company. Default: `100`.
    pub customers_per_company: usize,
    /// Materials per company. Default: `200`.
    pub materials_per_company: usize,
    /// Fixed assets per company. Default: `50`.
    pub assets_per_company: usize,
    /// Employees per company. Default: `100`.
    pub employees_per_company: usize,
    /// Number of procure-to-pay chains. Default: `100`.
    pub p2p_chains: usize,
    /// Number of order-to-cash chains. Default: `100`.
    pub o2c_chains: usize,
    /// Enable audit data generation. Default: `false`.
    pub generate_audit: bool,
    /// Number of audit engagements. Default: `5`.
    pub audit_engagements: usize,
    /// Workpapers per engagement. Default: `20`.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper. Default: `5`.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement. Default: `15`.
    pub risks_per_engagement: usize,
    /// Findings per engagement. Default: `8`.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement. Default: `10`.
    pub judgments_per_engagement: usize,
    /// Enable banking data generation. Default: `false`.
    pub generate_banking: bool,
    /// Enable graph export. Default: `false`.
    pub generate_graph_export: bool,
    /// Enable sourcing data generation. Default: `false`.
    pub generate_sourcing: bool,
    /// Enable bank reconciliation generation. Default: `false`.
    pub generate_bank_reconciliation: bool,
    /// Enable financial statement generation. Default: `false`.
    pub generate_financial_statements: bool,
    /// Enable accounting-standards data generation. Default: `false`.
    pub generate_accounting_standards: bool,
    /// Enable manufacturing data generation. Default: `false`.
    pub generate_manufacturing: bool,
    /// Enable sales quote / KPI / budget generation. Default: `false`.
    pub generate_sales_kpi_budgets: bool,
    /// Enable tax data generation. Default: `false`.
    pub generate_tax: bool,
    /// Enable ESG data generation. Default: `false`.
    pub generate_esg: bool,
    /// Enable intercompany data generation. Default: `false`.
    pub generate_intercompany: bool,
    /// Enable evolution event generation. Default: `true`.
    pub generate_evolution_events: bool,
    /// Enable counterfactual generation. Default: `false`.
    pub generate_counterfactuals: bool,
    /// Enable compliance-regulations data generation. Default: `false`.
    pub generate_compliance_regulations: bool,
    /// Enable the period-close phase. Default: `true`.
    pub generate_period_close: bool,
    /// Enable HR data generation. Default: `false`.
    pub generate_hr: bool,
    /// Enable treasury data generation. Default: `false`.
    pub generate_treasury: bool,
    /// Enable project-accounting data generation. Default: `false`.
    pub generate_project_accounting: bool,
    /// Enable legal document generation. Default: `false`.
    pub generate_legal_documents: bool,
    /// Enable IT-controls data generation. Default: `false`.
    pub generate_it_controls: bool,
    /// Enable analytics metadata generation. Default: `false`.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376 fn default() -> Self {
377 Self {
378 generate_master_data: true,
379 generate_document_flows: true,
380 generate_ocpm_events: false, generate_journal_entries: true,
382 inject_anomalies: false,
383 inject_data_quality: false, validate_balances: true,
385 validate_coa_coverage_strict: false,
386 show_progress: true,
387 vendors_per_company: 50,
388 customers_per_company: 100,
389 materials_per_company: 200,
390 assets_per_company: 50,
391 employees_per_company: 100,
392 p2p_chains: 100,
393 o2c_chains: 100,
394 generate_audit: false, audit_engagements: 5,
396 workpapers_per_engagement: 20,
397 evidence_per_workpaper: 5,
398 risks_per_engagement: 15,
399 findings_per_engagement: 8,
400 judgments_per_engagement: 10,
401 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
423 }
424}
425
426impl PhaseConfig {
427 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432 Self {
433 generate_master_data: true,
435 generate_document_flows: true,
436 generate_journal_entries: true,
437 validate_balances: true,
438 validate_coa_coverage_strict: false,
439 generate_period_close: true,
440 generate_evolution_events: true,
441 show_progress: true,
442
443 generate_audit: cfg.audit.enabled,
445 generate_banking: cfg.banking.enabled,
446 generate_graph_export: cfg.graph_export.enabled,
447 generate_sourcing: cfg.source_to_pay.enabled,
448 generate_intercompany: cfg.intercompany.enabled,
449 generate_financial_statements: cfg.financial_reporting.enabled,
450 generate_bank_reconciliation: cfg.financial_reporting.enabled,
451 generate_accounting_standards: cfg.accounting_standards.enabled,
452 generate_manufacturing: cfg.manufacturing.enabled,
453 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454 generate_tax: cfg.tax.enabled,
455 generate_esg: cfg.esg.enabled,
456 generate_ocpm_events: cfg.ocpm.enabled,
457 generate_compliance_regulations: cfg.compliance_regulations.enabled,
458 generate_hr: cfg.hr.enabled,
459 generate_treasury: cfg.treasury.enabled,
460 generate_project_accounting: cfg.project_accounting.enabled,
461
462 generate_legal_documents: cfg.compliance_regulations.enabled
466 && cfg.compliance_regulations.legal_documents.enabled,
467 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470 generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478 inject_data_quality: cfg.data_quality.enabled,
479
480 vendors_per_company: 50,
482 customers_per_company: 100,
483 materials_per_company: 200,
484 assets_per_company: 50,
485 employees_per_company: 100,
486 p2p_chains: 100,
487 o2c_chains: 100,
488 audit_engagements: 5,
489 workpapers_per_engagement: 20,
490 evidence_per_workpaper: 5,
491 risks_per_engagement: 15,
492 findings_per_engagement: 8,
493 judgments_per_engagement: 10,
494 }
495 }
496}
497
/// Master-data collections carried on `EnhancedGenerationResult::master_data`.
///
/// `Default` yields empty collections.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Profit-center records.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// Organizational profiles.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
524
/// Size and location summary for a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
537
/// Document-flow collections carried on
/// `EnhancedGenerationResult::document_flows`.
///
/// Holds both the chain-level objects and per-document-type lists. Whether the
/// per-type lists are flattened from the chains is not visible here.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// Procure-to-pay document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// Order-to-cash document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// References linking documents to one another.
    pub document_references: Vec<documents::DocumentReference>,
}
563
/// Subledger collections (AP, AR, fixed assets, inventory) carried on
/// `EnhancedGenerationResult::subledger`.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts-payable invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts-receivable invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
590
/// OCPM event-log output carried on `EnhancedGenerationResult::ocpm`.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present (`None` by default).
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
603
/// Audit-related collections carried on `EnhancedGenerationResult::audit`.
///
/// `Default` yields empty collections and `None` for the optional fields.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Instructions issued to component auditors.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Reports from component auditors.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User-entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mapping entries.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard reference entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, if one is present.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
    /// Legal documents.
    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
    /// IT-controls access logs.
    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
    /// IT-controls change-management records.
    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
}
729
/// Banking collections and labels carried on
/// `EnhancedGenerationResult::banking`.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Count of suspicious items (what exactly is counted is set by the producer).
    pub suspicious_count: usize,
    /// Count of scenarios.
    pub scenario_count: usize,
}
754
/// Summary of graph exports, carried on
/// `EnhancedGenerationResult::graph_export`. Serializable.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place (`false` by default).
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details, keyed by a string identifier chosen by the producer.
    pub exports: HashMap<String, GraphExportInfo>,
}
765
/// Details of a single graph export; the value type of
/// `GraphExportSnapshot::exports`.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
780
/// Sourcing collections carried on `EnhancedGenerationResult::sourcing`.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
803
/// Trial balance for one fiscal period, stored as a flat list of entries.
///
/// Carries no company code or currency; `into_canonical` takes those as
/// arguments when converting to the `TrialBalance` model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period.
    pub period_end: NaiveDate,
    /// Per-account trial balance entries.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
827
828impl PeriodTrialBalance {
829 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838 let mut total_debits = Decimal::ZERO;
839 let mut total_credits = Decimal::ZERO;
840 let lines: Vec<TrialBalanceLine> = self
841 .entries
842 .into_iter()
843 .map(|e| {
844 total_debits += e.debit_balance;
845 total_credits += e.credit_balance;
846 let category = AccountCategory::from_account_code(&e.account_code);
847 TrialBalanceLine {
848 account_code: e.account_code,
849 account_description: e.account_name,
850 category,
851 account_type: AccountType::Asset,
852 opening_balance: Decimal::ZERO,
853 period_debits: e.debit_balance,
854 period_credits: e.credit_balance,
855 closing_balance: e.debit_balance - e.credit_balance,
856 debit_balance: e.debit_balance,
857 credit_balance: e.credit_balance,
858 cost_center: None,
859 profit_center: None,
860 }
861 })
862 .collect();
863 let imbalance = total_debits - total_credits;
864 let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865 TrialBalance {
866 trial_balance_id: format!(
867 "{company_code}-{:04}{:02}",
868 self.fiscal_year, self.fiscal_period
869 ),
870 company_code: company_code.to_string(),
871 company_name: None,
872 as_of_date: self.period_end,
873 fiscal_year: self.fiscal_year as i32,
874 fiscal_period: self.fiscal_period as u32,
875 currency: currency.to_string(),
876 balance_type: TrialBalanceType::Adjusted,
877 lines,
878 total_debits,
879 total_credits,
880 is_balanced,
881 out_of_balance: imbalance,
882 is_equation_valid: is_balanced,
883 equation_difference: imbalance,
884 category_summary: std::collections::HashMap::new(),
885 created_at: self
886 .period_start
887 .and_hms_opt(0, 0, 0)
888 .expect("midnight is a valid time"),
889 created_by: "ORCHESTRATOR".to_string(),
890 approved_by: None,
891 approved_at: None,
892 status: TrialBalanceStatus::Final,
893 }
894 }
895}
896
/// Financial-reporting collections carried on
/// `EnhancedGenerationResult::financial_reporting`.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key
    /// (presumably the entity/company code; confirm against the producer).
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segment reports.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
921
/// HR collections carried on `EnhancedGenerationResult::hr`.
///
/// The `*_count` fields sit alongside the collections; nothing in this type
/// keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries for pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries for stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Count of payroll runs.
    pub payroll_run_count: usize,
    /// Count of payroll line items.
    pub payroll_line_item_count: usize,
    /// Count of time entries.
    pub time_entry_count: usize,
    /// Count of expense reports.
    pub expense_report_count: usize,
    /// Count of benefit enrollments.
    pub benefit_enrollment_count: usize,
    /// Count of pension plans.
    pub pension_plan_count: usize,
    /// Count of stock grants.
    pub stock_grant_count: usize,
}
966
/// Accounting-standards collections carried on
/// `EnhancedGenerationResult::accounting_standards`.
///
/// The `*_count` fields sit alongside the collections; nothing in this type
/// keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer (revenue) contracts.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries for ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries for provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Count of revenue contracts.
    pub revenue_contract_count: usize,
    /// Count of impairment tests.
    pub impairment_test_count: usize,
    /// Count of business combinations.
    pub business_combination_count: usize,
    /// Count of ECL models.
    pub ecl_model_count: usize,
    /// Count of provisions.
    pub provision_count: usize,
    /// Count of currency translation results.
    pub currency_translation_count: usize,
    /// Leases.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair value measurements.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records.
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Framework reconciliations.
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Count of leases.
    pub lease_count: usize,
    /// Count of fair value measurements.
    pub fair_value_measurement_count: usize,
    /// Count of framework differences.
    pub framework_difference_count: usize,
}
1027
/// Compliance and regulatory collections.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
1046
/// Manufacturing collections carried on
/// `EnhancedGenerationResult::manufacturing`.
///
/// The `*_count` fields sit alongside the collections; nothing in this type
/// keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Bill-of-materials components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Count of production orders.
    pub production_order_count: usize,
    /// Count of quality inspections.
    pub quality_inspection_count: usize,
    /// Count of cycle counts.
    pub cycle_count_count: usize,
    /// Count of BOM components.
    pub bom_component_count: usize,
    /// Count of inventory movements.
    pub inventory_movement_count: usize,
}
1071
/// Sales quotes, management KPIs and budgets carried on
/// `EnhancedGenerationResult::sales_kpi_budgets`.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Count of sales quotes.
    pub sales_quote_count: usize,
    /// Count of KPIs.
    pub kpi_count: usize,
    /// Count of budget lines (note: lines, whereas `budgets` holds `Budget` values).
    pub budget_line_count: usize,
}
1088
/// Anomaly labels carried on `EnhancedGenerationResult::anomaly_labels`.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Individual labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Aggregate summary, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string (presumably the anomaly type name; confirm
    /// against the producer).
    pub by_type: HashMap<String, usize>,
}
1099
/// Outcome of balance validation, carried on
/// `EnhancedGenerationResult::balance_validation`.
///
/// `Default` yields all-false flags, zero totals and no errors.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the validated data is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors encountered.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
1122
/// Tax collections carried on `EnhancedGenerationResult::tax`.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Anomaly labels for tax data.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Count of jurisdictions.
    pub jurisdiction_count: usize,
    /// Count of tax codes.
    pub code_count: usize,
    /// Deferred tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1149
/// Intercompany collections carried on
/// `EnhancedGenerationResult::intercompany`.
///
/// Serializable and deserializable, except for `ic_document_chains`, which is
/// skipped by serde.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Non-controlling-interest measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains, if present. Excluded from
    /// (de)serialization; deserializing leaves this at its default (`None`).
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Count of matched pairs.
    pub matched_pair_count: usize,
    /// Count of elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate (scale, fraction vs. percent, is set by the producer).
    pub match_rate: f64,
}
1175
/// ESG collections carried on `EnhancedGenerationResult::esg`.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// Anomaly labels for ESG data.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Count of emission records.
    pub emission_count: usize,
    /// Count of disclosures.
    pub disclosure_count: usize,
}
1212
/// Treasury collections carried on `EnhancedGenerationResult::treasury`.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Anomaly labels for treasury data.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries for treasury.
    pub journal_entries: Vec<JournalEntry>,
}
1240
/// Snapshot of project accounting data.
///
/// Every collection starts empty through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1257
/// Aggregated output of the enhanced generation workflow; this is the value
/// returned by `EnhancedOrchestrator::generate`.
///
/// The struct derives `Default`, so a fresh value has empty collections,
/// default snapshots and `None` for every optional part.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    // --- Per-domain snapshots ---
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,

    // --- Event streams ---
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,

    // --- Ledger, labels and quality ---
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Data quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Counters and timings for the run.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, when one is available.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, when one is available.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,

    // --- Controls, balances and fraud artifacts ---
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,

    // --- Relationship and metadata artifacts ---
    /// Temporal version chains for vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when one is available.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when one is available.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics metadata snapshot.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, when one is available.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity snapshot.
    pub interconnectivity: InterconnectivitySnapshot,
}
1363
/// Snapshot of interconnectivity data stored as plain string pairs and lists.
///
/// Every collection starts empty through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(String, u8)` pairs; presumably vendor identifier and tier number —
    /// TODO confirm against the code that fills this field.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(String, String)` pairs; presumably vendor identifier and cluster
    /// name — TODO confirm.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(String, String)` pairs; presumably customer identifier and value
    /// segment — TODO confirm.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(String, String)` pairs; presumably customer identifier and lifecycle
    /// stage — TODO confirm.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Free-form industry metadata strings.
    pub industry_metadata: Vec<String>,
}
1389
/// Snapshot of analytics metadata.
///
/// Every collection starts empty through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparatives.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management reports.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift events.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1402
/// Counters and timings collected during an enhanced generation run.
///
/// The struct is serializable. Fields carrying `#[serde(default)]` fall back to
/// their type's default when they are missing from deserialized input; the
/// non-optional fields without that attribute must be present. Fields that
/// also carry `skip_serializing_if = "Option::is_none"` are left out of the
/// serialized output while they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Core volume counters ---
    /// Total journal entry counter.
    pub total_entries: u64,
    /// Total line item counter.
    pub total_line_items: u64,
    /// Account counter.
    pub accounts_count: usize,
    /// Company counter; `generate` initialises it from `config.companies.len()`.
    pub companies_count: usize,
    /// Period length in months; `generate` initialises it from
    /// `config.global.period_months`.
    pub period_months: u32,

    // --- Master data counters ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flow counters ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,

    // --- Subledger invoice counters ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM counters ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit counters ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomaly and data quality counters ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking counters ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export counters ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment (the `_ms` suffix suggests milliseconds — confirm at the writer) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,

    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Neural hybrid settings; omitted from serialized output while `None` ---
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    // NOTE(review): unlike the neural_* options above, this option is still
    // serialized while `None` because it has no `skip_serializing_if`.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing counters ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting counters ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR counters ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards counters ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing counters ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales, KPI and budget counters ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax counters ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG counters ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany counters ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledger counters ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instrument counters ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting counters ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Further tax, balance and reconciliation counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management counters ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Event counters ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Fraud artifact counters ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Relationship counters ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Miscellaneous counters ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1672
/// Orchestrator that drives the enhanced generation workflow (see `generate`).
///
/// Instances are created with `new`, `with_defaults` or the `from_fingerprint*`
/// constructors and can then be adjusted through the `with_*` / `set_*`
/// methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before anything else is built.
    config: GeneratorConfig,
    /// Phase settings; `with_progress` updates its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn at random when unset.
    seed: u64,
    /// Progress-bar container created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard from `build_resource_guard`; rebuilt by `with_output_path`.
    resource_guard: ResourceGuard,
    /// Output location set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless filled by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country-pack registry built from `config.country_packs`, or from the
    /// built-in packs when that section is absent.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives serialized items from `emit_phase_items`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider produced by `build_template_provider`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Shared temporal context; `None` when temporal patterns or business-day
    /// handling are disabled in the configuration.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Shard context installed through `set_shard_context`; `None` by default.
    shard_context: Option<crate::shard_context::ShardContext>,
}
1709
1710impl EnhancedOrchestrator {
1711 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1713 datasynth_config::validate_config(&config)?;
1714
1715 let seed = config.global.seed.unwrap_or_else(rand::random);
1716
1717 let resource_guard = Self::build_resource_guard(&config, None);
1719
1720 let country_pack_registry = match &config.country_packs {
1722 Some(cp) => {
1723 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1724 .map_err(|e| SynthError::config(e.to_string()))?
1725 }
1726 None => datasynth_core::CountryPackRegistry::builtin_only()
1727 .map_err(|e| SynthError::config(e.to_string()))?,
1728 };
1729
1730 let template_provider = Self::build_template_provider(&config)?;
1734
1735 let temporal_context = Self::build_temporal_context(&config)?;
1739
1740 Ok(Self {
1741 config,
1742 phase_config,
1743 coa: None,
1744 master_data: MasterDataSnapshot::default(),
1745 seed,
1746 multi_progress: None,
1747 resource_guard,
1748 output_path: None,
1749 copula_generators: Vec::new(),
1750 country_pack_registry,
1751 phase_sink: None,
1752 template_provider,
1753 temporal_context,
1754 shard_context: None,
1755 })
1756 }
1757
1758 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1764 self.shard_context = Some(ctx);
1765 }
1766
1767 fn build_temporal_context(
1773 config: &GeneratorConfig,
1774 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1775 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1776
1777 let tp = &config.temporal_patterns;
1778 if !tp.enabled || !tp.business_days.enabled {
1779 return Ok(None);
1780 }
1781
1782 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1783 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1784 let end_date = start_date + chrono::Months::new(config.global.period_months);
1785
1786 let region_code = tp
1787 .calendars
1788 .regions
1789 .first()
1790 .cloned()
1791 .unwrap_or_else(|| "US".to_string());
1792 let region = parse_region_code(®ion_code);
1793
1794 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1795 }
1796
1797 fn build_template_provider(
1805 config: &GeneratorConfig,
1806 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1807 use datasynth_core::templates::{
1808 loader::{MergeStrategy, TemplateLoader},
1809 DefaultTemplateProvider,
1810 };
1811 use std::sync::Arc;
1812
1813 let provider = match &config.templates.path {
1814 None => DefaultTemplateProvider::new(),
1815 Some(path) => {
1816 let data = if path.is_dir() {
1817 TemplateLoader::load_from_directory(path)
1818 } else {
1819 TemplateLoader::load_from_file(path)
1820 }
1821 .map_err(|e| {
1822 SynthError::config(format!(
1823 "Failed to load templates from {}: {e}",
1824 path.display()
1825 ))
1826 })?;
1827 let strategy = match config.templates.merge_strategy {
1828 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1829 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1830 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1831 MergeStrategy::MergePreferFile
1832 }
1833 };
1834 DefaultTemplateProvider::with_templates(data, strategy)
1835 }
1836 };
1837 Ok(Arc::new(provider))
1838 }
1839
1840 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1842 Self::new(config, PhaseConfig::default())
1843 }
1844
1845 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1847 self.phase_sink = Some(sink);
1848 self
1849 }
1850
1851 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1853 self.phase_sink = Some(sink);
1854 }
1855
1856 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1858 if let Some(ref sink) = self.phase_sink {
1859 for item in items {
1860 if let Ok(value) = serde_json::to_value(item) {
1861 if let Err(e) = sink.emit(phase, type_name, &value) {
1862 warn!(
1863 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1864 );
1865 }
1866 }
1867 }
1868 if let Err(e) = sink.phase_complete(phase) {
1869 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1870 }
1871 }
1872 }
1873
1874 pub fn with_progress(mut self, show: bool) -> Self {
1876 self.phase_config.show_progress = show;
1877 if show {
1878 self.multi_progress = Some(MultiProgress::new());
1879 }
1880 self
1881 }
1882
1883 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1885 let path = path.into();
1886 self.output_path = Some(path.clone());
1887 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1889 self
1890 }
1891
1892 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1894 &self.country_pack_registry
1895 }
1896
1897 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1899 self.country_pack_registry.get_by_str(country)
1900 }
1901
1902 fn primary_country_code(&self) -> &str {
1905 self.config
1906 .companies
1907 .first()
1908 .map(|c| c.country.as_str())
1909 .unwrap_or("US")
1910 }
1911
1912 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1914 self.country_pack_for(self.primary_country_code())
1915 }
1916
1917 fn resolve_coa_framework(&self) -> CoAFramework {
1919 if self.config.accounting_standards.enabled {
1920 match self.config.accounting_standards.framework {
1921 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1922 return CoAFramework::FrenchPcg;
1923 }
1924 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1925 return CoAFramework::GermanSkr04;
1926 }
1927 _ => {}
1928 }
1929 }
1930 let pack = self.primary_pack();
1932 match pack.accounting.framework.as_str() {
1933 "french_gaap" => CoAFramework::FrenchPcg,
1934 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1935 _ => CoAFramework::UsGaap,
1936 }
1937 }
1938
1939 pub fn has_copulas(&self) -> bool {
1944 !self.copula_generators.is_empty()
1945 }
1946
1947 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1953 &self.copula_generators
1954 }
1955
1956 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1960 &mut self.copula_generators
1961 }
1962
1963 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1967 self.copula_generators
1968 .iter_mut()
1969 .find(|c| c.name == copula_name)
1970 .map(|c| c.generator.sample())
1971 }
1972
1973 pub fn from_fingerprint(
1996 fingerprint_path: &std::path::Path,
1997 phase_config: PhaseConfig,
1998 scale: f64,
1999 ) -> SynthResult<Self> {
2000 info!("Loading fingerprint from: {}", fingerprint_path.display());
2001
2002 let reader = FingerprintReader::new();
2004 let fingerprint = reader
2005 .read_from_file(fingerprint_path)
2006 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2007
2008 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2009 }
2010
2011 pub fn from_fingerprint_data(
2018 fingerprint: Fingerprint,
2019 phase_config: PhaseConfig,
2020 scale: f64,
2021 ) -> SynthResult<Self> {
2022 info!(
2023 "Synthesizing config from fingerprint (version: {}, tables: {})",
2024 fingerprint.manifest.version,
2025 fingerprint.schema.tables.len()
2026 );
2027
2028 let seed: u64 = rand::random();
2030 info!("Fingerprint synthesis seed: {}", seed);
2031
2032 let options = SynthesisOptions {
2034 scale,
2035 seed: Some(seed),
2036 preserve_correlations: true,
2037 inject_anomalies: true,
2038 };
2039 let synthesizer = ConfigSynthesizer::with_options(options);
2040
2041 let synthesis_result = synthesizer
2043 .synthesize_full(&fingerprint, seed)
2044 .map_err(|e| {
2045 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2046 })?;
2047
2048 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2050 Self::base_config_for_industry(industry)
2051 } else {
2052 Self::base_config_for_industry("manufacturing")
2053 };
2054
2055 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2057
2058 info!(
2060 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2061 fingerprint.schema.tables.len(),
2062 scale,
2063 synthesis_result.copula_generators.len()
2064 );
2065
2066 if !synthesis_result.copula_generators.is_empty() {
2067 for spec in &synthesis_result.copula_generators {
2068 info!(
2069 " Copula '{}' for table '{}': {} columns",
2070 spec.name,
2071 spec.table,
2072 spec.columns.len()
2073 );
2074 }
2075 }
2076
2077 let mut orchestrator = Self::new(config, phase_config)?;
2079
2080 orchestrator.copula_generators = synthesis_result.copula_generators;
2082
2083 Ok(orchestrator)
2084 }
2085
2086 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2088 use datasynth_config::presets::create_preset;
2089 use datasynth_config::TransactionVolume;
2090 use datasynth_core::models::{CoAComplexity, IndustrySector};
2091
2092 let sector = match industry.to_lowercase().as_str() {
2093 "manufacturing" => IndustrySector::Manufacturing,
2094 "retail" => IndustrySector::Retail,
2095 "financial" | "financial_services" => IndustrySector::FinancialServices,
2096 "healthcare" => IndustrySector::Healthcare,
2097 "technology" | "tech" => IndustrySector::Technology,
2098 _ => IndustrySector::Manufacturing,
2099 };
2100
2101 create_preset(
2103 sector,
2104 1, 12, CoAComplexity::Medium,
2107 TransactionVolume::TenK,
2108 )
2109 }
2110
2111 fn apply_config_patch(
2113 mut config: GeneratorConfig,
2114 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2115 ) -> GeneratorConfig {
2116 use datasynth_fingerprint::synthesis::ConfigValue;
2117
2118 for (key, value) in patch.values() {
2119 match (key.as_str(), value) {
2120 ("transactions.count", ConfigValue::Integer(n)) => {
2123 info!(
2124 "Fingerprint suggests {} transactions (apply via company volumes)",
2125 n
2126 );
2127 }
2128 ("global.period_months", ConfigValue::Integer(n)) => {
2129 config.global.period_months = (*n).clamp(1, 120) as u32;
2130 }
2131 ("global.start_date", ConfigValue::String(s)) => {
2132 config.global.start_date = s.clone();
2133 }
2134 ("global.seed", ConfigValue::Integer(n)) => {
2135 config.global.seed = Some(*n as u64);
2136 }
2137 ("fraud.enabled", ConfigValue::Bool(b)) => {
2138 config.fraud.enabled = *b;
2139 }
2140 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2141 config.fraud.fraud_rate = *f;
2142 }
2143 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2144 config.data_quality.enabled = *b;
2145 }
2146 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2148 config.fraud.enabled = *b;
2149 }
2150 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2151 config.fraud.fraud_rate = *f;
2152 }
2153 _ => {
2154 debug!("Ignoring unknown config patch key: {}", key);
2155 }
2156 }
2157 }
2158
2159 config
2160 }
2161
2162 fn build_resource_guard(
2164 config: &GeneratorConfig,
2165 output_path: Option<PathBuf>,
2166 ) -> ResourceGuard {
2167 let mut builder = ResourceGuardBuilder::new();
2168
2169 if config.global.memory_limit_mb > 0 {
2171 builder = builder.memory_limit(config.global.memory_limit_mb);
2172 }
2173
2174 if let Some(path) = output_path {
2176 builder = builder.output_path(path).min_free_disk(100); }
2178
2179 builder = builder.conservative();
2181
2182 builder.build()
2183 }
2184
2185 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2190 self.resource_guard.check()
2191 }
2192
2193 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2195 let level = self.resource_guard.check()?;
2196
2197 if level != DegradationLevel::Normal {
2198 warn!(
2199 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2200 phase,
2201 level,
2202 self.resource_guard.current_memory_mb(),
2203 self.resource_guard.available_disk_mb()
2204 );
2205 }
2206
2207 Ok(level)
2208 }
2209
2210 fn get_degradation_actions(&self) -> DegradationActions {
2212 self.resource_guard.get_actions()
2213 }
2214
2215 fn check_memory_limit(&self) -> SynthResult<()> {
2217 self.check_resources()?;
2218 Ok(())
2219 }
2220
2221 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2223 info!("Starting enhanced generation workflow");
2224 info!(
2225 "Config: industry={:?}, period_months={}, companies={}",
2226 self.config.global.industry,
2227 self.config.global.period_months,
2228 self.config.companies.len()
2229 );
2230
2231 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2234 datasynth_core::serde_decimal::set_numeric_native(is_native);
2235 struct NumericModeGuard;
2236 impl Drop for NumericModeGuard {
2237 fn drop(&mut self) {
2238 datasynth_core::serde_decimal::set_numeric_native(false);
2239 }
2240 }
2241 let _numeric_guard = if is_native {
2242 Some(NumericModeGuard)
2243 } else {
2244 None
2245 };
2246
2247 let initial_level = self.check_resources_with_log("initial")?;
2249 if initial_level == DegradationLevel::Emergency {
2250 return Err(SynthError::resource(
2251 "Insufficient resources to start generation",
2252 ));
2253 }
2254
2255 let mut stats = EnhancedGenerationStatistics {
2256 companies_count: self.config.companies.len(),
2257 period_months: self.config.global.period_months,
2258 ..Default::default()
2259 };
2260
2261 let coa = self.phase_chart_of_accounts(&mut stats)?;
2263
2264 self.phase_master_data(&mut stats)?;
2266
2267 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2269 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2270 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2271
2272 let (mut document_flows, mut subledger, fa_journal_entries) =
2274 self.phase_document_flows(&mut stats)?;
2275
2276 self.emit_phase_items(
2278 "document_flows",
2279 "PurchaseOrder",
2280 &document_flows.purchase_orders,
2281 );
2282 self.emit_phase_items(
2283 "document_flows",
2284 "GoodsReceipt",
2285 &document_flows.goods_receipts,
2286 );
2287 self.emit_phase_items(
2288 "document_flows",
2289 "VendorInvoice",
2290 &document_flows.vendor_invoices,
2291 );
2292 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2293 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2294
2295 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2297
2298 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2303 .iter()
2304 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2305 .collect();
2306 if !opening_balance_jes.is_empty() {
2307 debug!(
2308 "Prepending {} opening balance JEs to entries",
2309 opening_balance_jes.len()
2310 );
2311 }
2312
2313 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2315
2316 if !opening_balance_jes.is_empty() {
2319 let mut combined = opening_balance_jes;
2320 combined.extend(entries);
2321 entries = combined;
2322 }
2323
2324 if !fa_journal_entries.is_empty() {
2326 debug!(
2327 "Appending {} FA acquisition JEs to main entries",
2328 fa_journal_entries.len()
2329 );
2330 entries.extend(fa_journal_entries);
2331 }
2332
2333 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2335
2336 let actions = self.get_degradation_actions();
2338
2339 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2341
2342 if !sourcing.contracts.is_empty() {
2345 let mut linked_count = 0usize;
2346 let po_vendor_pairs: Vec<(String, String)> = document_flows
2348 .p2p_chains
2349 .iter()
2350 .map(|chain| {
2351 (
2352 chain.purchase_order.vendor_id.clone(),
2353 chain.purchase_order.header.document_id.clone(),
2354 )
2355 })
2356 .collect();
2357
2358 for chain in &mut document_flows.p2p_chains {
2359 if chain.purchase_order.contract_id.is_none() {
2360 if let Some(contract) = sourcing
2361 .contracts
2362 .iter()
2363 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2364 {
2365 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2366 linked_count += 1;
2367 }
2368 }
2369 }
2370
2371 for contract in &mut sourcing.contracts {
2373 let po_ids: Vec<String> = po_vendor_pairs
2374 .iter()
2375 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2376 .map(|(_, po_id)| po_id.clone())
2377 .collect();
2378 if !po_ids.is_empty() {
2379 contract.purchase_order_ids = po_ids;
2380 }
2381 }
2382
2383 if linked_count > 0 {
2384 debug!(
2385 "Linked {} purchase orders to S2C contracts by vendor match",
2386 linked_count
2387 );
2388 }
2389 }
2390
2391 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2393
2394 if !intercompany.seller_journal_entries.is_empty()
2396 || !intercompany.buyer_journal_entries.is_empty()
2397 {
2398 let ic_je_count = intercompany.seller_journal_entries.len()
2399 + intercompany.buyer_journal_entries.len();
2400 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2401 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2402 debug!(
2403 "Appended {} IC journal entries to main entries",
2404 ic_je_count
2405 );
2406 }
2407
2408 if !intercompany.elimination_entries.is_empty() {
2410 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2411 &intercompany.elimination_entries,
2412 );
2413 if !elim_jes.is_empty() {
2414 debug!(
2415 "Appended {} elimination journal entries to main entries",
2416 elim_jes.len()
2417 );
2418 let elim_debit: rust_decimal::Decimal =
2420 elim_jes.iter().map(|je| je.total_debit()).sum();
2421 let elim_credit: rust_decimal::Decimal =
2422 elim_jes.iter().map(|je| je.total_credit()).sum();
2423 let elim_diff = (elim_debit - elim_credit).abs();
2424 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2426 return Err(datasynth_core::error::SynthError::generation(format!(
2427 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2428 elim_debit, elim_credit, elim_diff, tolerance
2429 )));
2430 }
2431 debug!(
2432 "IC elimination balance verified: debits={}, credits={} (diff={})",
2433 elim_debit, elim_credit, elim_diff
2434 );
2435 entries.extend(elim_jes);
2436 }
2437 }
2438
2439 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2441 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2442 document_flows
2443 .customer_invoices
2444 .extend(ic_docs.seller_invoices.iter().cloned());
2445 document_flows
2446 .purchase_orders
2447 .extend(ic_docs.buyer_orders.iter().cloned());
2448 document_flows
2449 .goods_receipts
2450 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2451 document_flows
2452 .vendor_invoices
2453 .extend(ic_docs.buyer_invoices.iter().cloned());
2454 debug!(
2455 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2456 ic_docs.seller_invoices.len(),
2457 ic_docs.buyer_orders.len(),
2458 ic_docs.buyer_goods_receipts.len(),
2459 ic_docs.buyer_invoices.len(),
2460 );
2461 }
2462 }
2463
2464 let hr = self.phase_hr_data(&mut stats)?;
2466
2467 if !hr.payroll_runs.is_empty() {
2469 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2470 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2471 entries.extend(payroll_jes);
2472 }
2473
2474 if !hr.pension_journal_entries.is_empty() {
2476 debug!(
2477 "Generated {} JEs from pension plans",
2478 hr.pension_journal_entries.len()
2479 );
2480 entries.extend(hr.pension_journal_entries.iter().cloned());
2481 }
2482
2483 if !hr.stock_comp_journal_entries.is_empty() {
2485 debug!(
2486 "Generated {} JEs from stock-based compensation",
2487 hr.stock_comp_journal_entries.len()
2488 );
2489 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2490 }
2491
2492 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2494
2495 if !manufacturing_snap.production_orders.is_empty() {
2497 let currency = self
2498 .config
2499 .companies
2500 .first()
2501 .map(|c| c.currency.as_str())
2502 .unwrap_or("USD");
2503 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2504 &manufacturing_snap.production_orders,
2505 &manufacturing_snap.quality_inspections,
2506 currency,
2507 );
2508 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2509 entries.extend(mfg_jes);
2510 }
2511
2512 if !manufacturing_snap.quality_inspections.is_empty() {
2514 let framework = match self.config.accounting_standards.framework {
2515 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2516 _ => "US_GAAP",
2517 };
2518 for company in &self.config.companies {
2519 let company_orders: Vec<_> = manufacturing_snap
2520 .production_orders
2521 .iter()
2522 .filter(|o| o.company_code == company.code)
2523 .cloned()
2524 .collect();
2525 let company_inspections: Vec<_> = manufacturing_snap
2526 .quality_inspections
2527 .iter()
2528 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2529 .cloned()
2530 .collect();
2531 if company_inspections.is_empty() {
2532 continue;
2533 }
2534 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2535 let warranty_result = warranty_gen.generate(
2536 &company.code,
2537 &company_orders,
2538 &company_inspections,
2539 &company.currency,
2540 framework,
2541 );
2542 if !warranty_result.journal_entries.is_empty() {
2543 debug!(
2544 "Generated {} warranty provision JEs for {}",
2545 warranty_result.journal_entries.len(),
2546 company.code
2547 );
2548 entries.extend(warranty_result.journal_entries);
2549 }
2550 }
2551 }
2552
2553 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2555 {
2556 let cogs_currency = self
2557 .config
2558 .companies
2559 .first()
2560 .map(|c| c.currency.as_str())
2561 .unwrap_or("USD");
2562 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2563 &document_flows.deliveries,
2564 &manufacturing_snap.production_orders,
2565 cogs_currency,
2566 );
2567 if !cogs_jes.is_empty() {
2568 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2569 entries.extend(cogs_jes);
2570 }
2571 }
2572
2573 if !manufacturing_snap.inventory_movements.is_empty()
2579 && !subledger.inventory_positions.is_empty()
2580 {
2581 use datasynth_core::models::MovementType as MfgMovementType;
2582 let mut receipt_count = 0usize;
2583 let mut issue_count = 0usize;
2584 for movement in &manufacturing_snap.inventory_movements {
2585 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2587 p.material_id == movement.material_code
2588 && p.company_code == movement.entity_code
2589 }) {
2590 match movement.movement_type {
2591 MfgMovementType::GoodsReceipt => {
2592 pos.add_quantity(
2594 movement.quantity,
2595 movement.value,
2596 movement.movement_date,
2597 );
2598 receipt_count += 1;
2599 }
2600 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2601 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2603 issue_count += 1;
2604 }
2605 _ => {}
2606 }
2607 }
2608 }
2609 debug!(
2610 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2611 manufacturing_snap.inventory_movements.len(),
2612 receipt_count,
2613 issue_count,
2614 );
2615 }
2616
2617 if !entries.is_empty() {
2620 stats.total_entries = entries.len() as u64;
2621 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2622 debug!(
2623 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2624 stats.total_entries, stats.total_line_items
2625 );
2626 }
2627
2628 if self.config.internal_controls.enabled && !entries.is_empty() {
2630 info!("Phase 7b: Applying internal controls to journal entries");
2631 let control_config = ControlGeneratorConfig {
2632 exception_rate: self.config.internal_controls.exception_rate,
2633 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2634 enable_sox_marking: true,
2635 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2636 self.config.internal_controls.sox_materiality_threshold,
2637 )
2638 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2639 ..Default::default()
2640 };
2641 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2642 for entry in &mut entries {
2643 control_gen.apply_controls(entry, &coa);
2644 }
2645 let with_controls = entries
2646 .iter()
2647 .filter(|e| !e.header.control_ids.is_empty())
2648 .count();
2649 info!(
2650 "Applied controls to {} entries ({} with control IDs assigned)",
2651 entries.len(),
2652 with_controls
2653 );
2654 }
2655
2656 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2660 .iter()
2661 .filter(|e| e.header.sod_violation)
2662 .filter_map(|e| {
2663 e.header.sod_conflict_type.map(|ct| {
2664 use datasynth_core::models::{RiskLevel, SodViolation};
2665 let severity = match ct {
2666 datasynth_core::models::SodConflictType::PaymentReleaser
2667 | datasynth_core::models::SodConflictType::RequesterApprover => {
2668 RiskLevel::Critical
2669 }
2670 datasynth_core::models::SodConflictType::PreparerApprover
2671 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2672 | datasynth_core::models::SodConflictType::JournalEntryPoster
2673 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2674 RiskLevel::High
2675 }
2676 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2677 RiskLevel::Medium
2678 }
2679 };
2680 let action = format!(
2681 "SoD conflict {:?} on entry {} ({})",
2682 ct, e.header.document_id, e.header.company_code
2683 );
2684 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2685 })
2686 })
2687 .collect();
2688 if !sod_violations.is_empty() {
2689 info!(
2690 "Phase 7c: Extracted {} SoD violations from {} entries",
2691 sod_violations.len(),
2692 entries.len()
2693 );
2694 }
2695
2696 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2698
2699 {
2707 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2708 if self.config.fraud.enabled && doc_rate > 0.0 {
2709 use datasynth_core::fraud_propagation::{
2710 inject_document_fraud, propagate_documents_to_entries,
2711 };
2712 use datasynth_core::utils::weighted_select;
2713 use datasynth_core::FraudType;
2714 use rand_chacha::rand_core::SeedableRng;
2715
2716 let dist = &self.config.fraud.fraud_type_distribution;
2717 let fraud_type_weights: [(FraudType, f64); 8] = [
2718 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2719 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2720 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2721 (
2722 FraudType::ImproperCapitalization,
2723 dist.expense_capitalization,
2724 ),
2725 (FraudType::SplitTransaction, dist.split_transaction),
2726 (FraudType::TimingAnomaly, dist.timing_anomaly),
2727 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2728 (FraudType::DuplicatePayment, dist.duplicate_payment),
2729 ];
2730 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2731 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2732 if weights_sum <= 0.0 {
2733 FraudType::FictitiousEntry
2734 } else {
2735 *weighted_select(rng, &fraud_type_weights)
2736 }
2737 };
2738
2739 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2740 let mut doc_tagged = 0usize;
2741 macro_rules! inject_into {
2742 ($collection:expr) => {{
2743 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2744 $collection.iter_mut().map(|d| &mut d.header).collect();
2745 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2746 }};
2747 }
2748 inject_into!(document_flows.purchase_orders);
2749 inject_into!(document_flows.goods_receipts);
2750 inject_into!(document_flows.vendor_invoices);
2751 inject_into!(document_flows.payments);
2752 inject_into!(document_flows.sales_orders);
2753 inject_into!(document_flows.deliveries);
2754 inject_into!(document_flows.customer_invoices);
2755 if doc_tagged > 0 {
2756 info!(
2757 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2758 );
2759 }
2760
2761 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2762 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2763 Vec::new();
2764 headers.extend(
2765 document_flows
2766 .purchase_orders
2767 .iter()
2768 .map(|d| d.header.clone()),
2769 );
2770 headers.extend(
2771 document_flows
2772 .goods_receipts
2773 .iter()
2774 .map(|d| d.header.clone()),
2775 );
2776 headers.extend(
2777 document_flows
2778 .vendor_invoices
2779 .iter()
2780 .map(|d| d.header.clone()),
2781 );
2782 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2783 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2784 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2785 headers.extend(
2786 document_flows
2787 .customer_invoices
2788 .iter()
2789 .map(|d| d.header.clone()),
2790 );
2791 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2792 if propagated > 0 {
2793 info!(
2794 "Propagated document-level fraud to {propagated} derived journal entries"
2795 );
2796 }
2797 }
2798 }
2799 }
2800
2801 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2803
2804 {
2822 use datasynth_core::fraud_bias::{
2823 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2824 };
2825 use rand_chacha::rand_core::SeedableRng;
2826 let cfg = FraudBehavioralBiasConfig::default();
2827 if cfg.enabled {
2828 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2829 let mut swept = 0usize;
2830 for entry in entries.iter_mut() {
2831 if entry.header.is_fraud && !entry.header.is_anomaly {
2832 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2833 swept += 1;
2834 }
2835 }
2836 if swept > 0 {
2837 info!(
2838 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2839 (doc-propagated + je_generator intrinsic fraud)"
2840 );
2841 }
2842 }
2843 }
2844
2845 self.emit_phase_items(
2847 "anomaly_injection",
2848 "LabeledAnomaly",
2849 &anomaly_labels.labels,
2850 );
2851
2852 if self.config.fraud.propagate_to_document {
2860 use std::collections::HashMap;
2861 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2874 for je in &entries {
2875 if je.header.is_fraud {
2876 if let Some(ref fraud_type) = je.header.fraud_type {
2877 if let Some(ref reference) = je.header.reference {
2878 fraud_map.insert(reference.clone(), *fraud_type);
2880 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2883 if !bare.is_empty() {
2884 fraud_map.insert(bare.to_string(), *fraud_type);
2885 }
2886 }
2887 }
2888 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2890 }
2891 }
2892 }
2893 if !fraud_map.is_empty() {
2894 let mut propagated = 0usize;
2895 macro_rules! propagate_to {
2897 ($collection:expr) => {
2898 for doc in &mut $collection {
2899 if doc.header.propagate_fraud(&fraud_map) {
2900 propagated += 1;
2901 }
2902 }
2903 };
2904 }
2905 propagate_to!(document_flows.purchase_orders);
2906 propagate_to!(document_flows.goods_receipts);
2907 propagate_to!(document_flows.vendor_invoices);
2908 propagate_to!(document_flows.payments);
2909 propagate_to!(document_flows.sales_orders);
2910 propagate_to!(document_flows.deliveries);
2911 propagate_to!(document_flows.customer_invoices);
2912 if propagated > 0 {
2913 info!(
2914 "Propagated fraud labels to {} document flow records",
2915 propagated
2916 );
2917 }
2918 }
2919 }
2920
2921 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2923
2924 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2926
2927 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2929
2930 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2932
2933 let balance_validation = self.phase_balance_validation(&entries)?;
2935
2936 self.validate_coa_coverage(&entries, coa.as_ref())?;
2940
2941 let subledger_reconciliation =
2943 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2944
2945 let (data_quality_stats, quality_issues) =
2947 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2948
2949 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2951
2952 {
2954 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2959 for je in &entries {
2960 if je.header.is_fraud || je.header.is_anomaly {
2961 continue;
2962 }
2963 let diff = (je.total_debit() - je.total_credit()).abs();
2964 if diff > tolerance {
2965 unbalanced_clean += 1;
2966 if unbalanced_clean <= 3 {
2967 warn!(
2968 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2969 je.header.document_id,
2970 je.total_debit(),
2971 je.total_credit(),
2972 diff
2973 );
2974 }
2975 }
2976 }
2977 if unbalanced_clean > 0 {
2978 return Err(datasynth_core::error::SynthError::generation(format!(
2979 "{} non-anomaly JEs are unbalanced (debits != credits). \
2980 First few logged above. Tolerance={}",
2981 unbalanced_clean, tolerance
2982 )));
2983 }
2984 debug!(
2985 "Phase 10c: All {} non-anomaly JEs individually balanced",
2986 entries
2987 .iter()
2988 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2989 .count()
2990 );
2991
2992 let company_codes: Vec<String> = self
2994 .config
2995 .companies
2996 .iter()
2997 .map(|c| c.code.clone())
2998 .collect();
2999 for company_code in &company_codes {
3000 let mut assets = rust_decimal::Decimal::ZERO;
3001 let mut liab_equity = rust_decimal::Decimal::ZERO;
3002
3003 for entry in &entries {
3004 if entry.header.company_code != *company_code {
3005 continue;
3006 }
3007 for line in &entry.lines {
3008 let acct = &line.gl_account;
3009 let net = line.debit_amount - line.credit_amount;
3010 if acct.starts_with('1') {
3012 assets += net;
3013 }
3014 else if acct.starts_with('2') || acct.starts_with('3') {
3016 liab_equity -= net; }
3018 }
3021 }
3022
3023 let bs_diff = (assets - liab_equity).abs();
3024 if bs_diff > tolerance {
3025 warn!(
3026 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3027 revenue/expense closing entries may not fully offset",
3028 company_code, assets, liab_equity, bs_diff
3029 );
3030 } else {
3034 debug!(
3035 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3036 company_code, assets, liab_equity, bs_diff
3037 );
3038 }
3039 }
3040
3041 info!("Phase 10c: All generation-time accounting assertions passed");
3042 }
3043
3044 let audit = self.phase_audit_data(&entries, &mut stats)?;
3046
3047 let mut banking = self.phase_banking_data(&mut stats)?;
3049
3050 if self.phase_config.generate_banking
3055 && !document_flows.payments.is_empty()
3056 && !banking.accounts.is_empty()
3057 {
3058 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3059 if bridge_rate > 0.0 {
3060 let mut bridge =
3061 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3062 self.seed,
3063 );
3064 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3065 &document_flows.payments,
3066 &banking.customers,
3067 &banking.accounts,
3068 bridge_rate,
3069 );
3070 info!(
3071 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3072 bridge_stats.bridged_count,
3073 bridge_stats.transactions_emitted,
3074 bridge_stats.fraud_propagated,
3075 );
3076 let bridged_count = bridged_txns.len();
3077 banking.transactions.extend(bridged_txns);
3078
3079 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3082 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3083 &mut banking.transactions,
3084 );
3085 }
3086
3087 banking.suspicious_count = banking
3089 .transactions
3090 .iter()
3091 .filter(|t| t.is_suspicious)
3092 .count();
3093 stats.banking_transaction_count = banking.transactions.len();
3094 stats.banking_suspicious_count = banking.suspicious_count;
3095 }
3096 }
3097
3098 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3100
3101 self.phase_llm_enrichment(&mut stats);
3103
3104 self.phase_diffusion_enhancement(&entries, &mut stats);
3106
3107 self.phase_causal_overlay(&mut stats);
3109
3110 let mut financial_reporting = self.phase_financial_reporting(
3114 &document_flows,
3115 &entries,
3116 &coa,
3117 &hr,
3118 &audit,
3119 &mut stats,
3120 )?;
3121
3122 {
3124 use datasynth_core::models::StatementType;
3125 for stmt in &financial_reporting.consolidated_statements {
3126 if stmt.statement_type == StatementType::BalanceSheet {
3127 let total_assets: rust_decimal::Decimal = stmt
3128 .line_items
3129 .iter()
3130 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3131 .map(|li| li.amount)
3132 .sum();
3133 let total_le: rust_decimal::Decimal = stmt
3134 .line_items
3135 .iter()
3136 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3137 .map(|li| li.amount)
3138 .sum();
3139 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3140 warn!(
3141 "BS equation imbalance: assets={}, L+E={}",
3142 total_assets, total_le
3143 );
3144 }
3145 }
3146 }
3147 }
3148
3149 let accounting_standards =
3151 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3152
3153 if !accounting_standards.ecl_journal_entries.is_empty() {
3155 debug!(
3156 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3157 accounting_standards.ecl_journal_entries.len()
3158 );
3159 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3160 }
3161
3162 if !accounting_standards.provision_journal_entries.is_empty() {
3164 debug!(
3165 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3166 accounting_standards.provision_journal_entries.len()
3167 );
3168 entries.extend(
3169 accounting_standards
3170 .provision_journal_entries
3171 .iter()
3172 .cloned(),
3173 );
3174 }
3175
3176 let mut ocpm = self.phase_ocpm_events(
3178 &document_flows,
3179 &sourcing,
3180 &hr,
3181 &manufacturing_snap,
3182 &banking,
3183 &audit,
3184 &financial_reporting,
3185 &mut stats,
3186 )?;
3187
3188 if let Some(ref event_log) = ocpm.event_log {
3190 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3191 }
3192
3193 if let Some(ref event_log) = ocpm.event_log {
3195 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3197 std::collections::HashMap::new();
3198 for (idx, event) in event_log.events.iter().enumerate() {
3199 if let Some(ref doc_ref) = event.document_ref {
3200 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3201 }
3202 }
3203
3204 if !doc_index.is_empty() {
3205 let mut annotated = 0usize;
3206 for entry in &mut entries {
3207 let doc_id_str = entry.header.document_id.to_string();
3208 let mut matched_indices: Vec<usize> = Vec::new();
3210 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3211 matched_indices.extend(indices);
3212 }
3213 if let Some(ref reference) = entry.header.reference {
3214 let bare_ref = reference
3215 .find(':')
3216 .map(|i| &reference[i + 1..])
3217 .unwrap_or(reference.as_str());
3218 if let Some(indices) = doc_index.get(bare_ref) {
3219 for &idx in indices {
3220 if !matched_indices.contains(&idx) {
3221 matched_indices.push(idx);
3222 }
3223 }
3224 }
3225 }
3226 if !matched_indices.is_empty() {
3228 for &idx in &matched_indices {
3229 let event = &event_log.events[idx];
3230 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3231 entry.header.ocpm_event_ids.push(event.event_id);
3232 }
3233 for obj_ref in &event.object_refs {
3234 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3235 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3236 }
3237 }
3238 if entry.header.ocpm_case_id.is_none() {
3239 entry.header.ocpm_case_id = event.case_id;
3240 }
3241 }
3242 annotated += 1;
3243 }
3244 }
3245 debug!(
3246 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3247 annotated
3248 );
3249 }
3250 }
3251
3252 if let Some(ref mut event_log) = ocpm.event_log {
3256 let synthesized =
3257 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3258 if synthesized > 0 {
3259 info!(
3260 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3261 );
3262 }
3263
3264 let anomaly_events =
3269 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3270 if anomaly_events > 0 {
3271 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3272 }
3273
3274 let p2p_cfg = &self.config.ocpm.p2p_process;
3279 let any_imperfection = p2p_cfg.rework_probability > 0.0
3280 || p2p_cfg.skip_step_probability > 0.0
3281 || p2p_cfg.out_of_order_probability > 0.0;
3282 if any_imperfection {
3283 use rand_chacha::rand_core::SeedableRng;
3284 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3285 rework_rate: p2p_cfg.rework_probability,
3286 skip_rate: p2p_cfg.skip_step_probability,
3287 out_of_order_rate: p2p_cfg.out_of_order_probability,
3288 };
3289 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3290 let stats =
3291 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3292 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3293 info!(
3294 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3295 stats.rework, stats.skipped, stats.out_of_order
3296 );
3297 }
3298 }
3299 }
3300
3301 let sales_kpi_budgets =
3303 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3304
3305 let treasury =
3309 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3310
3311 if !treasury.journal_entries.is_empty() {
3313 debug!(
3314 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3315 treasury.journal_entries.len()
3316 );
3317 entries.extend(treasury.journal_entries.iter().cloned());
3318 }
3319
3320 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3322
3323 if !tax.tax_posting_journal_entries.is_empty() {
3325 debug!(
3326 "Merging {} tax posting JEs into GL",
3327 tax.tax_posting_journal_entries.len()
3328 );
3329 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3330 }
3331
3332 {
3350 use datasynth_core::fraud_bias::{
3351 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3352 };
3353 use rand_chacha::rand_core::SeedableRng;
3354 let cfg = FraudBehavioralBiasConfig::default();
3355 if cfg.enabled {
3356 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3357 let mut swept = 0usize;
3358 for entry in entries.iter_mut() {
3359 if entry.header.is_fraud && !entry.header.is_anomaly {
3360 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3361 swept += 1;
3362 }
3363 }
3364 if swept > 0 {
3365 info!(
3366 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3367 non-anomaly fraud entries (covers late-added JEs from \
3368 ECL / provisions / treasury / tax / period-close)"
3369 );
3370 }
3371 }
3372 }
3373
3374 {
3378 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3379
3380 let framework_str = {
3381 use datasynth_config::schema::AccountingFrameworkConfig;
3382 match self
3383 .config
3384 .accounting_standards
3385 .framework
3386 .unwrap_or_default()
3387 {
3388 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3389 "IFRS"
3390 }
3391 _ => "US_GAAP",
3392 }
3393 };
3394
3395 let depreciation_total: rust_decimal::Decimal = entries
3397 .iter()
3398 .filter(|je| je.header.document_type == "CL")
3399 .flat_map(|je| je.lines.iter())
3400 .filter(|l| l.gl_account.starts_with("6000"))
3401 .map(|l| l.debit_amount)
3402 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3403
3404 let interest_paid: rust_decimal::Decimal = entries
3406 .iter()
3407 .flat_map(|je| je.lines.iter())
3408 .filter(|l| l.gl_account.starts_with("7100"))
3409 .map(|l| l.debit_amount)
3410 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3411
3412 let tax_paid: rust_decimal::Decimal = entries
3414 .iter()
3415 .flat_map(|je| je.lines.iter())
3416 .filter(|l| l.gl_account.starts_with("8000"))
3417 .map(|l| l.debit_amount)
3418 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3419
3420 let capex: rust_decimal::Decimal = entries
3422 .iter()
3423 .flat_map(|je| je.lines.iter())
3424 .filter(|l| l.gl_account.starts_with("1500"))
3425 .map(|l| l.debit_amount)
3426 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3427
3428 let dividends_paid: rust_decimal::Decimal = entries
3430 .iter()
3431 .flat_map(|je| je.lines.iter())
3432 .filter(|l| l.gl_account == "2170")
3433 .map(|l| l.debit_amount)
3434 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3435
3436 let cf_data = CashFlowSourceData {
3437 depreciation_total,
3438 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3440 delta_ap: rust_decimal::Decimal::ZERO,
3441 delta_inventory: rust_decimal::Decimal::ZERO,
3442 capex,
3443 debt_issuance: rust_decimal::Decimal::ZERO,
3444 debt_repayment: rust_decimal::Decimal::ZERO,
3445 interest_paid,
3446 tax_paid,
3447 dividends_paid,
3448 framework: framework_str.to_string(),
3449 };
3450
3451 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3452 if !enhanced_cf_items.is_empty() {
3453 use datasynth_core::models::StatementType;
3455 let merge_count = enhanced_cf_items.len();
3456 for stmt in financial_reporting
3457 .financial_statements
3458 .iter_mut()
3459 .chain(financial_reporting.consolidated_statements.iter_mut())
3460 .chain(
3461 financial_reporting
3462 .standalone_statements
3463 .values_mut()
3464 .flat_map(|v| v.iter_mut()),
3465 )
3466 {
3467 if stmt.statement_type == StatementType::CashFlowStatement {
3468 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3469 }
3470 }
3471 info!(
3472 "Enhanced cash flow: {} supplementary items merged into CF statements",
3473 merge_count
3474 );
3475 }
3476 }
3477
3478 self.generate_notes_to_financial_statements(
3481 &mut financial_reporting,
3482 &accounting_standards,
3483 &tax,
3484 &hr,
3485 &audit,
3486 &treasury,
3487 );
3488
3489 if self.config.companies.len() >= 2 && !entries.is_empty() {
3493 let companies: Vec<(String, String)> = self
3494 .config
3495 .companies
3496 .iter()
3497 .map(|c| (c.code.clone(), c.name.clone()))
3498 .collect();
3499 let ic_elim: rust_decimal::Decimal =
3500 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3501 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3502 .unwrap_or(NaiveDate::MIN);
3503 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3504 let period_label = format!(
3505 "{}-{:02}",
3506 end_date.year(),
3507 (end_date - chrono::Days::new(1)).month()
3508 );
3509
3510 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3511 let (je_segments, je_recon) =
3512 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3513 if !je_segments.is_empty() {
3514 info!(
3515 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3516 je_segments.len(),
3517 ic_elim,
3518 );
3519 if financial_reporting.segment_reports.is_empty() {
3521 financial_reporting.segment_reports = je_segments;
3522 financial_reporting.segment_reconciliations = vec![je_recon];
3523 } else {
3524 financial_reporting.segment_reports.extend(je_segments);
3525 financial_reporting.segment_reconciliations.push(je_recon);
3526 }
3527 }
3528 }
3529
3530 let esg_snap =
3532 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3533
3534 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3536
3537 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3539
3540 let disruption_events = self.phase_disruption_events(&mut stats)?;
3542
3543 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3545
3546 let (entity_relationship_graph, cross_process_links) =
3548 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3549
3550 let industry_output = self.phase_industry_data(&mut stats);
3552
3553 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3555
3556 if self.config.diffusion.enabled
3574 && (self.config.diffusion.backend == "neural"
3575 || self.config.diffusion.backend == "hybrid")
3576 {
3577 let neural = &self.config.diffusion.neural;
3578 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3579 stats.neural_hybrid_weight = Some(weight);
3580 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3581 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3582 warn!(
3583 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3584 the neural/hybrid training path is not yet shipped. Config \
3585 is captured in stats (weight={weight:.2}, strategy={}, \
3586 columns={}) but no neural training runs. Statistical \
3587 diffusion (backend='statistical') continues to work.",
3588 self.config.diffusion.backend,
3589 neural.hybrid_strategy,
3590 neural.neural_columns.len(),
3591 );
3592 }
3593
3594 self.phase_hypergraph_export(
3596 &coa,
3597 &entries,
3598 &document_flows,
3599 &sourcing,
3600 &hr,
3601 &manufacturing_snap,
3602 &banking,
3603 &audit,
3604 &financial_reporting,
3605 &ocpm,
3606 &compliance_regulations,
3607 &mut stats,
3608 )?;
3609
3610 if self.phase_config.generate_graph_export {
3613 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3614 }
3615
3616 if self.config.streaming.enabled {
3618 info!("Note: streaming config is enabled but batch mode does not use it");
3619 }
3620 if self.config.vendor_network.enabled {
3621 debug!("Vendor network config available; relationship graph generation is partial");
3622 }
3623 if self.config.customer_segmentation.enabled {
3624 debug!("Customer segmentation config available; segment-aware generation is partial");
3625 }
3626
3627 let resource_stats = self.resource_guard.stats();
3629 info!(
3630 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3631 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3632 resource_stats.disk.estimated_bytes_written,
3633 resource_stats.degradation_level
3634 );
3635
3636 if let Some(ref sink) = self.phase_sink {
3638 if let Err(e) = sink.flush() {
3639 warn!("Stream sink flush failed: {e}");
3640 }
3641 }
3642
3643 let lineage = self.build_lineage_graph();
3645
3646 let gate_result = if self.config.quality_gates.enabled {
3648 let profile_name = &self.config.quality_gates.profile;
3649 match datasynth_eval::gates::get_profile(profile_name) {
3650 Some(profile) => {
3651 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3653
3654 if balance_validation.validated {
3656 eval.coherence.balance =
3657 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3658 equation_balanced: balance_validation.is_balanced,
3659 max_imbalance: (balance_validation.total_debits
3660 - balance_validation.total_credits)
3661 .abs(),
3662 periods_evaluated: 1,
3663 periods_imbalanced: if balance_validation.is_balanced {
3664 0
3665 } else {
3666 1
3667 },
3668 period_results: Vec::new(),
3669 companies_evaluated: self.config.companies.len(),
3670 });
3671 }
3672
3673 eval.coherence.passes = balance_validation.is_balanced;
3675 if !balance_validation.is_balanced {
3676 eval.coherence
3677 .failures
3678 .push("Balance sheet equation not satisfied".to_string());
3679 }
3680
3681 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3683 eval.statistical.passes = !entries.is_empty();
3684
3685 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3688
3689 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3690 info!(
3691 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3692 profile_name, result.gates_passed, result.gates_total, result.summary
3693 );
3694 Some(result)
3695 }
3696 None => {
3697 warn!(
3698 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3699 profile_name
3700 );
3701 None
3702 }
3703 }
3704 } else {
3705 None
3706 };
3707
3708 let internal_controls = if self.config.internal_controls.enabled {
3710 InternalControl::standard_controls()
3711 } else {
3712 Vec::new()
3713 };
3714
3715 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3719
3720 let statistical_validation = self.phase_statistical_validation(&entries)?;
3725
3726 let interconnectivity = self.phase_interconnectivity();
3730
3731 Ok(EnhancedGenerationResult {
3732 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3733 master_data: std::mem::take(&mut self.master_data),
3734 document_flows,
3735 subledger,
3736 ocpm,
3737 audit,
3738 banking,
3739 graph_export,
3740 sourcing,
3741 financial_reporting,
3742 hr,
3743 accounting_standards,
3744 manufacturing: manufacturing_snap,
3745 sales_kpi_budgets,
3746 tax,
3747 esg: esg_snap,
3748 treasury,
3749 project_accounting,
3750 process_evolution,
3751 organizational_events,
3752 disruption_events,
3753 intercompany,
3754 journal_entries: entries,
3755 anomaly_labels,
3756 balance_validation,
3757 data_quality_stats,
3758 quality_issues,
3759 statistics: stats,
3760 lineage: Some(lineage),
3761 gate_result,
3762 internal_controls,
3763 sod_violations,
3764 opening_balances,
3765 subledger_reconciliation,
3766 counterfactual_pairs,
3767 red_flags,
3768 collusion_rings,
3769 temporal_vendor_chains,
3770 entity_relationship_graph,
3771 cross_process_links,
3772 industry_output,
3773 compliance_regulations,
3774 analytics_metadata,
3775 statistical_validation,
3776 interconnectivity,
3777 })
3778 }
3779
3780 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3784 use rand::{RngExt, SeedableRng};
3785 use rand_chacha::ChaCha8Rng;
3786
3787 let mut snap = InterconnectivitySnapshot::default();
3788 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3789
3790 let vn = &self.config.vendor_network;
3792 if vn.enabled {
3793 let total = self.master_data.vendors.len();
3794 if total > 0 {
3795 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3796 let remaining_after_t1 = total.saturating_sub(tier1_count);
3797 let depth = vn.depth.clamp(1, 3);
3798 let tier2_count = if depth >= 2 {
3799 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3800 (tier1_count * avg).min(remaining_after_t1)
3801 } else {
3802 0
3803 };
3804 let tier3_count = total
3805 .saturating_sub(tier1_count)
3806 .saturating_sub(tier2_count);
3807
3808 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3809 let tier = if idx < tier1_count {
3810 1
3811 } else if idx < tier1_count + tier2_count {
3812 2
3813 } else {
3814 3
3815 };
3816 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3817
3818 let cl = &vn.clusters;
3820 let roll: f64 = rng.random();
3821 let cluster = if roll < cl.reliable_strategic {
3822 "reliable_strategic"
3823 } else if roll < cl.reliable_strategic + cl.standard_operational {
3824 "standard_operational"
3825 } else if roll
3826 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3827 {
3828 "transactional"
3829 } else {
3830 "problematic"
3831 };
3832 snap.vendor_clusters
3833 .push((vendor.vendor_id.clone(), cluster.to_string()));
3834 }
3835 let _ = tier3_count; }
3837 }
3838
3839 let cs = &self.config.customer_segmentation;
3841 if cs.enabled {
3842 let seg = &cs.value_segments;
3843 for customer in &self.master_data.customers {
3844 let roll: f64 = rng.random();
3845 let value_segment = if roll < seg.enterprise.customer_share {
3846 "enterprise"
3847 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3848 "mid_market"
3849 } else if roll
3850 < seg.enterprise.customer_share
3851 + seg.mid_market.customer_share
3852 + seg.smb.customer_share
3853 {
3854 "smb"
3855 } else {
3856 "consumer"
3857 };
3858 snap.customer_value_segments
3859 .push((customer.customer_id.clone(), value_segment.to_string()));
3860
3861 let roll2: f64 = rng.random();
3862 let life = &cs.lifecycle;
3863 let lifecycle = if roll2 < life.prospect_rate {
3864 "prospect"
3865 } else if roll2 < life.prospect_rate + life.new_rate {
3866 "new"
3867 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3868 "growth"
3869 } else if roll2
3870 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3871 {
3872 "mature"
3873 } else if roll2
3874 < life.prospect_rate
3875 + life.new_rate
3876 + life.growth_rate
3877 + life.mature_rate
3878 + life.at_risk_rate
3879 {
3880 "at_risk"
3881 } else if roll2
3882 < life.prospect_rate
3883 + life.new_rate
3884 + life.growth_rate
3885 + life.mature_rate
3886 + life.at_risk_rate
3887 + life.churned_rate
3888 {
3889 "churned"
3890 } else {
3891 "won_back"
3892 };
3893 snap.customer_lifecycle_stages
3894 .push((customer.customer_id.clone(), lifecycle.to_string()));
3895 }
3896 }
3897
3898 let is = &self.config.industry_specific;
3900 if is.enabled {
3901 snap.industry_metadata.push(format!(
3902 "industry_specific.enabled=true (industry={:?})",
3903 self.config.global.industry
3904 ));
3905 }
3906
3907 snap
3908 }
3909
3910 fn phase_chart_of_accounts(
3916 &mut self,
3917 stats: &mut EnhancedGenerationStatistics,
3918 ) -> SynthResult<Arc<ChartOfAccounts>> {
3919 info!("Phase 1: Generating Chart of Accounts");
3920 let coa = self.generate_coa()?;
3921 stats.accounts_count = coa.account_count();
3922 info!(
3923 "Chart of Accounts generated: {} accounts",
3924 stats.accounts_count
3925 );
3926 self.check_resources_with_log("post-coa")?;
3927 Ok(coa)
3928 }
3929
3930 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3932 if self.phase_config.generate_master_data {
3933 info!("Phase 2: Generating Master Data");
3934 self.generate_master_data()?;
3935 stats.vendor_count = self.master_data.vendors.len();
3936 stats.customer_count = self.master_data.customers.len();
3937 stats.material_count = self.master_data.materials.len();
3938 stats.asset_count = self.master_data.assets.len();
3939 stats.employee_count = self.master_data.employees.len();
3940 info!(
3941 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3942 stats.vendor_count, stats.customer_count, stats.material_count,
3943 stats.asset_count, stats.employee_count
3944 );
3945 self.check_resources_with_log("post-master-data")?;
3946 } else {
3947 debug!("Phase 2: Skipped (master data generation disabled)");
3948 }
3949 Ok(())
3950 }
3951
3952 fn phase_document_flows(
3954 &mut self,
3955 stats: &mut EnhancedGenerationStatistics,
3956 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3957 let mut document_flows = DocumentFlowSnapshot::default();
3958 let mut subledger = SubledgerSnapshot::default();
3959 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3962
3963 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3964 info!("Phase 3: Generating Document Flows");
3965 self.generate_document_flows(&mut document_flows)?;
3966 stats.p2p_chain_count = document_flows.p2p_chains.len();
3967 stats.o2c_chain_count = document_flows.o2c_chains.len();
3968 info!(
3969 "Document flows generated: {} P2P chains, {} O2C chains",
3970 stats.p2p_chain_count, stats.o2c_chain_count
3971 );
3972
3973 debug!("Phase 3b: Linking document flows to subledgers");
3975 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3976 stats.ap_invoice_count = subledger.ap_invoices.len();
3977 stats.ar_invoice_count = subledger.ar_invoices.len();
3978 debug!(
3979 "Subledgers linked: {} AP invoices, {} AR invoices",
3980 stats.ap_invoice_count, stats.ar_invoice_count
3981 );
3982
3983 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3988 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3989 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3990 debug!("Payment settlements applied to AP and AR subledgers");
3991
3992 if let Ok(start_date) =
3995 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3996 {
3997 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3998 - chrono::Days::new(1);
3999 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4000 for company in &self.config.companies {
4007 let ar_report = ARAgingReport::from_invoices(
4008 company.code.clone(),
4009 &subledger.ar_invoices,
4010 as_of_date,
4011 );
4012 subledger.ar_aging_reports.push(ar_report);
4013
4014 let ap_report = APAgingReport::from_invoices(
4015 company.code.clone(),
4016 &subledger.ap_invoices,
4017 as_of_date,
4018 );
4019 subledger.ap_aging_reports.push(ap_report);
4020 }
4021 debug!(
4022 "AR/AP aging reports built: {} AR, {} AP",
4023 subledger.ar_aging_reports.len(),
4024 subledger.ap_aging_reports.len()
4025 );
4026
4027 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4029 {
4030 use datasynth_generators::DunningGenerator;
4031 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4032 for company in &self.config.companies {
4033 let currency = company.currency.as_str();
4034 let mut company_invoices: Vec<
4037 datasynth_core::models::subledger::ar::ARInvoice,
4038 > = subledger
4039 .ar_invoices
4040 .iter()
4041 .filter(|inv| inv.company_code == company.code)
4042 .cloned()
4043 .collect();
4044
4045 if company_invoices.is_empty() {
4046 continue;
4047 }
4048
4049 let result = dunning_gen.execute_dunning_run(
4050 &company.code,
4051 as_of_date,
4052 &mut company_invoices,
4053 currency,
4054 );
4055
4056 for updated in &company_invoices {
4058 if let Some(orig) = subledger
4059 .ar_invoices
4060 .iter_mut()
4061 .find(|i| i.invoice_number == updated.invoice_number)
4062 {
4063 orig.dunning_info = updated.dunning_info.clone();
4064 }
4065 }
4066
4067 subledger.dunning_runs.push(result.dunning_run);
4068 subledger.dunning_letters.extend(result.letters);
4069 dunning_journal_entries.extend(result.journal_entries);
4071 }
4072 debug!(
4073 "Dunning runs complete: {} runs, {} letters",
4074 subledger.dunning_runs.len(),
4075 subledger.dunning_letters.len()
4076 );
4077 }
4078 }
4079
4080 self.check_resources_with_log("post-document-flows")?;
4081 } else {
4082 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4083 }
4084
4085 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4087 if !self.master_data.assets.is_empty() {
4088 debug!("Generating FA subledger records");
4089 let company_code = self
4090 .config
4091 .companies
4092 .first()
4093 .map(|c| c.code.as_str())
4094 .unwrap_or("1000");
4095 let currency = self
4096 .config
4097 .companies
4098 .first()
4099 .map(|c| c.currency.as_str())
4100 .unwrap_or("USD");
4101
4102 let mut fa_gen = datasynth_generators::FAGenerator::new(
4103 datasynth_generators::FAGeneratorConfig::default(),
4104 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4105 );
4106
4107 for asset in &self.master_data.assets {
4108 let (record, je) = fa_gen.generate_asset_acquisition(
4109 company_code,
4110 &format!("{:?}", asset.asset_class),
4111 &asset.description,
4112 asset.acquisition_date,
4113 currency,
4114 asset.cost_center.as_deref(),
4115 );
4116 subledger.fa_records.push(record);
4117 fa_journal_entries.push(je);
4118 }
4119
4120 stats.fa_subledger_count = subledger.fa_records.len();
4121 debug!(
4122 "FA subledger records generated: {} (with {} acquisition JEs)",
4123 stats.fa_subledger_count,
4124 fa_journal_entries.len()
4125 );
4126 }
4127
4128 if !self.master_data.materials.is_empty() {
4130 debug!("Generating Inventory subledger records");
4131 let first_company = self.config.companies.first();
4132 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4133 let inv_currency = first_company
4134 .map(|c| c.currency.clone())
4135 .unwrap_or_else(|| "USD".to_string());
4136
4137 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4138 datasynth_generators::InventoryGeneratorConfig::default(),
4139 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4140 inv_currency.clone(),
4141 );
4142
4143 for (i, material) in self.master_data.materials.iter().enumerate() {
4144 let plant = format!("PLANT{:02}", (i % 3) + 1);
4145 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4146 let initial_qty = rust_decimal::Decimal::from(
4147 material
4148 .safety_stock
4149 .to_string()
4150 .parse::<i64>()
4151 .unwrap_or(100),
4152 );
4153
4154 let position = inv_gen.generate_position(
4155 company_code,
4156 &plant,
4157 &storage_loc,
4158 &material.material_id,
4159 &material.description,
4160 initial_qty,
4161 Some(material.standard_cost),
4162 &inv_currency,
4163 );
4164 subledger.inventory_positions.push(position);
4165 }
4166
4167 stats.inventory_subledger_count = subledger.inventory_positions.len();
4168 debug!(
4169 "Inventory subledger records generated: {}",
4170 stats.inventory_subledger_count
4171 );
4172 }
4173
4174 if !subledger.fa_records.is_empty() {
4176 if let Ok(start_date) =
4177 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4178 {
4179 let company_code = self
4180 .config
4181 .companies
4182 .first()
4183 .map(|c| c.code.as_str())
4184 .unwrap_or("1000");
4185 let fiscal_year = start_date.year();
4186 let start_period = start_date.month();
4187 let end_period =
4188 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4189
4190 let depr_cfg = FaDepreciationScheduleConfig {
4191 fiscal_year,
4192 start_period,
4193 end_period,
4194 seed_offset: 800,
4195 };
4196 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4197 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4198 let run_count = runs.len();
4199 subledger.depreciation_runs = runs;
4200 debug!(
4201 "Depreciation runs generated: {} runs for {} periods",
4202 run_count, self.config.global.period_months
4203 );
4204 }
4205 }
4206
4207 if !subledger.inventory_positions.is_empty() {
4209 if let Ok(start_date) =
4210 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4211 {
4212 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4213 - chrono::Days::new(1);
4214
4215 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4216 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4217
4218 for company in &self.config.companies {
4219 let result = inv_val_gen.generate(
4220 &company.code,
4221 &subledger.inventory_positions,
4222 as_of_date,
4223 );
4224 subledger.inventory_valuations.push(result);
4225 }
4226 debug!(
4227 "Inventory valuations generated: {} company reports",
4228 subledger.inventory_valuations.len()
4229 );
4230 }
4231 }
4232
4233 Ok((document_flows, subledger, fa_journal_entries))
4234 }
4235
4236 #[allow(clippy::too_many_arguments)]
4238 fn phase_ocpm_events(
4239 &mut self,
4240 document_flows: &DocumentFlowSnapshot,
4241 sourcing: &SourcingSnapshot,
4242 hr: &HrSnapshot,
4243 manufacturing: &ManufacturingSnapshot,
4244 banking: &BankingSnapshot,
4245 audit: &AuditSnapshot,
4246 financial_reporting: &FinancialReportingSnapshot,
4247 stats: &mut EnhancedGenerationStatistics,
4248 ) -> SynthResult<OcpmSnapshot> {
4249 let degradation = self.check_resources()?;
4250 if degradation >= DegradationLevel::Reduced {
4251 debug!(
4252 "Phase skipped due to resource pressure (degradation: {:?})",
4253 degradation
4254 );
4255 return Ok(OcpmSnapshot::default());
4256 }
4257 if self.phase_config.generate_ocpm_events {
4258 info!("Phase 3c: Generating OCPM Events");
4259 let ocpm_snapshot = self.generate_ocpm_events(
4260 document_flows,
4261 sourcing,
4262 hr,
4263 manufacturing,
4264 banking,
4265 audit,
4266 financial_reporting,
4267 )?;
4268 stats.ocpm_event_count = ocpm_snapshot.event_count;
4269 stats.ocpm_object_count = ocpm_snapshot.object_count;
4270 stats.ocpm_case_count = ocpm_snapshot.case_count;
4271 info!(
4272 "OCPM events generated: {} events, {} objects, {} cases",
4273 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4274 );
4275 self.check_resources_with_log("post-ocpm")?;
4276 Ok(ocpm_snapshot)
4277 } else {
4278 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4279 Ok(OcpmSnapshot::default())
4280 }
4281 }
4282
4283 fn phase_journal_entries(
4285 &mut self,
4286 coa: &Arc<ChartOfAccounts>,
4287 document_flows: &DocumentFlowSnapshot,
4288 _stats: &mut EnhancedGenerationStatistics,
4289 ) -> SynthResult<Vec<JournalEntry>> {
4290 let mut entries = Vec::new();
4291
4292 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4294 debug!("Phase 4a: Generating JEs from document flows");
4295 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4296 debug!("Generated {} JEs from document flows", flow_entries.len());
4297 entries.extend(flow_entries);
4298 }
4299
4300 if self.phase_config.generate_journal_entries {
4302 info!("Phase 4: Generating Journal Entries");
4303 let je_entries = self.generate_journal_entries(coa)?;
4304 info!("Generated {} standalone journal entries", je_entries.len());
4305 entries.extend(je_entries);
4306 } else {
4307 debug!("Phase 4: Skipped (journal entry generation disabled)");
4308 }
4309
4310 if let Some(ctx) = &self.shard_context {
4314 if !ctx.extra_journal_entries.is_empty() {
4315 debug!(
4316 "Phase 4c: appending {} shard-mode IC journal entries",
4317 ctx.extra_journal_entries.len()
4318 );
4319 entries.extend(ctx.extra_journal_entries.iter().cloned());
4320 }
4321 }
4322
4323 if !entries.is_empty() {
4324 self.check_resources_with_log("post-journal-entries")?;
4327 }
4328
4329 Ok(entries)
4330 }
4331
4332 fn phase_anomaly_injection(
4334 &mut self,
4335 entries: &mut [JournalEntry],
4336 actions: &DegradationActions,
4337 stats: &mut EnhancedGenerationStatistics,
4338 ) -> SynthResult<AnomalyLabels> {
4339 if self.phase_config.inject_anomalies
4340 && !entries.is_empty()
4341 && !actions.skip_anomaly_injection
4342 {
4343 info!("Phase 5: Injecting Anomalies");
4344 let result = self.inject_anomalies(entries)?;
4345 stats.anomalies_injected = result.labels.len();
4346 info!("Injected {} anomalies", stats.anomalies_injected);
4347 self.check_resources_with_log("post-anomaly-injection")?;
4348 Ok(result)
4349 } else if actions.skip_anomaly_injection {
4350 warn!("Phase 5: Skipped due to resource degradation");
4351 Ok(AnomalyLabels::default())
4352 } else {
4353 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4354 Ok(AnomalyLabels::default())
4355 }
4356 }
4357
4358 fn phase_balance_validation(
4360 &mut self,
4361 entries: &[JournalEntry],
4362 ) -> SynthResult<BalanceValidationResult> {
4363 if self.phase_config.validate_balances && !entries.is_empty() {
4364 debug!("Phase 6: Validating Balances");
4365 let balance_validation = self.validate_journal_entries(entries)?;
4366 if balance_validation.is_balanced {
4367 debug!("Balance validation passed");
4368 } else {
4369 warn!(
4370 "Balance validation found {} errors",
4371 balance_validation.validation_errors.len()
4372 );
4373 }
4374 Ok(balance_validation)
4375 } else {
4376 Ok(BalanceValidationResult::default())
4377 }
4378 }
4379
4380 fn validate_coa_coverage(
4387 &self,
4388 entries: &[JournalEntry],
4389 coa: &ChartOfAccounts,
4390 ) -> SynthResult<()> {
4391 if entries.is_empty() {
4392 return Ok(());
4393 }
4394 let coa_set: std::collections::HashSet<&str> = coa
4395 .accounts
4396 .iter()
4397 .map(|a| a.account_number.as_str())
4398 .collect();
4399 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4400 for je in entries {
4401 for line in je.lines.iter() {
4402 if !coa_set.contains(line.gl_account.as_str()) {
4403 missing.insert(line.gl_account.clone());
4404 }
4405 }
4406 }
4407 if missing.is_empty() {
4408 debug!("COA coverage validation passed");
4409 return Ok(());
4410 }
4411 let msg = format!(
4412 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4413 missing.len(),
4414 missing.iter().take(10).collect::<Vec<_>>()
4415 );
4416 if self.phase_config.validate_coa_coverage_strict {
4417 Err(SynthError::generation(msg))
4418 } else {
4419 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4420 Ok(())
4421 }
4422 }
4423
4424 fn phase_data_quality_injection(
4426 &mut self,
4427 entries: &mut [JournalEntry],
4428 actions: &DegradationActions,
4429 stats: &mut EnhancedGenerationStatistics,
4430 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4431 if self.phase_config.inject_data_quality
4432 && !entries.is_empty()
4433 && !actions.skip_data_quality
4434 {
4435 info!("Phase 7: Injecting Data Quality Variations");
4436 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4437 stats.data_quality_issues = dq_stats.records_with_issues;
4438 info!("Injected {} data quality issues", stats.data_quality_issues);
4439 self.check_resources_with_log("post-data-quality")?;
4440 Ok((dq_stats, quality_issues))
4441 } else if actions.skip_data_quality {
4442 warn!("Phase 7: Skipped due to resource degradation");
4443 Ok((stats_with_denominator(entries.len()), Vec::new()))
4447 } else {
4448 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4449 Ok((stats_with_denominator(entries.len()), Vec::new()))
4450 }
4451 }
4452
4453 fn phase_period_close(
4463 &mut self,
4464 entries: &mut Vec<JournalEntry>,
4465 subledger: &SubledgerSnapshot,
4466 stats: &mut EnhancedGenerationStatistics,
4467 ) -> SynthResult<()> {
4468 if !self.phase_config.generate_period_close || entries.is_empty() {
4469 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4470 return Ok(());
4471 }
4472
4473 info!("Phase 10b: Generating period-close journal entries");
4474
4475 use datasynth_core::accounts::{
4476 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4477 };
4478 use rust_decimal::Decimal;
4479
4480 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4481 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4482 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4483 let close_date = end_date - chrono::Days::new(1);
4485
4486 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4491 .config
4492 .companies
4493 .iter()
4494 .map(|c| c.code.clone())
4495 .collect();
4496
4497 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4499 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4500
4501 let period_months = self.config.global.period_months;
4505 for asset in &subledger.fa_records {
4506 use datasynth_core::models::subledger::fa::AssetStatus;
4508 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4509 continue;
4510 }
4511 let useful_life_months = asset.useful_life_months();
4512 if useful_life_months == 0 {
4513 continue;
4515 }
4516 let salvage_value = asset.salvage_value();
4517 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4518 if depreciable_base == Decimal::ZERO {
4519 continue;
4520 }
4521 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4522 * Decimal::from(period_months))
4523 .round_dp(2);
4524 if period_depr <= Decimal::ZERO {
4525 continue;
4526 }
4527
4528 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4529 depr_header.document_type = "CL".to_string();
4530 depr_header.header_text = Some(format!(
4531 "Depreciation - {} {}",
4532 asset.asset_number, asset.description
4533 ));
4534 depr_header.created_by = "CLOSE_ENGINE".to_string();
4535 depr_header.source = TransactionSource::Automated;
4536 depr_header.business_process = Some(BusinessProcess::R2R);
4537
4538 let doc_id = depr_header.document_id;
4539 let mut depr_je = JournalEntry::new(depr_header);
4540
4541 depr_je.add_line(JournalEntryLine::debit(
4543 doc_id,
4544 1,
4545 expense_accounts::DEPRECIATION.to_string(),
4546 period_depr,
4547 ));
4548 depr_je.add_line(JournalEntryLine::credit(
4550 doc_id,
4551 2,
4552 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4553 period_depr,
4554 ));
4555
4556 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4557 close_jes.push(depr_je);
4558 }
4559
4560 if !subledger.fa_records.is_empty() {
4561 debug!(
4562 "Generated {} depreciation JEs from {} FA records",
4563 close_jes.len(),
4564 subledger.fa_records.len()
4565 );
4566 }
4567
4568 {
4572 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4573 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4574 if let Some(ctx) = &self.temporal_context {
4577 accrual_gen.set_temporal_context(Arc::clone(ctx));
4578 }
4579
4580 let accrual_items: &[(&str, &str, &str)] = &[
4582 ("Accrued Utilities", "6200", "2100"),
4583 ("Accrued Rent", "6300", "2100"),
4584 ("Accrued Interest", "6100", "2150"),
4585 ];
4586
4587 for company_code in &company_codes {
4588 let company_revenue: Decimal = entries
4590 .iter()
4591 .filter(|e| e.header.company_code == *company_code)
4592 .flat_map(|e| e.lines.iter())
4593 .filter(|l| l.gl_account.starts_with('4'))
4594 .map(|l| l.credit_amount - l.debit_amount)
4595 .fold(Decimal::ZERO, |acc, v| acc + v);
4596
4597 if company_revenue <= Decimal::ZERO {
4598 continue;
4599 }
4600
4601 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4603 if accrual_base <= Decimal::ZERO {
4604 continue;
4605 }
4606
4607 for (description, expense_acct, liability_acct) in accrual_items {
4608 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4609 company_code,
4610 description,
4611 accrual_base,
4612 expense_acct,
4613 liability_acct,
4614 close_date,
4615 None,
4616 );
4617 close_jes.push(accrual_je);
4618 if let Some(rev_je) = reversal_je {
4619 close_jes.push(rev_je);
4620 }
4621 }
4622 }
4623
4624 debug!(
4625 "Generated accrual entries for {} companies",
4626 company_codes.len()
4627 );
4628 }
4629
4630 for company_code in &company_codes {
4631 let mut total_revenue = Decimal::ZERO;
4636 let mut total_expenses = Decimal::ZERO;
4637
4638 for entry in entries.iter() {
4639 if entry.header.company_code != *company_code {
4640 continue;
4641 }
4642 for line in &entry.lines {
4643 let category = AccountCategory::from_account(&line.gl_account);
4644 match category {
4645 AccountCategory::Revenue => {
4646 total_revenue += line.credit_amount - line.debit_amount;
4648 }
4649 AccountCategory::Cogs
4650 | AccountCategory::OperatingExpense
4651 | AccountCategory::OtherIncomeExpense
4652 | AccountCategory::Tax => {
4653 total_expenses += line.debit_amount - line.credit_amount;
4655 }
4656 _ => {}
4657 }
4658 }
4659 }
4660
4661 let pre_tax_income = total_revenue - total_expenses;
4662
4663 if pre_tax_income == Decimal::ZERO {
4665 debug!(
4666 "Company {}: no pre-tax income, skipping period close",
4667 company_code
4668 );
4669 continue;
4670 }
4671
4672 if pre_tax_income > Decimal::ZERO {
4674 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4676
4677 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4678 tax_header.document_type = "CL".to_string();
4679 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4680 tax_header.created_by = "CLOSE_ENGINE".to_string();
4681 tax_header.source = TransactionSource::Automated;
4682 tax_header.business_process = Some(BusinessProcess::R2R);
4683
4684 let doc_id = tax_header.document_id;
4685 let mut tax_je = JournalEntry::new(tax_header);
4686
4687 tax_je.add_line(JournalEntryLine::debit(
4689 doc_id,
4690 1,
4691 tax_accounts::TAX_EXPENSE.to_string(),
4692 tax_amount,
4693 ));
4694 tax_je.add_line(JournalEntryLine::credit(
4696 doc_id,
4697 2,
4698 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4699 tax_amount,
4700 ));
4701
4702 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4703 close_jes.push(tax_je);
4704 } else {
4705 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4708 if dta_amount > Decimal::ZERO {
4709 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4710 dta_header.document_type = "CL".to_string();
4711 dta_header.header_text =
4712 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4713 dta_header.created_by = "CLOSE_ENGINE".to_string();
4714 dta_header.source = TransactionSource::Automated;
4715 dta_header.business_process = Some(BusinessProcess::R2R);
4716
4717 let doc_id = dta_header.document_id;
4718 let mut dta_je = JournalEntry::new(dta_header);
4719
4720 dta_je.add_line(JournalEntryLine::debit(
4722 doc_id,
4723 1,
4724 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4725 dta_amount,
4726 ));
4727 dta_je.add_line(JournalEntryLine::credit(
4730 doc_id,
4731 2,
4732 tax_accounts::TAX_EXPENSE.to_string(),
4733 dta_amount,
4734 ));
4735
4736 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4737 close_jes.push(dta_je);
4738 debug!(
4739 "Company {}: loss year — recognised DTA of {}",
4740 company_code, dta_amount
4741 );
4742 }
4743 }
4744
4745 let tax_provision = if pre_tax_income > Decimal::ZERO {
4751 (pre_tax_income * tax_rate).round_dp(2)
4752 } else {
4753 Decimal::ZERO
4754 };
4755 let net_income = pre_tax_income - tax_provision;
4756
4757 if net_income > Decimal::ZERO {
4758 use datasynth_generators::DividendGenerator;
4759 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4761 let currency_str = self
4762 .config
4763 .companies
4764 .iter()
4765 .find(|c| c.code == *company_code)
4766 .map(|c| c.currency.as_str())
4767 .unwrap_or("USD");
4768 let div_result = div_gen.generate(
4769 company_code,
4770 close_date,
4771 Decimal::new(1, 0), dividend_amount,
4773 currency_str,
4774 );
4775 let div_je_count = div_result.journal_entries.len();
4776 close_jes.extend(div_result.journal_entries);
4777 debug!(
4778 "Company {}: declared dividend of {} ({} JEs)",
4779 company_code, dividend_amount, div_je_count
4780 );
4781 }
4782
4783 if net_income != Decimal::ZERO {
4788 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4789 close_header.document_type = "CL".to_string();
4790 close_header.header_text =
4791 Some(format!("Income statement close - {}", company_code));
4792 close_header.created_by = "CLOSE_ENGINE".to_string();
4793 close_header.source = TransactionSource::Automated;
4794 close_header.business_process = Some(BusinessProcess::R2R);
4795
4796 let doc_id = close_header.document_id;
4797 let mut close_je = JournalEntry::new(close_header);
4798
4799 let abs_net_income = net_income.abs();
4800
4801 if net_income > Decimal::ZERO {
4802 close_je.add_line(JournalEntryLine::debit(
4804 doc_id,
4805 1,
4806 equity_accounts::INCOME_SUMMARY.to_string(),
4807 abs_net_income,
4808 ));
4809 close_je.add_line(JournalEntryLine::credit(
4810 doc_id,
4811 2,
4812 equity_accounts::RETAINED_EARNINGS.to_string(),
4813 abs_net_income,
4814 ));
4815 } else {
4816 close_je.add_line(JournalEntryLine::debit(
4818 doc_id,
4819 1,
4820 equity_accounts::RETAINED_EARNINGS.to_string(),
4821 abs_net_income,
4822 ));
4823 close_je.add_line(JournalEntryLine::credit(
4824 doc_id,
4825 2,
4826 equity_accounts::INCOME_SUMMARY.to_string(),
4827 abs_net_income,
4828 ));
4829 }
4830
4831 debug_assert!(
4832 close_je.is_balanced(),
4833 "Income statement closing JE must be balanced"
4834 );
4835 close_jes.push(close_je);
4836 }
4837 }
4838
4839 let close_count = close_jes.len();
4840 if close_count > 0 {
4841 info!("Generated {} period-close journal entries", close_count);
4842 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4843 entries.extend(close_jes);
4844 stats.period_close_je_count = close_count;
4845
4846 stats.total_entries = entries.len() as u64;
4848 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4849 } else {
4850 debug!("No period-close entries generated (no income statement activity)");
4851 }
4852
4853 Ok(())
4854 }
4855
4856 fn phase_audit_data(
4858 &mut self,
4859 entries: &[JournalEntry],
4860 stats: &mut EnhancedGenerationStatistics,
4861 ) -> SynthResult<AuditSnapshot> {
4862 if self.phase_config.generate_audit {
4863 info!("Phase 8: Generating Audit Data");
4864 let audit_snapshot = self.generate_audit_data(entries)?;
4865 stats.audit_engagement_count = audit_snapshot.engagements.len();
4866 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4867 stats.audit_evidence_count = audit_snapshot.evidence.len();
4868 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4869 stats.audit_finding_count = audit_snapshot.findings.len();
4870 stats.audit_judgment_count = audit_snapshot.judgments.len();
4871 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4872 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4873 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4874 stats.audit_sample_count = audit_snapshot.samples.len();
4875 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4876 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4877 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4878 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4879 stats.audit_related_party_transaction_count =
4880 audit_snapshot.related_party_transactions.len();
4881 info!(
4882 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4883 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4884 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4885 {} RP transactions",
4886 stats.audit_engagement_count,
4887 stats.audit_workpaper_count,
4888 stats.audit_evidence_count,
4889 stats.audit_risk_count,
4890 stats.audit_finding_count,
4891 stats.audit_judgment_count,
4892 stats.audit_confirmation_count,
4893 stats.audit_procedure_step_count,
4894 stats.audit_sample_count,
4895 stats.audit_analytical_result_count,
4896 stats.audit_ia_function_count,
4897 stats.audit_ia_report_count,
4898 stats.audit_related_party_count,
4899 stats.audit_related_party_transaction_count,
4900 );
4901 self.check_resources_with_log("post-audit")?;
4902 Ok(audit_snapshot)
4903 } else {
4904 debug!("Phase 8: Skipped (audit generation disabled)");
4905 Ok(AuditSnapshot::default())
4906 }
4907 }
4908
4909 fn phase_banking_data(
4911 &mut self,
4912 stats: &mut EnhancedGenerationStatistics,
4913 ) -> SynthResult<BankingSnapshot> {
4914 if self.phase_config.generate_banking {
4915 info!("Phase 9: Generating Banking KYC/AML Data");
4916 let banking_snapshot = self.generate_banking_data()?;
4917 stats.banking_customer_count = banking_snapshot.customers.len();
4918 stats.banking_account_count = banking_snapshot.accounts.len();
4919 stats.banking_transaction_count = banking_snapshot.transactions.len();
4920 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4921 info!(
4922 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4923 stats.banking_customer_count, stats.banking_account_count,
4924 stats.banking_transaction_count, stats.banking_suspicious_count
4925 );
4926 self.check_resources_with_log("post-banking")?;
4927 Ok(banking_snapshot)
4928 } else {
4929 debug!("Phase 9: Skipped (banking generation disabled)");
4930 Ok(BankingSnapshot::default())
4931 }
4932 }
4933
4934 fn phase_graph_export(
4936 &mut self,
4937 entries: &[JournalEntry],
4938 coa: &Arc<ChartOfAccounts>,
4939 stats: &mut EnhancedGenerationStatistics,
4940 ) -> SynthResult<GraphExportSnapshot> {
4941 if self.phase_config.generate_graph_export && !entries.is_empty() {
4942 info!("Phase 10: Exporting Accounting Network Graphs");
4943 match self.export_graphs(entries, coa, stats) {
4944 Ok(snapshot) => {
4945 info!(
4946 "Graph export complete: {} graphs ({} nodes, {} edges)",
4947 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4948 );
4949 Ok(snapshot)
4950 }
4951 Err(e) => {
4952 warn!("Phase 10: Graph export failed: {}", e);
4953 Ok(GraphExportSnapshot::default())
4954 }
4955 }
4956 } else {
4957 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4958 Ok(GraphExportSnapshot::default())
4959 }
4960 }
4961
4962 #[allow(clippy::too_many_arguments)]
4964 fn phase_hypergraph_export(
4965 &self,
4966 coa: &Arc<ChartOfAccounts>,
4967 entries: &[JournalEntry],
4968 document_flows: &DocumentFlowSnapshot,
4969 sourcing: &SourcingSnapshot,
4970 hr: &HrSnapshot,
4971 manufacturing: &ManufacturingSnapshot,
4972 banking: &BankingSnapshot,
4973 audit: &AuditSnapshot,
4974 financial_reporting: &FinancialReportingSnapshot,
4975 ocpm: &OcpmSnapshot,
4976 compliance: &ComplianceRegulationsSnapshot,
4977 stats: &mut EnhancedGenerationStatistics,
4978 ) -> SynthResult<()> {
4979 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4980 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4981 match self.export_hypergraph(
4982 coa,
4983 entries,
4984 document_flows,
4985 sourcing,
4986 hr,
4987 manufacturing,
4988 banking,
4989 audit,
4990 financial_reporting,
4991 ocpm,
4992 compliance,
4993 stats,
4994 ) {
4995 Ok(info) => {
4996 info!(
4997 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4998 info.node_count, info.edge_count, info.hyperedge_count
4999 );
5000 }
5001 Err(e) => {
5002 warn!("Phase 10b: Hypergraph export failed: {}", e);
5003 }
5004 }
5005 } else {
5006 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5007 }
5008 Ok(())
5009 }
5010
5011 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5017 if !self.config.llm.enabled {
5018 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5019 return;
5020 }
5021
5022 info!("Phase 11: Starting LLM Enrichment");
5023 let start = std::time::Instant::now();
5024
5025 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5026 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5029 let schema_provider = &self.config.llm.provider;
5030 let api_key_env = match schema_provider.as_str() {
5031 "openai" => Some("OPENAI_API_KEY"),
5032 "anthropic" => Some("ANTHROPIC_API_KEY"),
5033 "custom" => Some("LLM_API_KEY"),
5034 _ => None,
5035 };
5036 if let Some(key_env) = api_key_env {
5037 if std::env::var(key_env).is_ok() {
5038 let llm_config = datasynth_core::llm::LlmConfig {
5039 model: self.config.llm.model.clone(),
5040 api_key_env: key_env.to_string(),
5041 ..datasynth_core::llm::LlmConfig::default()
5042 };
5043 match HttpLlmProvider::new(llm_config) {
5044 Ok(p) => Arc::new(p),
5045 Err(e) => {
5046 warn!(
5047 "Failed to create HttpLlmProvider: {}; falling back to mock",
5048 e
5049 );
5050 Arc::new(MockLlmProvider::new(self.seed))
5051 }
5052 }
5053 } else {
5054 Arc::new(MockLlmProvider::new(self.seed))
5055 }
5056 } else {
5057 Arc::new(MockLlmProvider::new(self.seed))
5058 }
5059 };
5060 let industry = format!("{:?}", self.config.global.industry);
5064
5065 let vendor_enricher =
5066 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5067 let max_vendors = self
5068 .config
5069 .llm
5070 .max_vendor_enrichments
5071 .min(self.master_data.vendors.len());
5072 let mut vendors_enriched = 0usize;
5073 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5074 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5075 Ok(name) => {
5076 vendor.name = name;
5077 vendors_enriched += 1;
5078 }
5079 Err(e) => warn!(
5080 "LLM vendor enrichment failed for {}: {}",
5081 vendor.vendor_id, e
5082 ),
5083 }
5084 }
5085
5086 let mut customers_enriched = 0usize;
5087 if self.config.llm.enrich_customers {
5088 let customer_enricher =
5089 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5090 &provider,
5091 ));
5092 let max_customers = self
5093 .config
5094 .llm
5095 .max_customer_enrichments
5096 .min(self.master_data.customers.len());
5097 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5098 match customer_enricher.enrich_customer_name(
5099 &industry,
5100 "general",
5101 &customer.country,
5102 ) {
5103 Ok(name) => {
5104 customer.name = name;
5105 customers_enriched += 1;
5106 }
5107 Err(e) => warn!(
5108 "LLM customer enrichment failed for {}: {}",
5109 customer.customer_id, e
5110 ),
5111 }
5112 }
5113 }
5114
5115 let mut materials_enriched = 0usize;
5116 if self.config.llm.enrich_materials {
5117 let material_enricher =
5118 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5119 &provider,
5120 ));
5121 let max_materials = self
5122 .config
5123 .llm
5124 .max_material_enrichments
5125 .min(self.master_data.materials.len());
5126 for material in self.master_data.materials.iter_mut().take(max_materials) {
5127 let material_type = format!("{:?}", material.material_type);
5128 match material_enricher.enrich_material_description(&material_type, &industry) {
5129 Ok(desc) => {
5130 material.description = desc;
5131 materials_enriched += 1;
5132 }
5133 Err(e) => warn!(
5134 "LLM material enrichment failed for {}: {}",
5135 material.material_id, e
5136 ),
5137 }
5138 }
5139 }
5140
5141 (vendors_enriched, customers_enriched, materials_enriched)
5142 }));
5143
5144 match result {
5145 Ok((v, c, m)) => {
5146 stats.llm_vendors_enriched = v;
5147 stats.llm_customers_enriched = c;
5148 stats.llm_materials_enriched = m;
5149 let elapsed = start.elapsed();
5150 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5151 info!(
5152 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5153 v, c, m, stats.llm_enrichment_ms
5154 );
5155 }
5156 Err(_) => {
5157 let elapsed = start.elapsed();
5158 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5159 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5160 }
5161 }
5162 }
5163
5164 fn phase_diffusion_enhancement(
5176 &self,
5177 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5178 stats: &mut EnhancedGenerationStatistics,
5179 ) {
5180 if !self.config.diffusion.enabled {
5181 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5182 return;
5183 }
5184
5185 info!("Phase 12: Starting Diffusion Enhancement");
5186 let start = std::time::Instant::now();
5187
5188 let backend_choice = self.config.diffusion.backend.as_str();
5189 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5190
5191 if use_neural {
5192 #[cfg(feature = "neural")]
5193 {
5194 match self.run_neural_diffusion_phase(entries) {
5195 Ok(sample_count) => {
5196 stats.diffusion_samples_generated = sample_count;
5197 let elapsed = start.elapsed();
5198 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5199 info!(
5200 "Phase 12 complete ({}): {} samples in {}ms",
5201 backend_choice, sample_count, stats.diffusion_enhancement_ms
5202 );
5203 return;
5204 }
5205 Err(e) => {
5206 warn!(
5207 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5208 );
5209 }
5211 }
5212 }
5213 #[cfg(not(feature = "neural"))]
5214 {
5215 warn!(
5216 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5217 not compiled in — falling back to statistical. Rebuild with \
5218 `--features neural` (or `neural-cuda` for GPU) to enable.",
5219 backend_choice
5220 );
5221 }
5222 } else if !matches!(backend_choice, "statistical" | "") {
5223 warn!(
5224 "Phase 12: unknown backend '{}', falling back to statistical",
5225 backend_choice
5226 );
5227 }
5228
5229 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5231 let means = vec![5000.0, 3.0, 2.0];
5232 let stds = vec![2000.0, 1.5, 1.0];
5233
5234 let diffusion_config = DiffusionConfig {
5235 n_steps: self.config.diffusion.n_steps,
5236 seed: self.seed,
5237 ..Default::default()
5238 };
5239
5240 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5241 let n_samples = self.config.diffusion.sample_size;
5242 let n_features = 3;
5243 backend.generate(n_samples, n_features, self.seed).len()
5244 }));
5245
5246 match result {
5247 Ok(sample_count) => {
5248 stats.diffusion_samples_generated = sample_count;
5249 let elapsed = start.elapsed();
5250 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5251 info!(
5252 "Phase 12 complete (statistical): {} samples in {}ms",
5253 sample_count, stats.diffusion_enhancement_ms
5254 );
5255 }
5256 Err(_) => {
5257 let elapsed = start.elapsed();
5258 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5259 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5260 }
5261 }
5262 }
5263
/// Run the neural diffusion backend and return the number of generated samples.
///
/// Up to the first 5000 journal entries are turned into three-feature rows
/// (sum of positive debit amounts, line count, required approval levels with
/// a default of 1.0). The backend is loaded from
/// `config.diffusion.neural.checkpoint_path` when one is configured,
/// otherwise it is trained on those rows.
///
/// # Errors
/// Returns a generation error when `entries` is empty, when the checkpoint
/// cannot be loaded, or when training fails.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
    use rust_decimal::prelude::ToPrimitive;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // One feature row per journal entry.
    let features_of = |je: &JournalEntry| -> Vec<f64> {
        let total_amount: f64 = je
            .lines
            .iter()
            .filter(|line| line.debit_amount > rust_decimal::Decimal::ZERO)
            .map(|line| line.debit_amount.to_f64().unwrap_or(0.0))
            .sum();
        let line_count = je.lines.len() as f64;
        let approval_level = je
            .header
            .approval_workflow
            .as_ref()
            .map_or(1.0, |workflow| workflow.required_levels as f64);
        vec![total_amount, line_count, approval_level]
    };
    let training_data: Vec<Vec<f64>> = entries.iter().take(5000).map(features_of).collect();

    let n_features = training_data.first().map_or(3, |row| row.len());

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        Some(ckpt_path) => {
            let path = std::path::Path::new(ckpt_path);
            info!(
                " Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        None => {
            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
            info!(
                " Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                " Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    Ok(backend
        .generate(cfg.sample_size, n_features, self.seed)
        .len())
}
5351
5352 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5359 if !self.config.causal.enabled {
5360 debug!("Phase 13: Skipped (causal generation disabled)");
5361 return;
5362 }
5363
5364 info!("Phase 13: Starting Causal Overlay");
5365 let start = std::time::Instant::now();
5366
5367 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5368 let graph = match self.config.causal.template.as_str() {
5370 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5371 _ => CausalGraph::fraud_detection_template(),
5372 };
5373
5374 let scm = StructuralCausalModel::new(graph.clone())
5375 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5376
5377 let n_samples = self.config.causal.sample_size;
5378 let samples = scm
5379 .generate(n_samples, self.seed)
5380 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5381
5382 let validation_passed = if self.config.causal.validate {
5384 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5385 if report.valid {
5386 info!(
5387 "Causal validation passed: all {} checks OK",
5388 report.checks.len()
5389 );
5390 } else {
5391 warn!(
5392 "Causal validation: {} violations detected: {:?}",
5393 report.violations.len(),
5394 report.violations
5395 );
5396 }
5397 Some(report.valid)
5398 } else {
5399 None
5400 };
5401
5402 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5403 }));
5404
5405 match result {
5406 Ok(Ok((sample_count, validation_passed))) => {
5407 stats.causal_samples_generated = sample_count;
5408 stats.causal_validation_passed = validation_passed;
5409 let elapsed = start.elapsed();
5410 stats.causal_generation_ms = elapsed.as_millis() as u64;
5411 info!(
5412 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5413 sample_count, stats.causal_generation_ms, validation_passed,
5414 );
5415 }
5416 Ok(Err(e)) => {
5417 let elapsed = start.elapsed();
5418 stats.causal_generation_ms = elapsed.as_millis() as u64;
5419 warn!("Phase 13: Causal generation failed: {}", e);
5420 }
5421 Err(_) => {
5422 let elapsed = start.elapsed();
5423 stats.causal_generation_ms = elapsed.as_millis() as u64;
5424 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5425 }
5426 }
5427 }
5428
5429 fn phase_sourcing_data(
5431 &mut self,
5432 stats: &mut EnhancedGenerationStatistics,
5433 ) -> SynthResult<SourcingSnapshot> {
5434 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5435 debug!("Phase 14: Skipped (sourcing generation disabled)");
5436 return Ok(SourcingSnapshot::default());
5437 }
5438 let degradation = self.check_resources()?;
5439 if degradation >= DegradationLevel::Reduced {
5440 debug!(
5441 "Phase skipped due to resource pressure (degradation: {:?})",
5442 degradation
5443 );
5444 return Ok(SourcingSnapshot::default());
5445 }
5446
5447 info!("Phase 14: Generating S2C Sourcing Data");
5448 let seed = self.seed;
5449
5450 let vendor_ids: Vec<String> = self
5452 .master_data
5453 .vendors
5454 .iter()
5455 .map(|v| v.vendor_id.clone())
5456 .collect();
5457 if vendor_ids.is_empty() {
5458 debug!("Phase 14: Skipped (no vendors available)");
5459 return Ok(SourcingSnapshot::default());
5460 }
5461
5462 let categories: Vec<(String, String)> = vec![
5463 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5464 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5465 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5466 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5467 ("CAT-LOG".to_string(), "Logistics".to_string()),
5468 ];
5469 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5470 .iter()
5471 .map(|(id, name)| {
5472 (
5473 id.clone(),
5474 name.clone(),
5475 rust_decimal::Decimal::from(100_000),
5476 )
5477 })
5478 .collect();
5479
5480 let company_code = self
5481 .config
5482 .companies
5483 .first()
5484 .map(|c| c.code.as_str())
5485 .unwrap_or("1000");
5486 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5487 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5488 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5489 let fiscal_year = start_date.year() as u16;
5490 let owner_ids: Vec<String> = self
5491 .master_data
5492 .employees
5493 .iter()
5494 .take(5)
5495 .map(|e| e.employee_id.clone())
5496 .collect();
5497 let owner_id = owner_ids
5498 .first()
5499 .map(std::string::String::as_str)
5500 .unwrap_or("BUYER-001");
5501
5502 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5504 let spend_analyses =
5505 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5506
5507 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5509 let sourcing_projects = if owner_ids.is_empty() {
5510 Vec::new()
5511 } else {
5512 project_gen.generate(
5513 company_code,
5514 &categories_with_spend,
5515 &owner_ids,
5516 start_date,
5517 self.config.global.period_months,
5518 )
5519 };
5520 stats.sourcing_project_count = sourcing_projects.len();
5521
5522 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5524 let mut qual_gen = QualificationGenerator::new(seed + 2);
5525 let qualifications = qual_gen.generate(
5526 company_code,
5527 &qual_vendor_ids,
5528 sourcing_projects.first().map(|p| p.project_id.as_str()),
5529 owner_id,
5530 start_date,
5531 );
5532
5533 let mut rfx_gen = RfxGenerator::new(seed + 3);
5535 let rfx_events: Vec<RfxEvent> = sourcing_projects
5536 .iter()
5537 .map(|proj| {
5538 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5539 rfx_gen.generate(
5540 company_code,
5541 &proj.project_id,
5542 &proj.category_id,
5543 &qualified_vids,
5544 owner_id,
5545 start_date,
5546 50000.0,
5547 )
5548 })
5549 .collect();
5550 stats.rfx_event_count = rfx_events.len();
5551
5552 let mut bid_gen = BidGenerator::new(seed + 4);
5554 let mut all_bids = Vec::new();
5555 for rfx in &rfx_events {
5556 let bidder_count = vendor_ids.len().clamp(2, 5);
5557 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5558 let bids = bid_gen.generate(rfx, &responding, start_date);
5559 all_bids.extend(bids);
5560 }
5561 stats.bid_count = all_bids.len();
5562
5563 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5565 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5566 .iter()
5567 .map(|rfx| {
5568 let rfx_bids: Vec<SupplierBid> = all_bids
5569 .iter()
5570 .filter(|b| b.rfx_id == rfx.rfx_id)
5571 .cloned()
5572 .collect();
5573 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5574 })
5575 .collect();
5576
5577 let mut contract_gen = ContractGenerator::new(seed + 6);
5579 let contracts: Vec<ProcurementContract> = bid_evaluations
5580 .iter()
5581 .zip(rfx_events.iter())
5582 .filter_map(|(eval, rfx)| {
5583 eval.ranked_bids.first().and_then(|winner| {
5584 all_bids
5585 .iter()
5586 .find(|b| b.bid_id == winner.bid_id)
5587 .map(|winning_bid| {
5588 contract_gen.generate_from_bid(
5589 winning_bid,
5590 Some(&rfx.sourcing_project_id),
5591 &rfx.category_id,
5592 owner_id,
5593 start_date,
5594 )
5595 })
5596 })
5597 })
5598 .collect();
5599 stats.contract_count = contracts.len();
5600
5601 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5603 let catalog_items = catalog_gen.generate(&contracts);
5604 stats.catalog_item_count = catalog_items.len();
5605
5606 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5608 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5609 .iter()
5610 .fold(
5611 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5612 |mut acc, c| {
5613 acc.entry(c.vendor_id.clone()).or_default().push(c);
5614 acc
5615 },
5616 )
5617 .into_iter()
5618 .collect();
5619 let scorecards = scorecard_gen.generate(
5620 company_code,
5621 &vendor_contracts,
5622 start_date,
5623 end_date,
5624 owner_id,
5625 );
5626 stats.scorecard_count = scorecards.len();
5627
5628 let mut sourcing_projects = sourcing_projects;
5631 for project in &mut sourcing_projects {
5632 project.rfx_ids = rfx_events
5634 .iter()
5635 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5636 .map(|rfx| rfx.rfx_id.clone())
5637 .collect();
5638
5639 project.contract_id = contracts
5641 .iter()
5642 .find(|c| {
5643 c.sourcing_project_id
5644 .as_deref()
5645 .is_some_and(|sp| sp == project.project_id)
5646 })
5647 .map(|c| c.contract_id.clone());
5648
5649 project.spend_analysis_id = spend_analyses
5651 .iter()
5652 .find(|sa| sa.category_id == project.category_id)
5653 .map(|sa| sa.category_id.clone());
5654 }
5655
5656 info!(
5657 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5658 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5659 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5660 );
5661 self.check_resources_with_log("post-sourcing")?;
5662
5663 Ok(SourcingSnapshot {
5664 spend_analyses,
5665 sourcing_projects,
5666 qualifications,
5667 rfx_events,
5668 bids: all_bids,
5669 bid_evaluations,
5670 contracts,
5671 catalog_items,
5672 scorecards,
5673 })
5674 }
5675
5676 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5682 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5683
5684 let parent_code = self
5685 .config
5686 .companies
5687 .first()
5688 .map(|c| c.code.clone())
5689 .unwrap_or_else(|| "PARENT".to_string());
5690
5691 let mut group = GroupStructure::new(parent_code);
5692
5693 for company in self.config.companies.iter().skip(1) {
5694 let sub =
5695 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5696 group.add_subsidiary(sub);
5697 }
5698
5699 group
5700 }
5701
5702 fn phase_intercompany(
5704 &mut self,
5705 journal_entries: &[JournalEntry],
5706 stats: &mut EnhancedGenerationStatistics,
5707 ) -> SynthResult<IntercompanySnapshot> {
5708 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5710 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5711 return Ok(IntercompanySnapshot::default());
5712 }
5713
5714 if self.config.companies.len() < 2 {
5716 debug!(
5717 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5718 self.config.companies.len()
5719 );
5720 return Ok(IntercompanySnapshot::default());
5721 }
5722
5723 info!("Phase 14b: Generating Intercompany Transactions");
5724
5725 let group_structure = self.build_group_structure();
5728 debug!(
5729 "Group structure built: parent={}, subsidiaries={}",
5730 group_structure.parent_entity,
5731 group_structure.subsidiaries.len()
5732 );
5733
5734 let seed = self.seed;
5735 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5736 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5737 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5738
5739 let parent_code = self.config.companies[0].code.clone();
5742 let mut ownership_structure =
5743 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5744
5745 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5746 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5747 format!("REL{:03}", i + 1),
5748 parent_code.clone(),
5749 company.code.clone(),
5750 rust_decimal::Decimal::from(100), start_date,
5752 );
5753 ownership_structure.add_relationship(relationship);
5754 }
5755
5756 let tp_method = match self.config.intercompany.transfer_pricing_method {
5758 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5759 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5760 }
5761 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5762 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5763 }
5764 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5765 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5766 }
5767 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5768 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5769 }
5770 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5771 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5772 }
5773 };
5774
5775 let ic_currency = self
5777 .config
5778 .companies
5779 .first()
5780 .map(|c| c.currency.clone())
5781 .unwrap_or_else(|| "USD".to_string());
5782 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5783 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5784 transfer_pricing_method: tp_method,
5785 markup_percent: rust_decimal::Decimal::from_f64_retain(
5786 self.config.intercompany.markup_percent,
5787 )
5788 .unwrap_or(rust_decimal::Decimal::from(5)),
5789 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5790 default_currency: ic_currency,
5791 ..Default::default()
5792 };
5793
5794 let mut ic_generator = datasynth_generators::ICGenerator::new(
5796 ic_gen_config,
5797 ownership_structure.clone(),
5798 seed + 50,
5799 );
5800
5801 let transactions_per_day = 3;
5804 let matched_pairs = ic_generator.generate_transactions_for_period(
5805 start_date,
5806 end_date,
5807 transactions_per_day,
5808 );
5809
5810 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5812 debug!(
5813 "Generated {} IC seller invoices, {} IC buyer POs",
5814 ic_doc_chains.seller_invoices.len(),
5815 ic_doc_chains.buyer_orders.len()
5816 );
5817
5818 let mut seller_entries = Vec::new();
5820 let mut buyer_entries = Vec::new();
5821 let fiscal_year = start_date.year();
5822
5823 for pair in &matched_pairs {
5824 let fiscal_period = pair.posting_date.month();
5825 let (seller_je, buyer_je) =
5826 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5827 seller_entries.push(seller_je);
5828 buyer_entries.push(buyer_je);
5829 }
5830
5831 let matching_config = datasynth_generators::ICMatchingConfig {
5833 base_currency: self
5834 .config
5835 .companies
5836 .first()
5837 .map(|c| c.currency.clone())
5838 .unwrap_or_else(|| "USD".to_string()),
5839 ..Default::default()
5840 };
5841 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5842 matching_engine.load_matched_pairs(&matched_pairs);
5843 let matching_result = matching_engine.run_matching(end_date);
5844
5845 let mut elimination_entries = Vec::new();
5847 if self.config.intercompany.generate_eliminations {
5848 let elim_config = datasynth_generators::EliminationConfig {
5849 consolidation_entity: "GROUP".to_string(),
5850 base_currency: self
5851 .config
5852 .companies
5853 .first()
5854 .map(|c| c.currency.clone())
5855 .unwrap_or_else(|| "USD".to_string()),
5856 ..Default::default()
5857 };
5858
5859 let mut elim_generator =
5860 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5861
5862 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5863 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5864 matching_result
5865 .matched_balances
5866 .iter()
5867 .chain(matching_result.unmatched_balances.iter())
5868 .cloned()
5869 .collect();
5870
5871 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5883 std::collections::HashMap::new();
5884 let mut equity_amounts: std::collections::HashMap<
5885 String,
5886 std::collections::HashMap<String, rust_decimal::Decimal>,
5887 > = std::collections::HashMap::new();
5888 {
5889 use rust_decimal::Decimal;
5890 let hundred = Decimal::from(100u32);
5891 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5895 for sub in &group_structure.subsidiaries {
5896 let net_assets = {
5897 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5898 if na > Decimal::ZERO {
5899 na
5900 } else {
5901 Decimal::from(1_000_000u64)
5902 }
5903 };
5904 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5906 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5907
5908 let mut eq_map = std::collections::HashMap::new();
5911 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5912 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5913 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5914 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5915 }
5916 }
5917
5918 let journal = elim_generator.generate_eliminations(
5919 &fiscal_period,
5920 end_date,
5921 &all_balances,
5922 &matched_pairs,
5923 &investment_amounts,
5924 &equity_amounts,
5925 );
5926
5927 elimination_entries = journal.entries.clone();
5928 }
5929
5930 let matched_pair_count = matched_pairs.len();
5931 let elimination_entry_count = elimination_entries.len();
5932 let match_rate = matching_result.match_rate;
5933
5934 stats.ic_matched_pair_count = matched_pair_count;
5935 stats.ic_elimination_count = elimination_entry_count;
5936 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5937
5938 info!(
5939 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5940 matched_pair_count,
5941 stats.ic_transaction_count,
5942 seller_entries.len(),
5943 buyer_entries.len(),
5944 elimination_entry_count,
5945 match_rate * 100.0
5946 );
5947 self.check_resources_with_log("post-intercompany")?;
5948
5949 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5953 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5954 use rust_decimal::Decimal;
5955
5956 let eight_pct = Decimal::new(8, 2); group_structure
5959 .subsidiaries
5960 .iter()
5961 .filter(|sub| {
5962 sub.nci_percentage > Decimal::ZERO
5963 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5964 })
5965 .map(|sub| {
5966 let net_assets_from_jes =
5970 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5971
5972 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5973 net_assets_from_jes.round_dp(2)
5974 } else {
5975 Decimal::from(1_000_000u64)
5977 };
5978
5979 let net_income = (net_assets * eight_pct).round_dp(2);
5981
5982 NciMeasurement::compute(
5983 sub.entity_code.clone(),
5984 sub.nci_percentage,
5985 net_assets,
5986 net_income,
5987 )
5988 })
5989 .collect()
5990 };
5991
5992 if !nci_measurements.is_empty() {
5993 info!(
5994 "NCI measurements: {} subsidiaries with non-controlling interests",
5995 nci_measurements.len()
5996 );
5997 }
5998
5999 Ok(IntercompanySnapshot {
6000 group_structure: Some(group_structure),
6001 matched_pairs,
6002 seller_journal_entries: seller_entries,
6003 buyer_journal_entries: buyer_entries,
6004 elimination_entries,
6005 nci_measurements,
6006 ic_document_chains: Some(ic_doc_chains),
6007 matched_pair_count,
6008 elimination_entry_count,
6009 match_rate,
6010 })
6011 }
6012
6013 fn phase_financial_reporting(
6015 &mut self,
6016 document_flows: &DocumentFlowSnapshot,
6017 journal_entries: &[JournalEntry],
6018 coa: &Arc<ChartOfAccounts>,
6019 _hr: &HrSnapshot,
6020 _audit: &AuditSnapshot,
6021 stats: &mut EnhancedGenerationStatistics,
6022 ) -> SynthResult<FinancialReportingSnapshot> {
6023 let fs_enabled = self.phase_config.generate_financial_statements
6024 || self.config.financial_reporting.enabled;
6025 let br_enabled = self.phase_config.generate_bank_reconciliation;
6026
6027 if !fs_enabled && !br_enabled {
6028 debug!("Phase 15: Skipped (financial reporting disabled)");
6029 return Ok(FinancialReportingSnapshot::default());
6030 }
6031
6032 info!("Phase 15: Generating Financial Reporting Data");
6033
6034 let seed = self.seed;
6035 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6036 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6037
6038 let mut financial_statements = Vec::new();
6039 let mut bank_reconciliations = Vec::new();
6040 let mut trial_balances = Vec::new();
6041 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6042 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6043 Vec::new();
6044 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6046 std::collections::HashMap::new();
6047 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6049 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6051
6052 if fs_enabled {
6060 let has_journal_entries = !journal_entries.is_empty();
6061
6062 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6065 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6067
6068 let elimination_entries: Vec<&JournalEntry> = journal_entries
6070 .iter()
6071 .filter(|je| je.header.is_elimination)
6072 .collect();
6073
6074 for period in 0..self.config.global.period_months {
6076 let period_start = start_date + chrono::Months::new(period);
6077 let period_end =
6078 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6079 let fiscal_year = period_end.year() as u16;
6080 let fiscal_period = period_end.month() as u8;
6081 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6082
6083 let mut entity_tb_map: std::collections::HashMap<
6086 String,
6087 std::collections::HashMap<String, rust_decimal::Decimal>,
6088 > = std::collections::HashMap::new();
6089
6090 for (company_idx, company) in self.config.companies.iter().enumerate() {
6092 let company_code = company.code.as_str();
6093 let currency = company.currency.as_str();
6094 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6097 let mut company_fs_gen =
6098 FinancialStatementGenerator::new(seed + company_seed_offset);
6099
6100 if has_journal_entries {
6101 let tb_entries = Self::build_cumulative_trial_balance(
6102 journal_entries,
6103 coa,
6104 company_code,
6105 start_date,
6106 period_end,
6107 fiscal_year,
6108 fiscal_period,
6109 );
6110
6111 let entity_cat_map =
6113 entity_tb_map.entry(company_code.to_string()).or_default();
6114 for tb_entry in &tb_entries {
6115 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6116 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6117 }
6118
6119 let stmts = company_fs_gen.generate(
6120 company_code,
6121 currency,
6122 &tb_entries,
6123 period_start,
6124 period_end,
6125 fiscal_year,
6126 fiscal_period,
6127 None,
6128 "SYS-AUTOCLOSE",
6129 );
6130
6131 let mut entity_stmts = Vec::new();
6132 for stmt in stmts {
6133 if stmt.statement_type == StatementType::CashFlowStatement {
6134 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6135 let cf_items = Self::build_cash_flow_from_trial_balances(
6136 &tb_entries,
6137 None,
6138 net_income,
6139 );
6140 entity_stmts.push(FinancialStatement {
6141 cash_flow_items: cf_items,
6142 ..stmt
6143 });
6144 } else {
6145 entity_stmts.push(stmt);
6146 }
6147 }
6148
6149 financial_statements.extend(entity_stmts.clone());
6151
6152 standalone_statements
6154 .entry(company_code.to_string())
6155 .or_default()
6156 .extend(entity_stmts);
6157
6158 if company_idx == 0 {
6161 trial_balances.push(PeriodTrialBalance {
6162 fiscal_year,
6163 fiscal_period,
6164 period_start,
6165 period_end,
6166 entries: tb_entries,
6167 });
6168 }
6169 } else {
6170 let tb_entries = Self::build_trial_balance_from_entries(
6172 journal_entries,
6173 coa,
6174 company_code,
6175 fiscal_year,
6176 fiscal_period,
6177 );
6178
6179 let stmts = company_fs_gen.generate(
6180 company_code,
6181 currency,
6182 &tb_entries,
6183 period_start,
6184 period_end,
6185 fiscal_year,
6186 fiscal_period,
6187 None,
6188 "SYS-AUTOCLOSE",
6189 );
6190 financial_statements.extend(stmts.clone());
6191 standalone_statements
6192 .entry(company_code.to_string())
6193 .or_default()
6194 .extend(stmts);
6195
6196 if company_idx == 0 && !tb_entries.is_empty() {
6197 trial_balances.push(PeriodTrialBalance {
6198 fiscal_year,
6199 fiscal_period,
6200 period_start,
6201 period_end,
6202 entries: tb_entries,
6203 });
6204 }
6205 }
6206 }
6207
6208 let group_currency = self
6211 .config
6212 .companies
6213 .first()
6214 .map(|c| c.currency.as_str())
6215 .unwrap_or("USD");
6216
6217 let period_eliminations: Vec<JournalEntry> = elimination_entries
6219 .iter()
6220 .filter(|je| {
6221 je.header.fiscal_year == fiscal_year
6222 && je.header.fiscal_period == fiscal_period
6223 })
6224 .map(|je| (*je).clone())
6225 .collect();
6226
6227 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6228 &entity_tb_map,
6229 &period_eliminations,
6230 &period_label,
6231 );
6232
6233 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6236 .line_items
6237 .iter()
6238 .map(|li| {
6239 let net = li.post_elimination_total;
6240 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6241 (net, rust_decimal::Decimal::ZERO)
6242 } else {
6243 (rust_decimal::Decimal::ZERO, -net)
6244 };
6245 datasynth_generators::TrialBalanceEntry {
6246 account_code: li.account_category.clone(),
6247 account_name: li.account_category.clone(),
6248 category: li.account_category.clone(),
6249 debit_balance: debit,
6250 credit_balance: credit,
6251 }
6252 })
6253 .collect();
6254
6255 let mut cons_stmts = cons_gen.generate(
6256 "GROUP",
6257 group_currency,
6258 &cons_tb,
6259 period_start,
6260 period_end,
6261 fiscal_year,
6262 fiscal_period,
6263 None,
6264 "SYS-AUTOCLOSE",
6265 );
6266
6267 let bs_categories: &[&str] = &[
6271 "CASH",
6272 "RECEIVABLES",
6273 "INVENTORY",
6274 "FIXEDASSETS",
6275 "PAYABLES",
6276 "ACCRUEDLIABILITIES",
6277 "LONGTERMDEBT",
6278 "EQUITY",
6279 ];
6280 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6281 cons_line_items.into_iter().partition(|li| {
6282 let upper = li.label.to_uppercase();
6283 bs_categories.iter().any(|c| upper == *c)
6284 });
6285
6286 for stmt in &mut cons_stmts {
6287 stmt.is_consolidated = true;
6288 match stmt.statement_type {
6289 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6290 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6291 _ => {} }
6293 }
6294
6295 consolidated_statements.extend(cons_stmts);
6296 consolidation_schedules.push(schedule);
6297 }
6298
6299 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6305 info!(
6306 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6307 stats.financial_statement_count,
6308 consolidated_statements.len(),
6309 has_journal_entries
6310 );
6311
6312 let entity_seeds: Vec<SegmentSeed> = self
6317 .config
6318 .companies
6319 .iter()
6320 .map(|c| SegmentSeed {
6321 code: c.code.clone(),
6322 name: c.name.clone(),
6323 currency: c.currency.clone(),
6324 })
6325 .collect();
6326
6327 let mut seg_gen = SegmentGenerator::new(seed + 30);
6328
6329 for period in 0..self.config.global.period_months {
6334 let period_end =
6335 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6336 let fiscal_year = period_end.year() as u16;
6337 let fiscal_period = period_end.month() as u8;
6338 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6339
6340 use datasynth_core::models::StatementType;
6341
6342 let cons_is = consolidated_statements.iter().find(|s| {
6344 s.fiscal_year == fiscal_year
6345 && s.fiscal_period == fiscal_period
6346 && s.statement_type == StatementType::IncomeStatement
6347 });
6348 let cons_bs = consolidated_statements.iter().find(|s| {
6349 s.fiscal_year == fiscal_year
6350 && s.fiscal_period == fiscal_period
6351 && s.statement_type == StatementType::BalanceSheet
6352 });
6353
6354 let is_stmt = cons_is.or_else(|| {
6356 financial_statements.iter().find(|s| {
6357 s.fiscal_year == fiscal_year
6358 && s.fiscal_period == fiscal_period
6359 && s.statement_type == StatementType::IncomeStatement
6360 })
6361 });
6362 let bs_stmt = cons_bs.or_else(|| {
6363 financial_statements.iter().find(|s| {
6364 s.fiscal_year == fiscal_year
6365 && s.fiscal_period == fiscal_period
6366 && s.statement_type == StatementType::BalanceSheet
6367 })
6368 });
6369
6370 let consolidated_revenue = is_stmt
6371 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6372 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6374
6375 let consolidated_profit = is_stmt
6376 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6377 .map(|li| li.amount)
6378 .unwrap_or(rust_decimal::Decimal::ZERO);
6379
6380 let consolidated_assets = bs_stmt
6381 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6382 .map(|li| li.amount)
6383 .unwrap_or(rust_decimal::Decimal::ZERO);
6384
6385 if consolidated_revenue == rust_decimal::Decimal::ZERO
6387 && consolidated_assets == rust_decimal::Decimal::ZERO
6388 {
6389 continue;
6390 }
6391
6392 let group_code = self
6393 .config
6394 .companies
6395 .first()
6396 .map(|c| c.code.as_str())
6397 .unwrap_or("GROUP");
6398
6399 let total_depr: rust_decimal::Decimal = journal_entries
6402 .iter()
6403 .filter(|je| je.header.document_type == "CL")
6404 .flat_map(|je| je.lines.iter())
6405 .filter(|l| l.gl_account.starts_with("6000"))
6406 .map(|l| l.debit_amount)
6407 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6408 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6409 Some(total_depr)
6410 } else {
6411 None
6412 };
6413
6414 let (segs, recon) = seg_gen.generate(
6415 group_code,
6416 &period_label,
6417 consolidated_revenue,
6418 consolidated_profit,
6419 consolidated_assets,
6420 &entity_seeds,
6421 depr_param,
6422 );
6423 segment_reports.extend(segs);
6424 segment_reconciliations.push(recon);
6425 }
6426
6427 info!(
6428 "Segment reports generated: {} segments, {} reconciliations",
6429 segment_reports.len(),
6430 segment_reconciliations.len()
6431 );
6432 }
6433
6434 if br_enabled && !document_flows.payments.is_empty() {
6436 let employee_ids: Vec<String> = self
6437 .master_data
6438 .employees
6439 .iter()
6440 .map(|e| e.employee_id.clone())
6441 .collect();
6442 let mut br_gen =
6443 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6444
6445 for company in &self.config.companies {
6447 let company_payments: Vec<PaymentReference> = document_flows
6448 .payments
6449 .iter()
6450 .filter(|p| p.header.company_code == company.code)
6451 .map(|p| PaymentReference {
6452 id: p.header.document_id.clone(),
6453 amount: if p.is_vendor { p.amount } else { -p.amount },
6454 date: p.header.document_date,
6455 reference: p
6456 .check_number
6457 .clone()
6458 .or_else(|| p.wire_reference.clone())
6459 .unwrap_or_else(|| p.header.document_id.clone()),
6460 })
6461 .collect();
6462
6463 if company_payments.is_empty() {
6464 continue;
6465 }
6466
6467 let bank_account_id = format!("{}-MAIN", company.code);
6468
6469 for period in 0..self.config.global.period_months {
6471 let period_start = start_date + chrono::Months::new(period);
6472 let period_end =
6473 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6474
6475 let period_payments: Vec<PaymentReference> = company_payments
6476 .iter()
6477 .filter(|p| p.date >= period_start && p.date <= period_end)
6478 .cloned()
6479 .collect();
6480
6481 let recon = br_gen.generate(
6482 &company.code,
6483 &bank_account_id,
6484 period_start,
6485 period_end,
6486 &company.currency,
6487 &period_payments,
6488 );
6489 bank_reconciliations.push(recon);
6490 }
6491 }
6492 info!(
6493 "Bank reconciliations generated: {} reconciliations",
6494 bank_reconciliations.len()
6495 );
6496 }
6497
6498 stats.bank_reconciliation_count = bank_reconciliations.len();
6499 self.check_resources_with_log("post-financial-reporting")?;
6500
6501 if !trial_balances.is_empty() {
6502 info!(
6503 "Period-close trial balances captured: {} periods",
6504 trial_balances.len()
6505 );
6506 }
6507
6508 let notes_to_financial_statements = Vec::new();
6512
6513 Ok(FinancialReportingSnapshot {
6514 financial_statements,
6515 standalone_statements,
6516 consolidated_statements,
6517 consolidation_schedules,
6518 bank_reconciliations,
6519 trial_balances,
6520 segment_reports,
6521 segment_reconciliations,
6522 notes_to_financial_statements,
6523 })
6524 }
6525
6526 fn generate_notes_to_financial_statements(
6533 &self,
6534 financial_reporting: &mut FinancialReportingSnapshot,
6535 accounting_standards: &AccountingStandardsSnapshot,
6536 tax: &TaxSnapshot,
6537 hr: &HrSnapshot,
6538 audit: &AuditSnapshot,
6539 treasury: &TreasurySnapshot,
6540 ) {
6541 use datasynth_config::schema::AccountingFrameworkConfig;
6542 use datasynth_core::models::StatementType;
6543 use datasynth_generators::period_close::notes_generator::{
6544 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6545 };
6546
6547 let seed = self.seed;
6548 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6549 {
6550 Ok(d) => d,
6551 Err(_) => return,
6552 };
6553
6554 let mut notes_gen = NotesGenerator::new(seed + 4235);
6555
6556 for company in &self.config.companies {
6557 let last_period_end = start_date
6558 + chrono::Months::new(self.config.global.period_months)
6559 - chrono::Days::new(1);
6560 let fiscal_year = last_period_end.year() as u16;
6561
6562 let entity_is = financial_reporting
6564 .standalone_statements
6565 .get(&company.code)
6566 .and_then(|stmts| {
6567 stmts.iter().find(|s| {
6568 s.fiscal_year == fiscal_year
6569 && s.statement_type == StatementType::IncomeStatement
6570 })
6571 });
6572 let entity_bs = financial_reporting
6573 .standalone_statements
6574 .get(&company.code)
6575 .and_then(|stmts| {
6576 stmts.iter().find(|s| {
6577 s.fiscal_year == fiscal_year
6578 && s.statement_type == StatementType::BalanceSheet
6579 })
6580 });
6581
6582 let revenue_amount = entity_is
6584 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6585 .map(|li| li.amount);
6586 let ppe_gross = entity_bs
6587 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6588 .map(|li| li.amount);
6589
6590 let framework = match self
6591 .config
6592 .accounting_standards
6593 .framework
6594 .unwrap_or_default()
6595 {
6596 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6597 "IFRS".to_string()
6598 }
6599 _ => "US GAAP".to_string(),
6600 };
6601
6602 let (entity_dta, entity_dtl) = {
6605 let mut dta = rust_decimal::Decimal::ZERO;
6606 let mut dtl = rust_decimal::Decimal::ZERO;
6607 for rf in &tax.deferred_tax.rollforwards {
6608 if rf.entity_code == company.code {
6609 dta += rf.closing_dta;
6610 dtl += rf.closing_dtl;
6611 }
6612 }
6613 (
6614 if dta > rust_decimal::Decimal::ZERO {
6615 Some(dta)
6616 } else {
6617 None
6618 },
6619 if dtl > rust_decimal::Decimal::ZERO {
6620 Some(dtl)
6621 } else {
6622 None
6623 },
6624 )
6625 };
6626
6627 let entity_provisions: Vec<_> = accounting_standards
6630 .provisions
6631 .iter()
6632 .filter(|p| p.entity_code == company.code)
6633 .collect();
6634 let provision_count = entity_provisions.len();
6635 let total_provisions = if provision_count > 0 {
6636 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6637 } else {
6638 None
6639 };
6640
6641 let entity_pension_plan_count = hr
6643 .pension_plans
6644 .iter()
6645 .filter(|p| p.entity_code == company.code)
6646 .count();
6647 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6648 let sum: rust_decimal::Decimal = hr
6649 .pension_disclosures
6650 .iter()
6651 .filter(|d| {
6652 hr.pension_plans
6653 .iter()
6654 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6655 })
6656 .map(|d| d.net_pension_liability)
6657 .sum();
6658 let plan_assets_sum: rust_decimal::Decimal = hr
6659 .pension_plan_assets
6660 .iter()
6661 .filter(|a| {
6662 hr.pension_plans
6663 .iter()
6664 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6665 })
6666 .map(|a| a.fair_value_closing)
6667 .sum();
6668 if entity_pension_plan_count > 0 {
6669 Some(sum + plan_assets_sum)
6670 } else {
6671 None
6672 }
6673 };
6674 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6675 let sum: rust_decimal::Decimal = hr
6676 .pension_plan_assets
6677 .iter()
6678 .filter(|a| {
6679 hr.pension_plans
6680 .iter()
6681 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6682 })
6683 .map(|a| a.fair_value_closing)
6684 .sum();
6685 if entity_pension_plan_count > 0 {
6686 Some(sum)
6687 } else {
6688 None
6689 }
6690 };
6691
6692 let rp_count = audit.related_party_transactions.len();
6695 let se_count = audit.subsequent_events.len();
6696 let adjusting_count = audit
6697 .subsequent_events
6698 .iter()
6699 .filter(|e| {
6700 matches!(
6701 e.classification,
6702 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6703 )
6704 })
6705 .count();
6706
6707 let ctx = NotesGeneratorContext {
6708 entity_code: company.code.clone(),
6709 framework,
6710 period: format!("FY{}", fiscal_year),
6711 period_end: last_period_end,
6712 currency: company.currency.clone(),
6713 revenue_amount,
6714 total_ppe_gross: ppe_gross,
6715 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6716 deferred_tax_asset: entity_dta,
6718 deferred_tax_liability: entity_dtl,
6719 provision_count,
6721 total_provisions,
6722 pension_plan_count: entity_pension_plan_count,
6724 total_dbo: entity_total_dbo,
6725 total_plan_assets: entity_total_plan_assets,
6726 related_party_transaction_count: rp_count,
6728 subsequent_event_count: se_count,
6729 adjusting_event_count: adjusting_count,
6730 ..NotesGeneratorContext::default()
6731 };
6732
6733 let entity_notes = notes_gen.generate(&ctx);
6734 let standard_note_count = entity_notes.len() as u32;
6735 info!(
6736 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6737 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6738 );
6739 financial_reporting
6740 .notes_to_financial_statements
6741 .extend(entity_notes);
6742
6743 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6745 .debt_instruments
6746 .iter()
6747 .filter(|d| d.entity_id == company.code)
6748 .map(|d| {
6749 (
6750 format!("{:?}", d.instrument_type),
6751 d.principal,
6752 d.maturity_date.to_string(),
6753 )
6754 })
6755 .collect();
6756
6757 let hedge_count = treasury.hedge_relationships.len();
6758 let effective_hedges = treasury
6759 .hedge_relationships
6760 .iter()
6761 .filter(|h| h.is_effective)
6762 .count();
6763 let total_notional: rust_decimal::Decimal = treasury
6764 .hedging_instruments
6765 .iter()
6766 .map(|h| h.notional_amount)
6767 .sum();
6768 let total_fair_value: rust_decimal::Decimal = treasury
6769 .hedging_instruments
6770 .iter()
6771 .map(|h| h.fair_value)
6772 .sum();
6773
6774 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6776 .provisions
6777 .iter()
6778 .filter(|p| p.entity_code == company.code)
6779 .map(|p| p.id.as_str())
6780 .collect();
6781 let provision_movements: Vec<(
6782 String,
6783 rust_decimal::Decimal,
6784 rust_decimal::Decimal,
6785 rust_decimal::Decimal,
6786 )> = accounting_standards
6787 .provision_movements
6788 .iter()
6789 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6790 .map(|m| {
6791 let prov_type = accounting_standards
6792 .provisions
6793 .iter()
6794 .find(|p| p.id == m.provision_id)
6795 .map(|p| format!("{:?}", p.provision_type))
6796 .unwrap_or_else(|| "Unknown".to_string());
6797 (prov_type, m.opening, m.additions, m.closing)
6798 })
6799 .collect();
6800
6801 let enhanced_ctx = EnhancedNotesContext {
6802 entity_code: company.code.clone(),
6803 period: format!("FY{}", fiscal_year),
6804 currency: company.currency.clone(),
6805 finished_goods_value: rust_decimal::Decimal::ZERO,
6807 wip_value: rust_decimal::Decimal::ZERO,
6808 raw_materials_value: rust_decimal::Decimal::ZERO,
6809 debt_instruments,
6810 hedge_count,
6811 effective_hedges,
6812 total_notional,
6813 total_fair_value,
6814 provision_movements,
6815 };
6816
6817 let enhanced_notes =
6818 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6819 if !enhanced_notes.is_empty() {
6820 info!(
6821 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6822 company.code,
6823 enhanced_notes.len(),
6824 enhanced_ctx.debt_instruments.len(),
6825 hedge_count,
6826 enhanced_ctx.provision_movements.len(),
6827 );
6828 financial_reporting
6829 .notes_to_financial_statements
6830 .extend(enhanced_notes);
6831 }
6832 }
6833 }
6834
6835 fn build_trial_balance_from_entries(
6841 journal_entries: &[JournalEntry],
6842 coa: &ChartOfAccounts,
6843 company_code: &str,
6844 fiscal_year: u16,
6845 fiscal_period: u8,
6846 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6847 use rust_decimal::Decimal;
6848
6849 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6851 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6852
6853 for je in journal_entries {
6854 if je.header.company_code != company_code
6856 || je.header.fiscal_year != fiscal_year
6857 || je.header.fiscal_period != fiscal_period
6858 {
6859 continue;
6860 }
6861
6862 for line in &je.lines {
6863 let acct = &line.gl_account;
6864 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6865 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6866 }
6867 }
6868
6869 let mut all_accounts: Vec<&String> = account_debits
6871 .keys()
6872 .chain(account_credits.keys())
6873 .collect::<std::collections::HashSet<_>>()
6874 .into_iter()
6875 .collect();
6876 all_accounts.sort();
6877
6878 let mut entries = Vec::new();
6879
6880 for acct_number in all_accounts {
6881 let debit = account_debits
6882 .get(acct_number)
6883 .copied()
6884 .unwrap_or(Decimal::ZERO);
6885 let credit = account_credits
6886 .get(acct_number)
6887 .copied()
6888 .unwrap_or(Decimal::ZERO);
6889
6890 if debit.is_zero() && credit.is_zero() {
6891 continue;
6892 }
6893
6894 let account_name = coa
6896 .get_account(acct_number)
6897 .map(|gl| gl.short_description.clone())
6898 .unwrap_or_else(|| format!("Account {acct_number}"));
6899
6900 let category = Self::category_from_account_code(acct_number);
6905
6906 entries.push(datasynth_generators::TrialBalanceEntry {
6907 account_code: acct_number.clone(),
6908 account_name,
6909 category,
6910 debit_balance: debit,
6911 credit_balance: credit,
6912 });
6913 }
6914
6915 entries
6916 }
6917
6918 fn build_cumulative_trial_balance(
6925 journal_entries: &[JournalEntry],
6926 coa: &ChartOfAccounts,
6927 company_code: &str,
6928 start_date: NaiveDate,
6929 period_end: NaiveDate,
6930 fiscal_year: u16,
6931 fiscal_period: u8,
6932 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6933 use rust_decimal::Decimal;
6934
6935 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6937 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6938
6939 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6941 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6942
6943 for je in journal_entries {
6944 if je.header.company_code != company_code {
6945 continue;
6946 }
6947
6948 for line in &je.lines {
6949 let acct = &line.gl_account;
6950 let category = Self::category_from_account_code(acct);
6951 let is_bs_account = matches!(
6952 category.as_str(),
6953 "Cash"
6954 | "Receivables"
6955 | "Inventory"
6956 | "FixedAssets"
6957 | "Payables"
6958 | "AccruedLiabilities"
6959 | "LongTermDebt"
6960 | "Equity"
6961 );
6962
6963 if is_bs_account {
6964 if je.header.document_date <= period_end
6966 && je.header.document_date >= start_date
6967 {
6968 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6969 line.debit_amount;
6970 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6971 line.credit_amount;
6972 }
6973 } else {
6974 if je.header.fiscal_year == fiscal_year
6976 && je.header.fiscal_period == fiscal_period
6977 {
6978 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6979 line.debit_amount;
6980 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6981 line.credit_amount;
6982 }
6983 }
6984 }
6985 }
6986
6987 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6989 all_accounts.extend(bs_debits.keys().cloned());
6990 all_accounts.extend(bs_credits.keys().cloned());
6991 all_accounts.extend(is_debits.keys().cloned());
6992 all_accounts.extend(is_credits.keys().cloned());
6993
6994 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6995 sorted_accounts.sort();
6996
6997 let mut entries = Vec::new();
6998
6999 for acct_number in &sorted_accounts {
7000 let category = Self::category_from_account_code(acct_number);
7001 let is_bs_account = matches!(
7002 category.as_str(),
7003 "Cash"
7004 | "Receivables"
7005 | "Inventory"
7006 | "FixedAssets"
7007 | "Payables"
7008 | "AccruedLiabilities"
7009 | "LongTermDebt"
7010 | "Equity"
7011 );
7012
7013 let (debit, credit) = if is_bs_account {
7014 (
7015 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7016 bs_credits
7017 .get(acct_number)
7018 .copied()
7019 .unwrap_or(Decimal::ZERO),
7020 )
7021 } else {
7022 (
7023 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7024 is_credits
7025 .get(acct_number)
7026 .copied()
7027 .unwrap_or(Decimal::ZERO),
7028 )
7029 };
7030
7031 if debit.is_zero() && credit.is_zero() {
7032 continue;
7033 }
7034
7035 let account_name = coa
7036 .get_account(acct_number)
7037 .map(|gl| gl.short_description.clone())
7038 .unwrap_or_else(|| format!("Account {acct_number}"));
7039
7040 entries.push(datasynth_generators::TrialBalanceEntry {
7041 account_code: acct_number.clone(),
7042 account_name,
7043 category,
7044 debit_balance: debit,
7045 credit_balance: credit,
7046 });
7047 }
7048
7049 entries
7050 }
7051
7052 fn build_cash_flow_from_trial_balances(
7057 current_tb: &[datasynth_generators::TrialBalanceEntry],
7058 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7059 net_income: rust_decimal::Decimal,
7060 ) -> Vec<CashFlowItem> {
7061 use rust_decimal::Decimal;
7062
7063 let aggregate =
7065 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7066 let mut map: HashMap<String, Decimal> = HashMap::new();
7067 for entry in tb {
7068 let net = entry.debit_balance - entry.credit_balance;
7069 *map.entry(entry.category.clone()).or_default() += net;
7070 }
7071 map
7072 };
7073
7074 let current = aggregate(current_tb);
7075 let prior = prior_tb.map(aggregate);
7076
7077 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7079 *map.get(key).unwrap_or(&Decimal::ZERO)
7080 };
7081
7082 let change = |key: &str| -> Decimal {
7084 let curr = get(¤t, key);
7085 match &prior {
7086 Some(p) => curr - get(p, key),
7087 None => curr,
7088 }
7089 };
7090
7091 let fixed_asset_change = change("FixedAssets");
7094 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7095 -fixed_asset_change
7096 } else {
7097 Decimal::ZERO
7098 };
7099
7100 let ar_change = change("Receivables");
7102 let inventory_change = change("Inventory");
7103 let ap_change = change("Payables");
7105 let accrued_change = change("AccruedLiabilities");
7106
7107 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7108 + (-ap_change)
7109 + (-accrued_change);
7110
7111 let capex = if fixed_asset_change > Decimal::ZERO {
7113 -fixed_asset_change
7114 } else {
7115 Decimal::ZERO
7116 };
7117 let investing_cf = capex;
7118
7119 let debt_change = -change("LongTermDebt");
7121 let equity_change = -change("Equity");
7122 let financing_cf = debt_change + equity_change;
7123
7124 let net_change = operating_cf + investing_cf + financing_cf;
7125
7126 vec![
7127 CashFlowItem {
7128 item_code: "CF-NI".to_string(),
7129 label: "Net Income".to_string(),
7130 category: CashFlowCategory::Operating,
7131 amount: net_income,
7132 amount_prior: None,
7133 sort_order: 1,
7134 is_total: false,
7135 },
7136 CashFlowItem {
7137 item_code: "CF-DEP".to_string(),
7138 label: "Depreciation & Amortization".to_string(),
7139 category: CashFlowCategory::Operating,
7140 amount: depreciation_addback,
7141 amount_prior: None,
7142 sort_order: 2,
7143 is_total: false,
7144 },
7145 CashFlowItem {
7146 item_code: "CF-AR".to_string(),
7147 label: "Change in Accounts Receivable".to_string(),
7148 category: CashFlowCategory::Operating,
7149 amount: -ar_change,
7150 amount_prior: None,
7151 sort_order: 3,
7152 is_total: false,
7153 },
7154 CashFlowItem {
7155 item_code: "CF-AP".to_string(),
7156 label: "Change in Accounts Payable".to_string(),
7157 category: CashFlowCategory::Operating,
7158 amount: -ap_change,
7159 amount_prior: None,
7160 sort_order: 4,
7161 is_total: false,
7162 },
7163 CashFlowItem {
7164 item_code: "CF-INV".to_string(),
7165 label: "Change in Inventory".to_string(),
7166 category: CashFlowCategory::Operating,
7167 amount: -inventory_change,
7168 amount_prior: None,
7169 sort_order: 5,
7170 is_total: false,
7171 },
7172 CashFlowItem {
7173 item_code: "CF-OP".to_string(),
7174 label: "Net Cash from Operating Activities".to_string(),
7175 category: CashFlowCategory::Operating,
7176 amount: operating_cf,
7177 amount_prior: None,
7178 sort_order: 6,
7179 is_total: true,
7180 },
7181 CashFlowItem {
7182 item_code: "CF-CAPEX".to_string(),
7183 label: "Capital Expenditures".to_string(),
7184 category: CashFlowCategory::Investing,
7185 amount: capex,
7186 amount_prior: None,
7187 sort_order: 7,
7188 is_total: false,
7189 },
7190 CashFlowItem {
7191 item_code: "CF-INV-T".to_string(),
7192 label: "Net Cash from Investing Activities".to_string(),
7193 category: CashFlowCategory::Investing,
7194 amount: investing_cf,
7195 amount_prior: None,
7196 sort_order: 8,
7197 is_total: true,
7198 },
7199 CashFlowItem {
7200 item_code: "CF-DEBT".to_string(),
7201 label: "Net Borrowings / (Repayments)".to_string(),
7202 category: CashFlowCategory::Financing,
7203 amount: debt_change,
7204 amount_prior: None,
7205 sort_order: 9,
7206 is_total: false,
7207 },
7208 CashFlowItem {
7209 item_code: "CF-EQ".to_string(),
7210 label: "Equity Changes".to_string(),
7211 category: CashFlowCategory::Financing,
7212 amount: equity_change,
7213 amount_prior: None,
7214 sort_order: 10,
7215 is_total: false,
7216 },
7217 CashFlowItem {
7218 item_code: "CF-FIN-T".to_string(),
7219 label: "Net Cash from Financing Activities".to_string(),
7220 category: CashFlowCategory::Financing,
7221 amount: financing_cf,
7222 amount_prior: None,
7223 sort_order: 11,
7224 is_total: true,
7225 },
7226 CashFlowItem {
7227 item_code: "CF-NET".to_string(),
7228 label: "Net Change in Cash".to_string(),
7229 category: CashFlowCategory::Operating,
7230 amount: net_change,
7231 amount_prior: None,
7232 sort_order: 12,
7233 is_total: true,
7234 },
7235 ]
7236 }
7237
7238 fn calculate_net_income_from_tb(
7242 tb: &[datasynth_generators::TrialBalanceEntry],
7243 ) -> rust_decimal::Decimal {
7244 use rust_decimal::Decimal;
7245
7246 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7247 for entry in tb {
7248 let net = entry.debit_balance - entry.credit_balance;
7249 *aggregated.entry(entry.category.clone()).or_default() += net;
7250 }
7251
7252 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7253 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7254 let opex = *aggregated
7255 .get("OperatingExpenses")
7256 .unwrap_or(&Decimal::ZERO);
7257 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7258 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7259
7260 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7263 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7265 operating_income - tax
7266 }
7267
/// Maps an account code to a reporting category based on its first two characters.
///
/// Only the leading two characters are inspected. Codes that are shorter than
/// two characters, or whose prefix is not listed below, are reported as
/// `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        // 1x — assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x — liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x — equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x — income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Anything unmapped is treated as an operating expense.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
7296
7297 fn phase_hr_data(
7299 &mut self,
7300 stats: &mut EnhancedGenerationStatistics,
7301 ) -> SynthResult<HrSnapshot> {
7302 if !self.phase_config.generate_hr {
7303 debug!("Phase 16: Skipped (HR generation disabled)");
7304 return Ok(HrSnapshot::default());
7305 }
7306
7307 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7308
7309 let seed = self.seed;
7310 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7311 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7312 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7313 let company_code = self
7314 .config
7315 .companies
7316 .first()
7317 .map(|c| c.code.as_str())
7318 .unwrap_or("1000");
7319 let currency = self
7320 .config
7321 .companies
7322 .first()
7323 .map(|c| c.currency.as_str())
7324 .unwrap_or("USD");
7325
7326 let employee_ids: Vec<String> = self
7327 .master_data
7328 .employees
7329 .iter()
7330 .map(|e| e.employee_id.clone())
7331 .collect();
7332
7333 if employee_ids.is_empty() {
7334 debug!("Phase 16: Skipped (no employees available)");
7335 return Ok(HrSnapshot::default());
7336 }
7337
7338 let cost_center_ids: Vec<String> = self
7341 .master_data
7342 .employees
7343 .iter()
7344 .filter_map(|e| e.cost_center.clone())
7345 .collect::<std::collections::HashSet<_>>()
7346 .into_iter()
7347 .collect();
7348
7349 let mut snapshot = HrSnapshot::default();
7350
7351 if self.config.hr.payroll.enabled {
7353 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7354 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7355
7356 let payroll_pack = self.primary_pack();
7358
7359 payroll_gen.set_country_pack(payroll_pack.clone());
7362
7363 let employees_with_salary: Vec<(
7364 String,
7365 rust_decimal::Decimal,
7366 Option<String>,
7367 Option<String>,
7368 )> = self
7369 .master_data
7370 .employees
7371 .iter()
7372 .map(|e| {
7373 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7376 e.base_salary
7377 } else {
7378 rust_decimal::Decimal::from(60_000)
7379 };
7380 (
7381 e.employee_id.clone(),
7382 annual, e.cost_center.clone(),
7384 e.department_id.clone(),
7385 )
7386 })
7387 .collect();
7388
7389 let change_history = &self.master_data.employee_change_history;
7392 let has_changes = !change_history.is_empty();
7393 if has_changes {
7394 debug!(
7395 "Payroll will incorporate {} employee change events",
7396 change_history.len()
7397 );
7398 }
7399
7400 for month in 0..self.config.global.period_months {
7401 let period_start = start_date + chrono::Months::new(month);
7402 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7403 let (run, items) = if has_changes {
7404 payroll_gen.generate_with_changes(
7405 company_code,
7406 &employees_with_salary,
7407 period_start,
7408 period_end,
7409 currency,
7410 change_history,
7411 )
7412 } else {
7413 payroll_gen.generate(
7414 company_code,
7415 &employees_with_salary,
7416 period_start,
7417 period_end,
7418 currency,
7419 )
7420 };
7421 snapshot.payroll_runs.push(run);
7422 snapshot.payroll_run_count += 1;
7423 snapshot.payroll_line_item_count += items.len();
7424 snapshot.payroll_line_items.extend(items);
7425 }
7426 }
7427
7428 if self.config.hr.time_attendance.enabled {
7430 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7431 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7432 if let Some(ctx) = &self.temporal_context {
7436 time_gen.set_temporal_context(Arc::clone(ctx));
7437 }
7438 let entries = time_gen.generate(
7439 &employee_ids,
7440 start_date,
7441 end_date,
7442 &self.config.hr.time_attendance,
7443 );
7444 snapshot.time_entry_count = entries.len();
7445 snapshot.time_entries = entries;
7446 }
7447
7448 if self.config.hr.expenses.enabled {
7450 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7451 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7452 expense_gen.set_country_pack(self.primary_pack().clone());
7453 if let Some(ctx) = &self.temporal_context {
7456 expense_gen.set_temporal_context(Arc::clone(ctx));
7457 }
7458 let company_currency = self
7459 .config
7460 .companies
7461 .first()
7462 .map(|c| c.currency.as_str())
7463 .unwrap_or("USD");
7464 let reports = expense_gen.generate_with_currency(
7465 &employee_ids,
7466 start_date,
7467 end_date,
7468 &self.config.hr.expenses,
7469 company_currency,
7470 );
7471 snapshot.expense_report_count = reports.len();
7472 snapshot.expense_reports = reports;
7473 }
7474
7475 if self.config.hr.payroll.enabled {
7477 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7478 let employee_pairs: Vec<(String, String)> = self
7479 .master_data
7480 .employees
7481 .iter()
7482 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7483 .collect();
7484 let enrollments =
7485 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7486 snapshot.benefit_enrollment_count = enrollments.len();
7487 snapshot.benefit_enrollments = enrollments;
7488 }
7489
7490 if self.phase_config.generate_hr {
7492 let entity_name = self
7493 .config
7494 .companies
7495 .first()
7496 .map(|c| c.name.as_str())
7497 .unwrap_or("Entity");
7498 let period_months = self.config.global.period_months;
7499 let period_label = {
7500 let y = start_date.year();
7501 let m = start_date.month();
7502 if period_months >= 12 {
7503 format!("FY{y}")
7504 } else {
7505 format!("{y}-{m:02}")
7506 }
7507 };
7508 let reporting_date =
7509 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7510
7511 let avg_salary: Option<rust_decimal::Decimal> = {
7516 let employee_count = employee_ids.len();
7517 if self.config.hr.payroll.enabled
7518 && employee_count > 0
7519 && !snapshot.payroll_runs.is_empty()
7520 {
7521 let total_gross: rust_decimal::Decimal = snapshot
7523 .payroll_runs
7524 .iter()
7525 .filter(|r| r.company_code == company_code)
7526 .map(|r| r.total_gross)
7527 .sum();
7528 if total_gross > rust_decimal::Decimal::ZERO {
7529 let annual_total = if period_months > 0 && period_months < 12 {
7531 total_gross * rust_decimal::Decimal::from(12u32)
7532 / rust_decimal::Decimal::from(period_months)
7533 } else {
7534 total_gross
7535 };
7536 Some(
7537 (annual_total / rust_decimal::Decimal::from(employee_count))
7538 .round_dp(2),
7539 )
7540 } else {
7541 None
7542 }
7543 } else {
7544 None
7545 }
7546 };
7547
7548 let mut pension_gen =
7549 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7550 let pension_snap = pension_gen.generate(
7551 company_code,
7552 entity_name,
7553 &period_label,
7554 reporting_date,
7555 employee_ids.len(),
7556 currency,
7557 avg_salary,
7558 period_months,
7559 );
7560 snapshot.pension_plan_count = pension_snap.plans.len();
7561 snapshot.pension_plans = pension_snap.plans;
7562 snapshot.pension_obligations = pension_snap.obligations;
7563 snapshot.pension_plan_assets = pension_snap.plan_assets;
7564 snapshot.pension_disclosures = pension_snap.disclosures;
7565 snapshot.pension_journal_entries = pension_snap.journal_entries;
7570 }
7571
7572 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7574 let period_months = self.config.global.period_months;
7575 let period_label = {
7576 let y = start_date.year();
7577 let m = start_date.month();
7578 if period_months >= 12 {
7579 format!("FY{y}")
7580 } else {
7581 format!("{y}-{m:02}")
7582 }
7583 };
7584 let reporting_date =
7585 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7586
7587 let mut stock_comp_gen =
7588 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7589 let stock_snap = stock_comp_gen.generate(
7590 company_code,
7591 &employee_ids,
7592 start_date,
7593 &period_label,
7594 reporting_date,
7595 currency,
7596 );
7597 snapshot.stock_grant_count = stock_snap.grants.len();
7598 snapshot.stock_grants = stock_snap.grants;
7599 snapshot.stock_comp_expenses = stock_snap.expenses;
7600 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7601 }
7602
7603 stats.payroll_run_count = snapshot.payroll_run_count;
7604 stats.time_entry_count = snapshot.time_entry_count;
7605 stats.expense_report_count = snapshot.expense_report_count;
7606 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7607 stats.pension_plan_count = snapshot.pension_plan_count;
7608 stats.stock_grant_count = snapshot.stock_grant_count;
7609
7610 info!(
7611 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7612 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7613 snapshot.time_entry_count, snapshot.expense_report_count,
7614 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7615 snapshot.stock_grant_count
7616 );
7617 self.check_resources_with_log("post-hr")?;
7618
7619 Ok(snapshot)
7620 }
7621
7622 fn phase_accounting_standards(
7624 &mut self,
7625 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7626 journal_entries: &[JournalEntry],
7627 stats: &mut EnhancedGenerationStatistics,
7628 ) -> SynthResult<AccountingStandardsSnapshot> {
7629 if !self.phase_config.generate_accounting_standards {
7630 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7631 return Ok(AccountingStandardsSnapshot::default());
7632 }
7633 info!("Phase 17: Generating Accounting Standards Data");
7634
7635 let seed = self.seed;
7636 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7637 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7638 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7639 let company_code = self
7640 .config
7641 .companies
7642 .first()
7643 .map(|c| c.code.as_str())
7644 .unwrap_or("1000");
7645 let currency = self
7646 .config
7647 .companies
7648 .first()
7649 .map(|c| c.currency.as_str())
7650 .unwrap_or("USD");
7651
7652 let framework = match self.config.accounting_standards.framework {
7657 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7658 datasynth_standards::framework::AccountingFramework::UsGaap
7659 }
7660 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7661 datasynth_standards::framework::AccountingFramework::Ifrs
7662 }
7663 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7664 datasynth_standards::framework::AccountingFramework::DualReporting
7665 }
7666 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7667 datasynth_standards::framework::AccountingFramework::FrenchGaap
7668 }
7669 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7670 datasynth_standards::framework::AccountingFramework::GermanGaap
7671 }
7672 None => {
7673 let pack = self.primary_pack();
7675 let pack_fw = pack.accounting.framework.as_str();
7676 match pack_fw {
7677 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7678 "dual_reporting" => {
7679 datasynth_standards::framework::AccountingFramework::DualReporting
7680 }
7681 "french_gaap" => {
7682 datasynth_standards::framework::AccountingFramework::FrenchGaap
7683 }
7684 "german_gaap" | "hgb" => {
7685 datasynth_standards::framework::AccountingFramework::GermanGaap
7686 }
7687 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7689 }
7690 }
7691 };
7692
7693 let mut snapshot = AccountingStandardsSnapshot::default();
7694
7695 if self.config.accounting_standards.revenue_recognition.enabled {
7697 let customer_ids: Vec<String> = self
7698 .master_data
7699 .customers
7700 .iter()
7701 .map(|c| c.customer_id.clone())
7702 .collect();
7703
7704 if !customer_ids.is_empty() {
7705 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7706 let contracts = rev_gen.generate(
7707 company_code,
7708 &customer_ids,
7709 start_date,
7710 end_date,
7711 currency,
7712 &self.config.accounting_standards.revenue_recognition,
7713 framework,
7714 );
7715 snapshot.revenue_contract_count = contracts.len();
7716 snapshot.contracts = contracts;
7717 }
7718 }
7719
7720 if self.config.accounting_standards.impairment.enabled {
7722 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7723 .master_data
7724 .assets
7725 .iter()
7726 .map(|a| {
7727 (
7728 a.asset_id.clone(),
7729 a.description.clone(),
7730 a.acquisition_cost,
7731 )
7732 })
7733 .collect();
7734
7735 if !asset_data.is_empty() {
7736 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7737 let tests = imp_gen.generate(
7738 company_code,
7739 &asset_data,
7740 end_date,
7741 &self.config.accounting_standards.impairment,
7742 framework,
7743 );
7744 snapshot.impairment_test_count = tests.len();
7745 snapshot.impairment_tests = tests;
7746 }
7747 }
7748
7749 if self
7751 .config
7752 .accounting_standards
7753 .business_combinations
7754 .enabled
7755 {
7756 let bc_config = &self.config.accounting_standards.business_combinations;
7757 let framework_str = match framework {
7758 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7759 _ => "US_GAAP",
7760 };
7761 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7762 let bc_snap = bc_gen.generate(
7763 company_code,
7764 currency,
7765 start_date,
7766 end_date,
7767 bc_config.acquisition_count,
7768 framework_str,
7769 );
7770 snapshot.business_combination_count = bc_snap.combinations.len();
7771 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7772 snapshot.business_combinations = bc_snap.combinations;
7773 }
7774
7775 if self
7777 .config
7778 .accounting_standards
7779 .expected_credit_loss
7780 .enabled
7781 {
7782 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7783 let framework_str = match framework {
7784 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7785 _ => "ASC_326",
7786 };
7787
7788 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7791
7792 let mut ecl_gen = EclGenerator::new(seed + 43);
7793
7794 let bucket_exposures: Vec<(
7796 datasynth_core::models::subledger::ar::AgingBucket,
7797 rust_decimal::Decimal,
7798 )> = if ar_aging_reports.is_empty() {
7799 use datasynth_core::models::subledger::ar::AgingBucket;
7801 vec![
7802 (
7803 AgingBucket::Current,
7804 rust_decimal::Decimal::from(500_000_u32),
7805 ),
7806 (
7807 AgingBucket::Days1To30,
7808 rust_decimal::Decimal::from(120_000_u32),
7809 ),
7810 (
7811 AgingBucket::Days31To60,
7812 rust_decimal::Decimal::from(45_000_u32),
7813 ),
7814 (
7815 AgingBucket::Days61To90,
7816 rust_decimal::Decimal::from(15_000_u32),
7817 ),
7818 (
7819 AgingBucket::Over90Days,
7820 rust_decimal::Decimal::from(8_000_u32),
7821 ),
7822 ]
7823 } else {
7824 use datasynth_core::models::subledger::ar::AgingBucket;
7825 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7827 std::collections::HashMap::new();
7828 for report in ar_aging_reports {
7829 for (bucket, amount) in &report.bucket_totals {
7830 *totals.entry(*bucket).or_default() += amount;
7831 }
7832 }
7833 AgingBucket::all()
7834 .into_iter()
7835 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7836 .collect()
7837 };
7838
7839 let ecl_snap = ecl_gen.generate(
7840 company_code,
7841 end_date,
7842 &bucket_exposures,
7843 ecl_config,
7844 &period_label,
7845 framework_str,
7846 );
7847
7848 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7849 snapshot.ecl_models = ecl_snap.ecl_models;
7850 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7851 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7852 }
7853
7854 {
7856 let framework_str = match framework {
7857 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7858 _ => "US_GAAP",
7859 };
7860
7861 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7866 .max(rust_decimal::Decimal::from(100_000_u32));
7867
7868 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7869
7870 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7871 let prov_snap = prov_gen.generate(
7872 company_code,
7873 currency,
7874 revenue_proxy,
7875 end_date,
7876 &period_label,
7877 framework_str,
7878 None, );
7880
7881 snapshot.provision_count = prov_snap.provisions.len();
7882 snapshot.provisions = prov_snap.provisions;
7883 snapshot.provision_movements = prov_snap.movements;
7884 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7885 snapshot.provision_journal_entries = prov_snap.journal_entries;
7886 }
7887
7888 {
7892 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7893
7894 let presentation_currency = self
7895 .config
7896 .global
7897 .presentation_currency
7898 .clone()
7899 .unwrap_or_else(|| self.config.global.group_currency.clone());
7900
7901 let mut rate_table = FxRateTable::new(&presentation_currency);
7904
7905 let base_rates = base_rates_usd();
7909 for (ccy, rate) in &base_rates {
7910 rate_table.add_rate(FxRate::new(
7911 ccy,
7912 "USD",
7913 RateType::Closing,
7914 end_date,
7915 *rate,
7916 "SYNTHETIC",
7917 ));
7918 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7921 rate_table.add_rate(FxRate::new(
7922 ccy,
7923 "USD",
7924 RateType::Average,
7925 end_date,
7926 avg,
7927 "SYNTHETIC",
7928 ));
7929 }
7930
7931 let mut translation_results = Vec::new();
7932 for company in &self.config.companies {
7933 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7936 .max(rust_decimal::Decimal::from(100_000_u32));
7937
7938 let func_ccy = company
7939 .functional_currency
7940 .clone()
7941 .unwrap_or_else(|| company.currency.clone());
7942
7943 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7944 &company.code,
7945 &func_ccy,
7946 &presentation_currency,
7947 &ias21_period_label,
7948 end_date,
7949 company_revenue,
7950 &rate_table,
7951 );
7952 translation_results.push(result);
7953 }
7954
7955 snapshot.currency_translation_count = translation_results.len();
7956 snapshot.currency_translation_results = translation_results;
7957 }
7958
7959 stats.revenue_contract_count = snapshot.revenue_contract_count;
7960 stats.impairment_test_count = snapshot.impairment_test_count;
7961 stats.business_combination_count = snapshot.business_combination_count;
7962 stats.ecl_model_count = snapshot.ecl_model_count;
7963 stats.provision_count = snapshot.provision_count;
7964
7965 if self.config.accounting_standards.leases.enabled {
7969 use datasynth_generators::standards::LeaseGenerator;
7970 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7971 .unwrap_or_else(|_| {
7972 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7973 });
7974 let framework =
7975 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7976 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7977 for company in &self.config.companies {
7978 let leases = lease_gen.generate(
7979 &company.code,
7980 start_date,
7981 &self.config.accounting_standards.leases,
7982 framework,
7983 );
7984 snapshot.lease_count += leases.len();
7985 snapshot.leases.extend(leases);
7986 }
7987 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7988 }
7989
7990 if self.config.accounting_standards.fair_value.enabled {
7994 use datasynth_generators::standards::FairValueGenerator;
7995 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7996 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7997 + chrono::Months::new(self.config.global.period_months);
7998 let framework =
7999 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8000 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8001 for company in &self.config.companies {
8002 let measurements = fv_gen.generate(
8003 &company.code,
8004 end_date,
8005 &company.currency,
8006 &self.config.accounting_standards.fair_value,
8007 framework,
8008 );
8009 snapshot.fair_value_measurement_count += measurements.len();
8010 snapshot.fair_value_measurements.extend(measurements);
8011 }
8012 info!(
8013 "v3.3.1 fair value measurements: {}",
8014 snapshot.fair_value_measurement_count
8015 );
8016 }
8017
8018 if self.config.accounting_standards.generate_differences
8022 && matches!(
8023 self.config.accounting_standards.framework,
8024 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8025 )
8026 {
8027 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8028 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8029 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8030 + chrono::Months::new(self.config.global.period_months);
8031 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8032 for company in &self.config.companies {
8033 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8034 snapshot.framework_difference_count += records.len();
8035 snapshot.framework_differences.extend(records);
8036 snapshot.framework_reconciliations.push(reconciliation);
8037 }
8038 info!(
8039 "v3.3.1 framework reconciliation: {} differences across {} entities",
8040 snapshot.framework_difference_count,
8041 snapshot.framework_reconciliations.len()
8042 );
8043 }
8044
8045 info!(
8046 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8047 snapshot.revenue_contract_count,
8048 snapshot.impairment_test_count,
8049 snapshot.business_combination_count,
8050 snapshot.ecl_model_count,
8051 snapshot.provision_count,
8052 snapshot.currency_translation_count,
8053 snapshot.lease_count,
8054 snapshot.fair_value_measurement_count,
8055 snapshot.framework_difference_count,
8056 );
8057 self.check_resources_with_log("post-accounting-standards")?;
8058
8059 Ok(snapshot)
8060 }
8061
8062 fn resolve_accounting_framework(
8066 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8067 ) -> datasynth_standards::framework::AccountingFramework {
8068 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8069 use datasynth_standards::framework::AccountingFramework as Fw;
8070 match cfg {
8071 Some(Cfg::Ifrs) => Fw::Ifrs,
8072 Some(Cfg::DualReporting) => Fw::DualReporting,
8073 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8074 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8075 _ => Fw::UsGaap,
8076 }
8077 }
8078
8079 fn phase_manufacturing(
8081 &mut self,
8082 stats: &mut EnhancedGenerationStatistics,
8083 ) -> SynthResult<ManufacturingSnapshot> {
8084 if !self.phase_config.generate_manufacturing {
8085 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8086 return Ok(ManufacturingSnapshot::default());
8087 }
8088 info!("Phase 18: Generating Manufacturing Data");
8089
8090 let seed = self.seed;
8091 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8092 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8093 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8094 let company_code = self
8095 .config
8096 .companies
8097 .first()
8098 .map(|c| c.code.as_str())
8099 .unwrap_or("1000");
8100
8101 let material_data: Vec<(String, String)> = self
8102 .master_data
8103 .materials
8104 .iter()
8105 .map(|m| (m.material_id.clone(), m.description.clone()))
8106 .collect();
8107
8108 if material_data.is_empty() {
8109 debug!("Phase 18: Skipped (no materials available)");
8110 return Ok(ManufacturingSnapshot::default());
8111 }
8112
8113 let mut snapshot = ManufacturingSnapshot::default();
8114
8115 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8117 if let Some(ctx) = &self.temporal_context {
8119 prod_gen.set_temporal_context(Arc::clone(ctx));
8120 }
8121 let production_orders = prod_gen.generate(
8122 company_code,
8123 &material_data,
8124 start_date,
8125 end_date,
8126 &self.config.manufacturing.production_orders,
8127 &self.config.manufacturing.costing,
8128 &self.config.manufacturing.routing,
8129 );
8130 snapshot.production_order_count = production_orders.len();
8131
8132 let inspection_data: Vec<(String, String, String)> = production_orders
8134 .iter()
8135 .map(|po| {
8136 (
8137 po.order_id.clone(),
8138 po.material_id.clone(),
8139 po.material_description.clone(),
8140 )
8141 })
8142 .collect();
8143
8144 snapshot.production_orders = production_orders;
8145
8146 if !inspection_data.is_empty() {
8147 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8148 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8149 snapshot.quality_inspection_count = inspections.len();
8150 snapshot.quality_inspections = inspections;
8151 }
8152
8153 let storage_locations: Vec<(String, String)> = material_data
8155 .iter()
8156 .enumerate()
8157 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8158 .collect();
8159
8160 let employee_ids: Vec<String> = self
8161 .master_data
8162 .employees
8163 .iter()
8164 .map(|e| e.employee_id.clone())
8165 .collect();
8166 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8167 .with_employee_pool(employee_ids);
8168 let mut cycle_count_total = 0usize;
8169 for month in 0..self.config.global.period_months {
8170 let count_date = start_date + chrono::Months::new(month);
8171 let items_per_count = storage_locations.len().clamp(10, 50);
8172 let cc = cc_gen.generate(
8173 company_code,
8174 &storage_locations,
8175 count_date,
8176 items_per_count,
8177 );
8178 snapshot.cycle_counts.push(cc);
8179 cycle_count_total += 1;
8180 }
8181 snapshot.cycle_count_count = cycle_count_total;
8182
8183 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8185 let bom_components = bom_gen.generate(company_code, &material_data);
8186 snapshot.bom_component_count = bom_components.len();
8187 snapshot.bom_components = bom_components;
8188
8189 let currency = self
8191 .config
8192 .companies
8193 .first()
8194 .map(|c| c.currency.as_str())
8195 .unwrap_or("USD");
8196 let production_order_ids: Vec<String> = snapshot
8197 .production_orders
8198 .iter()
8199 .map(|po| po.order_id.clone())
8200 .collect();
8201 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8202 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8203 company_code,
8204 &material_data,
8205 start_date,
8206 end_date,
8207 2,
8208 currency,
8209 &production_order_ids,
8210 );
8211 snapshot.inventory_movement_count = inventory_movements.len();
8212 snapshot.inventory_movements = inventory_movements;
8213
8214 stats.production_order_count = snapshot.production_order_count;
8215 stats.quality_inspection_count = snapshot.quality_inspection_count;
8216 stats.cycle_count_count = snapshot.cycle_count_count;
8217 stats.bom_component_count = snapshot.bom_component_count;
8218 stats.inventory_movement_count = snapshot.inventory_movement_count;
8219
8220 info!(
8221 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8222 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8223 snapshot.bom_component_count, snapshot.inventory_movement_count
8224 );
8225 self.check_resources_with_log("post-manufacturing")?;
8226
8227 Ok(snapshot)
8228 }
8229
8230 fn phase_sales_kpi_budgets(
8232 &mut self,
8233 coa: &Arc<ChartOfAccounts>,
8234 financial_reporting: &FinancialReportingSnapshot,
8235 stats: &mut EnhancedGenerationStatistics,
8236 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8237 if !self.phase_config.generate_sales_kpi_budgets {
8238 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8239 return Ok(SalesKpiBudgetsSnapshot::default());
8240 }
8241 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8242
8243 let seed = self.seed;
8244 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8245 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8246 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8247 let company_code = self
8248 .config
8249 .companies
8250 .first()
8251 .map(|c| c.code.as_str())
8252 .unwrap_or("1000");
8253
8254 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8255
8256 if self.config.sales_quotes.enabled {
8258 let customer_data: Vec<(String, String)> = self
8259 .master_data
8260 .customers
8261 .iter()
8262 .map(|c| (c.customer_id.clone(), c.name.clone()))
8263 .collect();
8264 let material_data: Vec<(String, String)> = self
8265 .master_data
8266 .materials
8267 .iter()
8268 .map(|m| (m.material_id.clone(), m.description.clone()))
8269 .collect();
8270
8271 if !customer_data.is_empty() && !material_data.is_empty() {
8272 let employee_ids: Vec<String> = self
8273 .master_data
8274 .employees
8275 .iter()
8276 .map(|e| e.employee_id.clone())
8277 .collect();
8278 let customer_ids: Vec<String> = self
8279 .master_data
8280 .customers
8281 .iter()
8282 .map(|c| c.customer_id.clone())
8283 .collect();
8284 let company_currency = self
8285 .config
8286 .companies
8287 .first()
8288 .map(|c| c.currency.as_str())
8289 .unwrap_or("USD");
8290
8291 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8292 .with_pools(employee_ids, customer_ids);
8293 let quotes = quote_gen.generate_with_currency(
8294 company_code,
8295 &customer_data,
8296 &material_data,
8297 start_date,
8298 end_date,
8299 &self.config.sales_quotes,
8300 company_currency,
8301 );
8302 snapshot.sales_quote_count = quotes.len();
8303 snapshot.sales_quotes = quotes;
8304 }
8305 }
8306
8307 if self.config.financial_reporting.management_kpis.enabled {
8309 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8310 let mut kpis = kpi_gen.generate(
8311 company_code,
8312 start_date,
8313 end_date,
8314 &self.config.financial_reporting.management_kpis,
8315 );
8316
8317 {
8319 use rust_decimal::Decimal;
8320
8321 if let Some(income_stmt) =
8322 financial_reporting.financial_statements.iter().find(|fs| {
8323 fs.statement_type == StatementType::IncomeStatement
8324 && fs.company_code == company_code
8325 })
8326 {
8327 let total_revenue: Decimal = income_stmt
8329 .line_items
8330 .iter()
8331 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8332 .map(|li| li.amount)
8333 .sum();
8334 let total_cogs: Decimal = income_stmt
8335 .line_items
8336 .iter()
8337 .filter(|li| {
8338 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8339 && !li.is_total
8340 })
8341 .map(|li| li.amount.abs())
8342 .sum();
8343 let total_opex: Decimal = income_stmt
8344 .line_items
8345 .iter()
8346 .filter(|li| {
8347 li.section.contains("Expense")
8348 && !li.is_total
8349 && !li.section.contains("Cost")
8350 })
8351 .map(|li| li.amount.abs())
8352 .sum();
8353
8354 if total_revenue > Decimal::ZERO {
8355 let hundred = Decimal::from(100);
8356 let gross_margin_pct =
8357 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8358 let operating_income = total_revenue - total_cogs - total_opex;
8359 let op_margin_pct =
8360 (operating_income * hundred / total_revenue).round_dp(2);
8361
8362 for kpi in &mut kpis {
8364 if kpi.name == "Gross Margin" {
8365 kpi.value = gross_margin_pct;
8366 } else if kpi.name == "Operating Margin" {
8367 kpi.value = op_margin_pct;
8368 }
8369 }
8370 }
8371 }
8372
8373 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8375 fs.statement_type == StatementType::BalanceSheet
8376 && fs.company_code == company_code
8377 }) {
8378 let current_assets: Decimal = bs
8379 .line_items
8380 .iter()
8381 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8382 .map(|li| li.amount)
8383 .sum();
8384 let current_liabilities: Decimal = bs
8385 .line_items
8386 .iter()
8387 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8388 .map(|li| li.amount.abs())
8389 .sum();
8390
8391 if current_liabilities > Decimal::ZERO {
8392 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8393 for kpi in &mut kpis {
8394 if kpi.name == "Current Ratio" {
8395 kpi.value = current_ratio;
8396 }
8397 }
8398 }
8399 }
8400 }
8401
8402 snapshot.kpi_count = kpis.len();
8403 snapshot.kpis = kpis;
8404 }
8405
8406 if self.config.financial_reporting.budgets.enabled {
8408 let account_data: Vec<(String, String)> = coa
8409 .accounts
8410 .iter()
8411 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8412 .collect();
8413
8414 if !account_data.is_empty() {
8415 let fiscal_year = start_date.year() as u32;
8416 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8417 let budget = budget_gen.generate(
8418 company_code,
8419 fiscal_year,
8420 &account_data,
8421 &self.config.financial_reporting.budgets,
8422 );
8423 snapshot.budget_line_count = budget.line_items.len();
8424 snapshot.budgets.push(budget);
8425 }
8426 }
8427
8428 stats.sales_quote_count = snapshot.sales_quote_count;
8429 stats.kpi_count = snapshot.kpi_count;
8430 stats.budget_line_count = snapshot.budget_line_count;
8431
8432 info!(
8433 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8434 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8435 );
8436 self.check_resources_with_log("post-sales-kpi-budgets")?;
8437
8438 Ok(snapshot)
8439 }
8440
8441 fn compute_pre_tax_income(
8448 company_code: &str,
8449 journal_entries: &[JournalEntry],
8450 ) -> rust_decimal::Decimal {
8451 use datasynth_core::accounts::AccountCategory;
8452 use rust_decimal::Decimal;
8453
8454 let mut total_revenue = Decimal::ZERO;
8455 let mut total_expenses = Decimal::ZERO;
8456
8457 for je in journal_entries {
8458 if je.header.company_code != company_code {
8459 continue;
8460 }
8461 for line in &je.lines {
8462 let cat = AccountCategory::from_account(&line.gl_account);
8463 match cat {
8464 AccountCategory::Revenue => {
8465 total_revenue += line.credit_amount - line.debit_amount;
8466 }
8467 AccountCategory::Cogs
8468 | AccountCategory::OperatingExpense
8469 | AccountCategory::OtherIncomeExpense => {
8470 total_expenses += line.debit_amount - line.credit_amount;
8471 }
8472 _ => {}
8473 }
8474 }
8475 }
8476
8477 let pti = (total_revenue - total_expenses).round_dp(2);
8478 if pti == rust_decimal::Decimal::ZERO {
8479 rust_decimal::Decimal::from(1_000_000u32)
8482 } else {
8483 pti
8484 }
8485 }
8486
8487 fn phase_tax_generation(
8489 &mut self,
8490 document_flows: &DocumentFlowSnapshot,
8491 journal_entries: &[JournalEntry],
8492 stats: &mut EnhancedGenerationStatistics,
8493 ) -> SynthResult<TaxSnapshot> {
8494 if !self.phase_config.generate_tax {
8495 debug!("Phase 20: Skipped (tax generation disabled)");
8496 return Ok(TaxSnapshot::default());
8497 }
8498 info!("Phase 20: Generating Tax Data");
8499
8500 let seed = self.seed;
8501 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8502 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8503 let fiscal_year = start_date.year();
8504 let company_code = self
8505 .config
8506 .companies
8507 .first()
8508 .map(|c| c.code.as_str())
8509 .unwrap_or("1000");
8510
8511 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8512 seed + 370,
8513 self.config.tax.clone(),
8514 );
8515
8516 let pack = self.primary_pack().clone();
8517 let (jurisdictions, codes) =
8518 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8519
8520 let mut provisions = Vec::new();
8522 if self.config.tax.provisions.enabled {
8523 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8524 for company in &self.config.companies {
8525 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8526 let statutory_rate = rust_decimal::Decimal::new(
8527 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8528 2,
8529 );
8530 let provision = provision_gen.generate(
8531 &company.code,
8532 start_date,
8533 pre_tax_income,
8534 statutory_rate,
8535 );
8536 provisions.push(provision);
8537 }
8538 }
8539
8540 let mut tax_lines = Vec::new();
8542 if !codes.is_empty() {
8543 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8544 datasynth_generators::TaxLineGeneratorConfig::default(),
8545 codes.clone(),
8546 seed + 372,
8547 );
8548
8549 let buyer_country = self
8552 .config
8553 .companies
8554 .first()
8555 .map(|c| c.country.as_str())
8556 .unwrap_or("US");
8557 for vi in &document_flows.vendor_invoices {
8558 let lines = tax_line_gen.generate_for_document(
8559 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8560 &vi.header.document_id,
8561 buyer_country, buyer_country,
8563 vi.payable_amount,
8564 vi.header.document_date,
8565 None,
8566 );
8567 tax_lines.extend(lines);
8568 }
8569
8570 for ci in &document_flows.customer_invoices {
8572 let lines = tax_line_gen.generate_for_document(
8573 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8574 &ci.header.document_id,
8575 buyer_country, buyer_country,
8577 ci.total_gross_amount,
8578 ci.header.document_date,
8579 None,
8580 );
8581 tax_lines.extend(lines);
8582 }
8583 }
8584
8585 let deferred_tax = {
8587 let companies: Vec<(&str, &str)> = self
8588 .config
8589 .companies
8590 .iter()
8591 .map(|c| (c.code.as_str(), c.country.as_str()))
8592 .collect();
8593 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8594 deferred_gen.generate(&companies, start_date, journal_entries)
8595 };
8596
8597 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8600 std::collections::HashMap::new();
8601 for vi in &document_flows.vendor_invoices {
8602 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8603 }
8604 for ci in &document_flows.customer_invoices {
8605 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8606 }
8607
8608 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8611 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8612 &tax_lines,
8613 company_code,
8614 &doc_dates,
8615 end_date,
8616 );
8617 debug!("Generated {} tax posting JEs", jes.len());
8618 jes
8619 } else {
8620 Vec::new()
8621 };
8622
8623 let snapshot = TaxSnapshot {
8624 jurisdiction_count: jurisdictions.len(),
8625 code_count: codes.len(),
8626 jurisdictions,
8627 codes,
8628 tax_provisions: provisions,
8629 tax_lines,
8630 tax_returns: Vec::new(),
8631 withholding_records: Vec::new(),
8632 tax_anomaly_labels: Vec::new(),
8633 deferred_tax,
8634 tax_posting_journal_entries,
8635 };
8636
8637 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8638 stats.tax_code_count = snapshot.code_count;
8639 stats.tax_provision_count = snapshot.tax_provisions.len();
8640 stats.tax_line_count = snapshot.tax_lines.len();
8641
8642 info!(
8643 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8644 snapshot.jurisdiction_count,
8645 snapshot.code_count,
8646 snapshot.tax_provisions.len(),
8647 snapshot.deferred_tax.temporary_differences.len(),
8648 snapshot.deferred_tax.journal_entries.len(),
8649 snapshot.tax_posting_journal_entries.len(),
8650 );
8651 self.check_resources_with_log("post-tax")?;
8652
8653 Ok(snapshot)
8654 }
8655
8656 fn phase_esg_generation(
8658 &mut self,
8659 document_flows: &DocumentFlowSnapshot,
8660 manufacturing: &ManufacturingSnapshot,
8661 stats: &mut EnhancedGenerationStatistics,
8662 ) -> SynthResult<EsgSnapshot> {
8663 if !self.phase_config.generate_esg {
8664 debug!("Phase 21: Skipped (ESG generation disabled)");
8665 return Ok(EsgSnapshot::default());
8666 }
8667 let degradation = self.check_resources()?;
8668 if degradation >= DegradationLevel::Reduced {
8669 debug!(
8670 "Phase skipped due to resource pressure (degradation: {:?})",
8671 degradation
8672 );
8673 return Ok(EsgSnapshot::default());
8674 }
8675 info!("Phase 21: Generating ESG Data");
8676
8677 let seed = self.seed;
8678 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8679 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8680 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8681 let entity_id = self
8682 .config
8683 .companies
8684 .first()
8685 .map(|c| c.code.as_str())
8686 .unwrap_or("1000");
8687
8688 let esg_cfg = &self.config.esg;
8689 let mut snapshot = EsgSnapshot::default();
8690
8691 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8693 esg_cfg.environmental.energy.clone(),
8694 seed + 80,
8695 );
8696 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8697
8698 let facility_count = esg_cfg.environmental.energy.facility_count;
8700 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8701 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8702
8703 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8705 seed + 82,
8706 esg_cfg.environmental.waste.diversion_target,
8707 facility_count,
8708 );
8709 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8710
8711 let mut emission_gen =
8713 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8714
8715 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8717 .iter()
8718 .map(|e| datasynth_generators::EnergyInput {
8719 facility_id: e.facility_id.clone(),
8720 energy_type: match e.energy_source {
8721 EnergySourceType::NaturalGas => {
8722 datasynth_generators::EnergyInputType::NaturalGas
8723 }
8724 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8725 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8726 _ => datasynth_generators::EnergyInputType::Electricity,
8727 },
8728 consumption_kwh: e.consumption_kwh,
8729 period: e.period,
8730 })
8731 .collect();
8732
8733 if !manufacturing.production_orders.is_empty() {
8735 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8736 &manufacturing.production_orders,
8737 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8740 if !mfg_energy.is_empty() {
8741 info!(
8742 "ESG: {} energy inputs derived from {} production orders",
8743 mfg_energy.len(),
8744 manufacturing.production_orders.len(),
8745 );
8746 energy_inputs.extend(mfg_energy);
8747 }
8748 }
8749
8750 let mut emissions = Vec::new();
8751 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8752 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8753
8754 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8756 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8757 for payment in &document_flows.payments {
8758 if payment.is_vendor {
8759 *totals
8760 .entry(payment.business_partner_id.clone())
8761 .or_default() += payment.amount;
8762 }
8763 }
8764 totals
8765 };
8766 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8767 .master_data
8768 .vendors
8769 .iter()
8770 .map(|v| {
8771 let spend = vendor_payment_totals
8772 .get(&v.vendor_id)
8773 .copied()
8774 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8775 datasynth_generators::VendorSpendInput {
8776 vendor_id: v.vendor_id.clone(),
8777 category: format!("{:?}", v.vendor_type).to_lowercase(),
8778 spend,
8779 country: v.country.clone(),
8780 }
8781 })
8782 .collect();
8783 if !vendor_spend.is_empty() {
8784 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8785 entity_id,
8786 &vendor_spend,
8787 start_date,
8788 end_date,
8789 ));
8790 }
8791
8792 let headcount = self.master_data.employees.len() as u32;
8794 if headcount > 0 {
8795 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8796 emissions.extend(emission_gen.generate_scope3_business_travel(
8797 entity_id,
8798 travel_spend,
8799 start_date,
8800 ));
8801 emissions
8802 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8803 }
8804
8805 snapshot.emission_count = emissions.len();
8806 snapshot.emissions = emissions;
8807 snapshot.energy = energy_records;
8808
8809 let mut workforce_gen =
8811 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8812 let total_headcount = headcount.max(100);
8813 snapshot.diversity =
8814 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8815 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8816
8817 if !self.master_data.employees.is_empty() {
8819 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8820 entity_id,
8821 &self.master_data.employees,
8822 end_date,
8823 );
8824 if !hr_diversity.is_empty() {
8825 info!(
8826 "ESG: {} diversity metrics derived from {} actual employees",
8827 hr_diversity.len(),
8828 self.master_data.employees.len(),
8829 );
8830 snapshot.diversity.extend(hr_diversity);
8831 }
8832 }
8833
8834 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8835 entity_id,
8836 facility_count,
8837 start_date,
8838 end_date,
8839 );
8840
8841 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8844 entity_id,
8845 &snapshot.safety_incidents,
8846 total_hours,
8847 start_date,
8848 );
8849 snapshot.safety_metrics = vec![safety_metric];
8850
8851 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8853 seed + 85,
8854 esg_cfg.governance.board_size,
8855 esg_cfg.governance.independence_target,
8856 );
8857 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8858
8859 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8861 esg_cfg.supply_chain_esg.clone(),
8862 seed + 86,
8863 );
8864 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8865 .master_data
8866 .vendors
8867 .iter()
8868 .map(|v| datasynth_generators::VendorInput {
8869 vendor_id: v.vendor_id.clone(),
8870 country: v.country.clone(),
8871 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8872 quality_score: None,
8873 })
8874 .collect();
8875 snapshot.supplier_assessments =
8876 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8877
8878 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8880 seed + 87,
8881 esg_cfg.reporting.clone(),
8882 esg_cfg.climate_scenarios.clone(),
8883 );
8884 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8885 snapshot.disclosures = disclosure_gen.generate_disclosures(
8886 entity_id,
8887 &snapshot.materiality,
8888 start_date,
8889 end_date,
8890 );
8891 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8892 snapshot.disclosure_count = snapshot.disclosures.len();
8893
8894 if esg_cfg.anomaly_rate > 0.0 {
8896 let mut anomaly_injector =
8897 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8898 let mut labels = Vec::new();
8899 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8900 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8901 labels.extend(
8902 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8903 );
8904 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8905 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8906 snapshot.anomaly_labels = labels;
8907 }
8908
8909 stats.esg_emission_count = snapshot.emission_count;
8910 stats.esg_disclosure_count = snapshot.disclosure_count;
8911
8912 info!(
8913 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8914 snapshot.emission_count,
8915 snapshot.disclosure_count,
8916 snapshot.supplier_assessments.len()
8917 );
8918 self.check_resources_with_log("post-esg")?;
8919
8920 Ok(snapshot)
8921 }
8922
8923 fn phase_treasury_data(
8925 &mut self,
8926 document_flows: &DocumentFlowSnapshot,
8927 subledger: &SubledgerSnapshot,
8928 intercompany: &IntercompanySnapshot,
8929 stats: &mut EnhancedGenerationStatistics,
8930 ) -> SynthResult<TreasurySnapshot> {
8931 if !self.phase_config.generate_treasury {
8932 debug!("Phase 22: Skipped (treasury generation disabled)");
8933 return Ok(TreasurySnapshot::default());
8934 }
8935 let degradation = self.check_resources()?;
8936 if degradation >= DegradationLevel::Reduced {
8937 debug!(
8938 "Phase skipped due to resource pressure (degradation: {:?})",
8939 degradation
8940 );
8941 return Ok(TreasurySnapshot::default());
8942 }
8943 info!("Phase 22: Generating Treasury Data");
8944
8945 let seed = self.seed;
8946 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948 let currency = self
8949 .config
8950 .companies
8951 .first()
8952 .map(|c| c.currency.as_str())
8953 .unwrap_or("USD");
8954 let entity_id = self
8955 .config
8956 .companies
8957 .first()
8958 .map(|c| c.code.as_str())
8959 .unwrap_or("1000");
8960
8961 let mut snapshot = TreasurySnapshot::default();
8962
8963 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8965 self.config.treasury.debt.clone(),
8966 seed + 90,
8967 );
8968 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8969
8970 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8972 self.config.treasury.hedging.clone(),
8973 seed + 91,
8974 );
8975 for debt in &snapshot.debt_instruments {
8976 if debt.rate_type == InterestRateType::Variable {
8977 let swap = hedge_gen.generate_ir_swap(
8978 currency,
8979 debt.principal,
8980 debt.origination_date,
8981 debt.maturity_date,
8982 );
8983 snapshot.hedging_instruments.push(swap);
8984 }
8985 }
8986
8987 {
8990 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8991 for payment in &document_flows.payments {
8992 if payment.currency != currency {
8993 let entry = fx_map
8994 .entry(payment.currency.clone())
8995 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8996 entry.0 += payment.amount;
8997 if payment.header.document_date > entry.1 {
8999 entry.1 = payment.header.document_date;
9000 }
9001 }
9002 }
9003 if !fx_map.is_empty() {
9004 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9005 .into_iter()
9006 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9007 datasynth_generators::treasury::FxExposure {
9008 currency_pair: format!("{foreign_ccy}/{currency}"),
9009 foreign_currency: foreign_ccy,
9010 net_amount,
9011 settlement_date,
9012 description: "AP payment FX exposure".to_string(),
9013 }
9014 })
9015 .collect();
9016 let (fx_instruments, fx_relationships) =
9017 hedge_gen.generate(start_date, &fx_exposures);
9018 snapshot.hedging_instruments.extend(fx_instruments);
9019 snapshot.hedge_relationships.extend(fx_relationships);
9020 }
9021 }
9022
9023 if self.config.treasury.anomaly_rate > 0.0 {
9025 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9026 seed + 92,
9027 self.config.treasury.anomaly_rate,
9028 );
9029 let mut labels = Vec::new();
9030 labels.extend(
9031 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9032 );
9033 snapshot.treasury_anomaly_labels = labels;
9034 }
9035
9036 if self.config.treasury.cash_positioning.enabled {
9038 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9039
9040 for payment in &document_flows.payments {
9042 cash_flows.push(datasynth_generators::treasury::CashFlow {
9043 date: payment.header.document_date,
9044 account_id: format!("{entity_id}-MAIN"),
9045 amount: payment.amount,
9046 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9047 });
9048 }
9049
9050 for chain in &document_flows.o2c_chains {
9052 if let Some(ref receipt) = chain.customer_receipt {
9053 cash_flows.push(datasynth_generators::treasury::CashFlow {
9054 date: receipt.header.document_date,
9055 account_id: format!("{entity_id}-MAIN"),
9056 amount: receipt.amount,
9057 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9058 });
9059 }
9060 for receipt in &chain.remainder_receipts {
9062 cash_flows.push(datasynth_generators::treasury::CashFlow {
9063 date: receipt.header.document_date,
9064 account_id: format!("{entity_id}-MAIN"),
9065 amount: receipt.amount,
9066 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9067 });
9068 }
9069 }
9070
9071 if !cash_flows.is_empty() {
9072 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9073 self.config.treasury.cash_positioning.clone(),
9074 seed + 93,
9075 );
9076 let account_id = format!("{entity_id}-MAIN");
9077 snapshot.cash_positions = cash_gen.generate(
9078 entity_id,
9079 &account_id,
9080 currency,
9081 &cash_flows,
9082 start_date,
9083 start_date + chrono::Months::new(self.config.global.period_months),
9084 rust_decimal::Decimal::new(1_000_000, 0), );
9086 }
9087 }
9088
9089 if self.config.treasury.cash_forecasting.enabled {
9091 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9092
9093 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9095 .ar_invoices
9096 .iter()
9097 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9098 .map(|inv| {
9099 let days_past_due = if inv.due_date < end_date {
9100 (end_date - inv.due_date).num_days().max(0) as u32
9101 } else {
9102 0
9103 };
9104 datasynth_generators::treasury::ArAgingItem {
9105 expected_date: inv.due_date,
9106 amount: inv.amount_remaining,
9107 days_past_due,
9108 document_id: inv.invoice_number.clone(),
9109 }
9110 })
9111 .collect();
9112
9113 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9115 .ap_invoices
9116 .iter()
9117 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9118 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9119 payment_date: inv.due_date,
9120 amount: inv.amount_remaining,
9121 document_id: inv.invoice_number.clone(),
9122 })
9123 .collect();
9124
9125 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9126 self.config.treasury.cash_forecasting.clone(),
9127 seed + 94,
9128 );
9129 let forecast = forecast_gen.generate(
9130 entity_id,
9131 currency,
9132 end_date,
9133 &ar_items,
9134 &ap_items,
9135 &[], );
9137 snapshot.cash_forecasts.push(forecast);
9138 }
9139
9140 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9142 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9143 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9144 self.config.treasury.cash_pooling.clone(),
9145 seed + 95,
9146 );
9147
9148 let account_ids: Vec<String> = snapshot
9150 .cash_positions
9151 .iter()
9152 .map(|cp| cp.bank_account_id.clone())
9153 .collect::<std::collections::HashSet<_>>()
9154 .into_iter()
9155 .collect();
9156
9157 if let Some(pool) =
9158 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9159 {
9160 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9162 for cp in &snapshot.cash_positions {
9163 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9164 }
9165
9166 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9167 latest_balances
9168 .into_iter()
9169 .filter(|(id, _)| pool.participant_accounts.contains(id))
9170 .map(
9171 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9172 account_id: id,
9173 balance,
9174 },
9175 )
9176 .collect();
9177
9178 let sweeps =
9179 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9180 snapshot.cash_pool_sweeps = sweeps;
9181 snapshot.cash_pools.push(pool);
9182 }
9183 }
9184
9185 if self.config.treasury.bank_guarantees.enabled {
9187 let vendor_names: Vec<String> = self
9188 .master_data
9189 .vendors
9190 .iter()
9191 .map(|v| v.name.clone())
9192 .collect();
9193 if !vendor_names.is_empty() {
9194 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9195 self.config.treasury.bank_guarantees.clone(),
9196 seed + 96,
9197 );
9198 snapshot.bank_guarantees =
9199 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9200 }
9201 }
9202
9203 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9205 let entity_ids: Vec<String> = self
9206 .config
9207 .companies
9208 .iter()
9209 .map(|c| c.code.clone())
9210 .collect();
9211 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9212 .matched_pairs
9213 .iter()
9214 .map(|mp| {
9215 (
9216 mp.seller_company.clone(),
9217 mp.buyer_company.clone(),
9218 mp.amount,
9219 )
9220 })
9221 .collect();
9222 if entity_ids.len() >= 2 {
9223 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9224 self.config.treasury.netting.clone(),
9225 seed + 97,
9226 );
9227 snapshot.netting_runs = netting_gen.generate(
9228 &entity_ids,
9229 currency,
9230 start_date,
9231 self.config.global.period_months,
9232 &ic_amounts,
9233 );
9234 }
9235 }
9236
9237 {
9239 use datasynth_generators::treasury::TreasuryAccounting;
9240
9241 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9242 let mut treasury_jes = Vec::new();
9243
9244 if !snapshot.debt_instruments.is_empty() {
9246 let debt_jes =
9247 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9248 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9249 treasury_jes.extend(debt_jes);
9250 }
9251
9252 if !snapshot.hedging_instruments.is_empty() {
9254 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9255 &snapshot.hedging_instruments,
9256 &snapshot.hedge_relationships,
9257 end_date,
9258 entity_id,
9259 );
9260 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9261 treasury_jes.extend(hedge_jes);
9262 }
9263
9264 if !snapshot.cash_pool_sweeps.is_empty() {
9266 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9267 &snapshot.cash_pool_sweeps,
9268 entity_id,
9269 );
9270 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9271 treasury_jes.extend(sweep_jes);
9272 }
9273
9274 if !treasury_jes.is_empty() {
9275 debug!("Total treasury journal entries: {}", treasury_jes.len());
9276 }
9277 snapshot.journal_entries = treasury_jes;
9278 }
9279
9280 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9281 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9282 stats.cash_position_count = snapshot.cash_positions.len();
9283 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9284 stats.cash_pool_count = snapshot.cash_pools.len();
9285
9286 info!(
9287 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9288 snapshot.debt_instruments.len(),
9289 snapshot.hedging_instruments.len(),
9290 snapshot.cash_positions.len(),
9291 snapshot.cash_forecasts.len(),
9292 snapshot.cash_pools.len(),
9293 snapshot.bank_guarantees.len(),
9294 snapshot.netting_runs.len(),
9295 snapshot.journal_entries.len(),
9296 );
9297 self.check_resources_with_log("post-treasury")?;
9298
9299 Ok(snapshot)
9300 }
9301
9302 fn phase_project_accounting(
9304 &mut self,
9305 document_flows: &DocumentFlowSnapshot,
9306 hr: &HrSnapshot,
9307 stats: &mut EnhancedGenerationStatistics,
9308 ) -> SynthResult<ProjectAccountingSnapshot> {
9309 if !self.phase_config.generate_project_accounting {
9310 debug!("Phase 23: Skipped (project accounting disabled)");
9311 return Ok(ProjectAccountingSnapshot::default());
9312 }
9313 let degradation = self.check_resources()?;
9314 if degradation >= DegradationLevel::Reduced {
9315 debug!(
9316 "Phase skipped due to resource pressure (degradation: {:?})",
9317 degradation
9318 );
9319 return Ok(ProjectAccountingSnapshot::default());
9320 }
9321 info!("Phase 23: Generating Project Accounting Data");
9322
9323 let seed = self.seed;
9324 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9325 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9326 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9327 let company_code = self
9328 .config
9329 .companies
9330 .first()
9331 .map(|c| c.code.as_str())
9332 .unwrap_or("1000");
9333
9334 let mut snapshot = ProjectAccountingSnapshot::default();
9335
9336 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9338 self.config.project_accounting.clone(),
9339 seed + 95,
9340 );
9341 let pool = project_gen.generate(company_code, start_date, end_date);
9342 snapshot.projects = pool.projects.clone();
9343
9344 {
9346 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9347 Vec::new();
9348
9349 for te in &hr.time_entries {
9351 let total_hours = te.hours_regular + te.hours_overtime;
9352 if total_hours > 0.0 {
9353 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9354 id: te.entry_id.clone(),
9355 entity_id: company_code.to_string(),
9356 date: te.date,
9357 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9358 .unwrap_or(rust_decimal::Decimal::ZERO),
9359 source_type: CostSourceType::TimeEntry,
9360 hours: Some(
9361 rust_decimal::Decimal::from_f64_retain(total_hours)
9362 .unwrap_or(rust_decimal::Decimal::ZERO),
9363 ),
9364 });
9365 }
9366 }
9367
9368 for er in &hr.expense_reports {
9370 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9371 id: er.report_id.clone(),
9372 entity_id: company_code.to_string(),
9373 date: er.submission_date,
9374 amount: er.total_amount,
9375 source_type: CostSourceType::ExpenseReport,
9376 hours: None,
9377 });
9378 }
9379
9380 for po in &document_flows.purchase_orders {
9382 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9383 id: po.header.document_id.clone(),
9384 entity_id: company_code.to_string(),
9385 date: po.header.document_date,
9386 amount: po.total_net_amount,
9387 source_type: CostSourceType::PurchaseOrder,
9388 hours: None,
9389 });
9390 }
9391
9392 for vi in &document_flows.vendor_invoices {
9394 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9395 id: vi.header.document_id.clone(),
9396 entity_id: company_code.to_string(),
9397 date: vi.header.document_date,
9398 amount: vi.payable_amount,
9399 source_type: CostSourceType::VendorInvoice,
9400 hours: None,
9401 });
9402 }
9403
9404 if !source_docs.is_empty() && !pool.projects.is_empty() {
9405 let mut cost_gen =
9406 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9407 self.config.project_accounting.cost_allocation.clone(),
9408 seed + 99,
9409 );
9410 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9411 }
9412 }
9413
9414 if self.config.project_accounting.change_orders.enabled {
9416 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9417 self.config.project_accounting.change_orders.clone(),
9418 seed + 96,
9419 );
9420 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9421 }
9422
9423 if self.config.project_accounting.milestones.enabled {
9425 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9426 self.config.project_accounting.milestones.clone(),
9427 seed + 97,
9428 );
9429 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9430 }
9431
9432 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9434 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9435 self.config.project_accounting.earned_value.clone(),
9436 seed + 98,
9437 );
9438 snapshot.earned_value_metrics =
9439 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9440 }
9441
9442 if self.config.project_accounting.revenue_recognition.enabled
9444 && !snapshot.projects.is_empty()
9445 && !snapshot.cost_lines.is_empty()
9446 {
9447 use datasynth_generators::project_accounting::RevenueGenerator;
9448 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9449 let avg_contract_value =
9450 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9451 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9452
9453 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9456 snapshot
9457 .projects
9458 .iter()
9459 .filter(|p| {
9460 matches!(
9461 p.project_type,
9462 datasynth_core::models::ProjectType::Customer
9463 )
9464 })
9465 .map(|p| {
9466 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9467 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9468 } else {
9470 avg_contract_value
9471 };
9472 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9474 })
9475 .collect();
9476
9477 if !contract_values.is_empty() {
9478 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9479 snapshot.revenue_records = rev_gen.generate(
9480 &snapshot.projects,
9481 &snapshot.cost_lines,
9482 &contract_values,
9483 start_date,
9484 end_date,
9485 );
9486 debug!(
9487 "Generated {} revenue recognition records for {} customer projects",
9488 snapshot.revenue_records.len(),
9489 contract_values.len()
9490 );
9491 }
9492 }
9493
9494 stats.project_count = snapshot.projects.len();
9495 stats.project_change_order_count = snapshot.change_orders.len();
9496 stats.project_cost_line_count = snapshot.cost_lines.len();
9497
9498 info!(
9499 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9500 snapshot.projects.len(),
9501 snapshot.change_orders.len(),
9502 snapshot.milestones.len(),
9503 snapshot.earned_value_metrics.len()
9504 );
9505 self.check_resources_with_log("post-project-accounting")?;
9506
9507 Ok(snapshot)
9508 }
9509
9510 fn phase_evolution_events(
9512 &mut self,
9513 stats: &mut EnhancedGenerationStatistics,
9514 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9515 if !self.phase_config.generate_evolution_events {
9516 debug!("Phase 24: Skipped (evolution events disabled)");
9517 return Ok((Vec::new(), Vec::new()));
9518 }
9519 info!("Phase 24: Generating Process Evolution + Organizational Events");
9520
9521 let seed = self.seed;
9522 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9523 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9524 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9525
9526 let mut proc_gen =
9528 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9529 seed + 100,
9530 );
9531 let process_events = proc_gen.generate_events(start_date, end_date);
9532
9533 let company_codes: Vec<String> = self
9535 .config
9536 .companies
9537 .iter()
9538 .map(|c| c.code.clone())
9539 .collect();
9540 let mut org_gen =
9541 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9542 seed + 101,
9543 );
9544 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9545
9546 stats.process_evolution_event_count = process_events.len();
9547 stats.organizational_event_count = org_events.len();
9548
9549 info!(
9550 "Evolution events generated: {} process evolution, {} organizational",
9551 process_events.len(),
9552 org_events.len()
9553 );
9554 self.check_resources_with_log("post-evolution-events")?;
9555
9556 Ok((process_events, org_events))
9557 }
9558
9559 fn phase_disruption_events(
9562 &self,
9563 stats: &mut EnhancedGenerationStatistics,
9564 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9565 if !self.config.organizational_events.enabled {
9566 debug!("Phase 24b: Skipped (organizational events disabled)");
9567 return Ok(Vec::new());
9568 }
9569 info!("Phase 24b: Generating Disruption Events");
9570
9571 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9572 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9573 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9574
9575 let company_codes: Vec<String> = self
9576 .config
9577 .companies
9578 .iter()
9579 .map(|c| c.code.clone())
9580 .collect();
9581
9582 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9583 let events = gen.generate(start_date, end_date, &company_codes);
9584
9585 stats.disruption_event_count = events.len();
9586 info!("Disruption events generated: {} events", events.len());
9587 self.check_resources_with_log("post-disruption-events")?;
9588
9589 Ok(events)
9590 }
9591
9592 fn phase_counterfactuals(
9599 &self,
9600 journal_entries: &[JournalEntry],
9601 stats: &mut EnhancedGenerationStatistics,
9602 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9603 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9604 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9605 return Ok(Vec::new());
9606 }
9607 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9608
9609 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9610
9611 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9612
9613 let specs = [
9615 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9616 CounterfactualSpec::ShiftDate { days: -14 },
9617 CounterfactualSpec::SelfApprove,
9618 CounterfactualSpec::SplitTransaction { split_count: 3 },
9619 ];
9620
9621 let pairs: Vec<_> = journal_entries
9622 .iter()
9623 .enumerate()
9624 .map(|(i, je)| {
9625 let spec = &specs[i % specs.len()];
9626 gen.generate(je, spec)
9627 })
9628 .collect();
9629
9630 stats.counterfactual_pair_count = pairs.len();
9631 info!(
9632 "Counterfactual pairs generated: {} pairs from {} journal entries",
9633 pairs.len(),
9634 journal_entries.len()
9635 );
9636 self.check_resources_with_log("post-counterfactuals")?;
9637
9638 Ok(pairs)
9639 }
9640
9641 fn phase_red_flags(
9648 &self,
9649 anomaly_labels: &AnomalyLabels,
9650 document_flows: &DocumentFlowSnapshot,
9651 stats: &mut EnhancedGenerationStatistics,
9652 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9653 if !self.config.fraud.enabled {
9654 debug!("Phase 26: Skipped (fraud generation disabled)");
9655 return Ok(Vec::new());
9656 }
9657 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9658
9659 use datasynth_generators::fraud::RedFlagGenerator;
9660
9661 let generator = RedFlagGenerator::new();
9662 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9663
9664 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9666 .labels
9667 .iter()
9668 .filter(|label| label.anomaly_type.is_intentional())
9669 .map(|label| label.document_id.as_str())
9670 .collect();
9671
9672 let mut flags = Vec::new();
9673
9674 for chain in &document_flows.p2p_chains {
9676 let doc_id = &chain.purchase_order.header.document_id;
9677 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9678 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9679 }
9680
9681 for chain in &document_flows.o2c_chains {
9683 let doc_id = &chain.sales_order.header.document_id;
9684 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9685 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9686 }
9687
9688 stats.red_flag_count = flags.len();
9689 info!(
9690 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9691 flags.len(),
9692 document_flows.p2p_chains.len(),
9693 document_flows.o2c_chains.len(),
9694 fraud_doc_ids.len()
9695 );
9696 self.check_resources_with_log("post-red-flags")?;
9697
9698 Ok(flags)
9699 }
9700
9701 fn phase_collusion_rings(
9707 &mut self,
9708 stats: &mut EnhancedGenerationStatistics,
9709 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9710 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9711 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9712 return Ok(Vec::new());
9713 }
9714 info!("Phase 26b: Generating Collusion Rings");
9715
9716 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9717 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9718 let months = self.config.global.period_months;
9719
9720 let employee_ids: Vec<String> = self
9721 .master_data
9722 .employees
9723 .iter()
9724 .map(|e| e.employee_id.clone())
9725 .collect();
9726 let vendor_ids: Vec<String> = self
9727 .master_data
9728 .vendors
9729 .iter()
9730 .map(|v| v.vendor_id.clone())
9731 .collect();
9732
9733 let mut generator =
9734 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9735 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9736
9737 stats.collusion_ring_count = rings.len();
9738 info!(
9739 "Collusion rings generated: {} rings, total members: {}",
9740 rings.len(),
9741 rings
9742 .iter()
9743 .map(datasynth_generators::fraud::CollusionRing::size)
9744 .sum::<usize>()
9745 );
9746 self.check_resources_with_log("post-collusion-rings")?;
9747
9748 Ok(rings)
9749 }
9750
9751 fn phase_temporal_attributes(
9756 &mut self,
9757 stats: &mut EnhancedGenerationStatistics,
9758 ) -> SynthResult<
9759 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9760 > {
9761 if !self.config.temporal_attributes.enabled {
9762 debug!("Phase 27: Skipped (temporal attributes disabled)");
9763 return Ok(Vec::new());
9764 }
9765 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9766
9767 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9768 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9769
9770 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9774 || self.config.temporal_attributes.enabled;
9775 let temporal_config = {
9776 let ta = &self.config.temporal_attributes;
9777 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9778 .enabled(ta.enabled)
9779 .closed_probability(ta.valid_time.closed_probability)
9780 .avg_validity_days(ta.valid_time.avg_validity_days)
9781 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9782 .with_version_chains(if generate_version_chains {
9783 ta.avg_versions_per_entity
9784 } else {
9785 1.0
9786 })
9787 .build()
9788 };
9789 let temporal_config = if self
9791 .config
9792 .temporal_attributes
9793 .transaction_time
9794 .allow_backdating
9795 {
9796 let mut c = temporal_config;
9797 c.transaction_time.allow_backdating = true;
9798 c.transaction_time.backdating_probability = self
9799 .config
9800 .temporal_attributes
9801 .transaction_time
9802 .backdating_probability;
9803 c.transaction_time.max_backdate_days = self
9804 .config
9805 .temporal_attributes
9806 .transaction_time
9807 .max_backdate_days;
9808 c
9809 } else {
9810 temporal_config
9811 };
9812 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9813 temporal_config,
9814 self.seed + 130,
9815 start_date,
9816 );
9817
9818 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9819 self.seed + 130,
9820 datasynth_core::GeneratorType::Vendor,
9821 );
9822
9823 let chains: Vec<_> = self
9824 .master_data
9825 .vendors
9826 .iter()
9827 .map(|vendor| {
9828 let id = uuid_factory.next();
9829 gen.generate_version_chain(vendor.clone(), id)
9830 })
9831 .collect();
9832
9833 stats.temporal_version_chain_count = chains.len();
9834 info!("Temporal version chains generated: {} chains", chains.len());
9835 self.check_resources_with_log("post-temporal-attributes")?;
9836
9837 Ok(chains)
9838 }
9839
9840 fn phase_entity_relationships(
9850 &self,
9851 journal_entries: &[JournalEntry],
9852 document_flows: &DocumentFlowSnapshot,
9853 stats: &mut EnhancedGenerationStatistics,
9854 ) -> SynthResult<(
9855 Option<datasynth_core::models::EntityGraph>,
9856 Vec<datasynth_core::models::CrossProcessLink>,
9857 )> {
9858 use datasynth_generators::relationships::{
9859 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9860 TransactionSummary,
9861 };
9862
9863 let rs_enabled = self.config.relationship_strength.enabled;
9864 let cpl_enabled = self.config.cross_process_links.enabled
9865 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9866
9867 if !rs_enabled && !cpl_enabled {
9868 debug!(
9869 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9870 );
9871 return Ok((None, Vec::new()));
9872 }
9873
9874 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9875
9876 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9877 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9878
9879 let company_code = self
9880 .config
9881 .companies
9882 .first()
9883 .map(|c| c.code.as_str())
9884 .unwrap_or("1000");
9885
9886 let gen_config = EntityGraphConfig {
9888 enabled: rs_enabled,
9889 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9890 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9891 enable_return_flows: false,
9892 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9893 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9894 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9896 1.0
9897 } else {
9898 0.30
9899 },
9900 ..Default::default()
9901 },
9902 strength_config: datasynth_generators::relationships::StrengthConfig {
9903 transaction_volume_weight: self
9904 .config
9905 .relationship_strength
9906 .calculation
9907 .transaction_volume_weight,
9908 transaction_count_weight: self
9909 .config
9910 .relationship_strength
9911 .calculation
9912 .transaction_count_weight,
9913 duration_weight: self
9914 .config
9915 .relationship_strength
9916 .calculation
9917 .relationship_duration_weight,
9918 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9919 mutual_connections_weight: self
9920 .config
9921 .relationship_strength
9922 .calculation
9923 .mutual_connections_weight,
9924 recency_half_life_days: self
9925 .config
9926 .relationship_strength
9927 .calculation
9928 .recency_half_life_days,
9929 },
9930 ..Default::default()
9931 };
9932
9933 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9934
9935 let entity_graph = if rs_enabled {
9937 let vendor_summaries: Vec<EntitySummary> = self
9939 .master_data
9940 .vendors
9941 .iter()
9942 .map(|v| {
9943 EntitySummary::new(
9944 &v.vendor_id,
9945 &v.name,
9946 datasynth_core::models::GraphEntityType::Vendor,
9947 start_date,
9948 )
9949 })
9950 .collect();
9951
9952 let customer_summaries: Vec<EntitySummary> = self
9953 .master_data
9954 .customers
9955 .iter()
9956 .map(|c| {
9957 EntitySummary::new(
9958 &c.customer_id,
9959 &c.name,
9960 datasynth_core::models::GraphEntityType::Customer,
9961 start_date,
9962 )
9963 })
9964 .collect();
9965
9966 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9971 std::collections::HashMap::new();
9972
9973 for je in journal_entries {
9974 let cc = je.header.company_code.clone();
9975 let posting_date = je.header.posting_date;
9976 for line in &je.lines {
9977 if let Some(ref tp) = line.trading_partner {
9978 let amount = if line.debit_amount > line.credit_amount {
9979 line.debit_amount
9980 } else {
9981 line.credit_amount
9982 };
9983 let entry = txn_summaries
9984 .entry((cc.clone(), tp.clone()))
9985 .or_insert_with(|| TransactionSummary {
9986 total_volume: rust_decimal::Decimal::ZERO,
9987 transaction_count: 0,
9988 first_transaction_date: posting_date,
9989 last_transaction_date: posting_date,
9990 related_entities: std::collections::HashSet::new(),
9991 });
9992 entry.total_volume += amount;
9993 entry.transaction_count += 1;
9994 if posting_date < entry.first_transaction_date {
9995 entry.first_transaction_date = posting_date;
9996 }
9997 if posting_date > entry.last_transaction_date {
9998 entry.last_transaction_date = posting_date;
9999 }
10000 entry.related_entities.insert(cc.clone());
10001 }
10002 }
10003 }
10004
10005 for chain in &document_flows.p2p_chains {
10008 let cc = chain.purchase_order.header.company_code.clone();
10009 let vendor_id = chain.purchase_order.vendor_id.clone();
10010 let po_date = chain.purchase_order.header.document_date;
10011 let amount = chain.purchase_order.total_net_amount;
10012
10013 let entry = txn_summaries
10014 .entry((cc.clone(), vendor_id))
10015 .or_insert_with(|| TransactionSummary {
10016 total_volume: rust_decimal::Decimal::ZERO,
10017 transaction_count: 0,
10018 first_transaction_date: po_date,
10019 last_transaction_date: po_date,
10020 related_entities: std::collections::HashSet::new(),
10021 });
10022 entry.total_volume += amount;
10023 entry.transaction_count += 1;
10024 if po_date < entry.first_transaction_date {
10025 entry.first_transaction_date = po_date;
10026 }
10027 if po_date > entry.last_transaction_date {
10028 entry.last_transaction_date = po_date;
10029 }
10030 entry.related_entities.insert(cc);
10031 }
10032
10033 for chain in &document_flows.o2c_chains {
10035 let cc = chain.sales_order.header.company_code.clone();
10036 let customer_id = chain.sales_order.customer_id.clone();
10037 let so_date = chain.sales_order.header.document_date;
10038 let amount = chain.sales_order.total_net_amount;
10039
10040 let entry = txn_summaries
10041 .entry((cc.clone(), customer_id))
10042 .or_insert_with(|| TransactionSummary {
10043 total_volume: rust_decimal::Decimal::ZERO,
10044 transaction_count: 0,
10045 first_transaction_date: so_date,
10046 last_transaction_date: so_date,
10047 related_entities: std::collections::HashSet::new(),
10048 });
10049 entry.total_volume += amount;
10050 entry.transaction_count += 1;
10051 if so_date < entry.first_transaction_date {
10052 entry.first_transaction_date = so_date;
10053 }
10054 if so_date > entry.last_transaction_date {
10055 entry.last_transaction_date = so_date;
10056 }
10057 entry.related_entities.insert(cc);
10058 }
10059
10060 let as_of_date = journal_entries
10061 .last()
10062 .map(|je| je.header.posting_date)
10063 .unwrap_or(start_date);
10064
10065 let graph = gen.generate_entity_graph(
10066 company_code,
10067 as_of_date,
10068 &vendor_summaries,
10069 &customer_summaries,
10070 &txn_summaries,
10071 );
10072
10073 info!(
10074 "Entity relationship graph: {} nodes, {} edges",
10075 graph.nodes.len(),
10076 graph.edges.len()
10077 );
10078 stats.entity_relationship_node_count = graph.nodes.len();
10079 stats.entity_relationship_edge_count = graph.edges.len();
10080 Some(graph)
10081 } else {
10082 None
10083 };
10084
10085 let cross_process_links = if cpl_enabled {
10087 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10089 .p2p_chains
10090 .iter()
10091 .flat_map(|chain| {
10092 let vendor_id = chain.purchase_order.vendor_id.clone();
10093 let cc = chain.purchase_order.header.company_code.clone();
10094 chain.goods_receipts.iter().flat_map(move |gr| {
10095 gr.items.iter().filter_map({
10096 let doc_id = gr.header.document_id.clone();
10097 let v_id = vendor_id.clone();
10098 let company = cc.clone();
10099 let receipt_date = gr.header.document_date;
10100 move |item| {
10101 item.base
10102 .material_id
10103 .as_ref()
10104 .map(|mat_id| GoodsReceiptRef {
10105 document_id: doc_id.clone(),
10106 material_id: mat_id.clone(),
10107 quantity: item.base.quantity,
10108 receipt_date,
10109 vendor_id: v_id.clone(),
10110 company_code: company.clone(),
10111 })
10112 }
10113 })
10114 })
10115 })
10116 .collect();
10117
10118 let del_refs: Vec<DeliveryRef> = document_flows
10120 .o2c_chains
10121 .iter()
10122 .flat_map(|chain| {
10123 let customer_id = chain.sales_order.customer_id.clone();
10124 let cc = chain.sales_order.header.company_code.clone();
10125 chain.deliveries.iter().flat_map(move |del| {
10126 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10127 del.items.iter().filter_map({
10128 let doc_id = del.header.document_id.clone();
10129 let c_id = customer_id.clone();
10130 let company = cc.clone();
10131 move |item| {
10132 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10133 document_id: doc_id.clone(),
10134 material_id: mat_id.clone(),
10135 quantity: item.base.quantity,
10136 delivery_date,
10137 customer_id: c_id.clone(),
10138 company_code: company.clone(),
10139 })
10140 }
10141 })
10142 })
10143 })
10144 .collect();
10145
10146 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10147 info!("Cross-process links generated: {} links", links.len());
10148 stats.cross_process_link_count = links.len();
10149 links
10150 } else {
10151 Vec::new()
10152 };
10153
10154 self.check_resources_with_log("post-entity-relationships")?;
10155 Ok((entity_graph, cross_process_links))
10156 }
10157
10158 fn phase_industry_data(
10160 &self,
10161 stats: &mut EnhancedGenerationStatistics,
10162 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10163 if !self.config.industry_specific.enabled {
10164 return None;
10165 }
10166 info!("Phase 29: Generating industry-specific data");
10167 let output = datasynth_generators::industry::factory::generate_industry_output(
10168 self.config.global.industry,
10169 );
10170 stats.industry_gl_account_count = output.gl_accounts.len();
10171 info!(
10172 "Industry data generated: {} GL accounts for {:?}",
10173 output.gl_accounts.len(),
10174 self.config.global.industry
10175 );
10176 Some(output)
10177 }
10178
10179 fn phase_opening_balances(
10181 &mut self,
10182 coa: &Arc<ChartOfAccounts>,
10183 stats: &mut EnhancedGenerationStatistics,
10184 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10185 if !self.config.balance.generate_opening_balances {
10186 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10187 return Ok(Vec::new());
10188 }
10189 info!("Phase 3b: Generating Opening Balances");
10190
10191 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10192 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10193 let fiscal_year = start_date.year();
10194
10195 if let Some(ctx) = &self.shard_context {
10206 if !ctx.opening_balances.is_empty() {
10207 debug!(
10208 "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10209 ctx.opening_balances.len()
10210 );
10211 let mut results = Vec::new();
10212 for company in &self.config.companies {
10213 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10214 .opening_balances
10215 .iter()
10216 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10217 .collect();
10218 let total_assets = ctx
10219 .opening_balances
10220 .iter()
10221 .filter(|ob| {
10222 matches!(
10223 ob.account_type,
10224 AccountType::Asset | AccountType::ContraAsset
10225 )
10226 })
10227 .map(|ob| ob.net_balance())
10228 .sum::<rust_decimal::Decimal>();
10229 let total_liabilities = ctx
10230 .opening_balances
10231 .iter()
10232 .filter(|ob| {
10233 matches!(
10234 ob.account_type,
10235 AccountType::Liability | AccountType::ContraLiability
10236 )
10237 })
10238 .map(|ob| ob.net_balance())
10239 .sum::<rust_decimal::Decimal>();
10240 let total_equity = ctx
10241 .opening_balances
10242 .iter()
10243 .filter(|ob| {
10244 matches!(
10245 ob.account_type,
10246 AccountType::Equity | AccountType::ContraEquity
10247 )
10248 })
10249 .map(|ob| ob.net_balance())
10250 .sum::<rust_decimal::Decimal>();
10251 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10252 < rust_decimal::Decimal::ONE;
10253 results.push(GeneratedOpeningBalance {
10254 company_code: company.code.clone(),
10255 as_of_date: start_date,
10256 balances,
10257 total_assets,
10258 total_liabilities,
10259 total_equity,
10260 is_balanced,
10261 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10262 current_ratio: None,
10263 quick_ratio: None,
10264 debt_to_equity: None,
10265 working_capital: rust_decimal::Decimal::ZERO,
10266 },
10267 });
10268 }
10269 stats.opening_balance_count = results.len();
10270 info!(
10271 "Phase 3b: opening-balance carryover applied ({} companies)",
10272 results.len()
10273 );
10274 self.check_resources_with_log("post-opening-balances")?;
10275 return Ok(results);
10276 }
10277 }
10278
10279 let industry = match self.config.global.industry {
10280 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10281 IndustrySector::Retail => IndustryType::Retail,
10282 IndustrySector::FinancialServices => IndustryType::Financial,
10283 IndustrySector::Healthcare => IndustryType::Healthcare,
10284 IndustrySector::Technology => IndustryType::Technology,
10285 _ => IndustryType::Manufacturing,
10286 };
10287
10288 let config = datasynth_generators::OpeningBalanceConfig {
10289 industry,
10290 ..Default::default()
10291 };
10292 let mut gen =
10293 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10294
10295 let mut results = Vec::new();
10296 for company in &self.config.companies {
10297 let spec = OpeningBalanceSpec::new(
10298 company.code.clone(),
10299 start_date,
10300 fiscal_year,
10301 company.currency.clone(),
10302 rust_decimal::Decimal::new(10_000_000, 0),
10303 industry,
10304 );
10305 let ob = gen.generate(&spec, coa, start_date, &company.code);
10306 results.push(ob);
10307 }
10308
10309 stats.opening_balance_count = results.len();
10310 info!("Opening balances generated: {} companies", results.len());
10311 self.check_resources_with_log("post-opening-balances")?;
10312
10313 Ok(results)
10314 }
10315
10316 fn phase_subledger_reconciliation(
10318 &mut self,
10319 subledger: &SubledgerSnapshot,
10320 entries: &[JournalEntry],
10321 stats: &mut EnhancedGenerationStatistics,
10322 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10323 if !self.config.balance.reconcile_subledgers {
10324 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10325 return Ok(Vec::new());
10326 }
10327 info!("Phase 9b: Reconciling GL to subledger balances");
10328
10329 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10330 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10331 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10332
10333 let tracker_config = BalanceTrackerConfig {
10335 validate_on_each_entry: false,
10336 track_history: false,
10337 fail_on_validation_error: false,
10338 ..Default::default()
10339 };
10340 let recon_currency = self
10341 .config
10342 .companies
10343 .first()
10344 .map(|c| c.currency.clone())
10345 .unwrap_or_else(|| "USD".to_string());
10346 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10347 let validation_errors = tracker.apply_entries(entries);
10348 if !validation_errors.is_empty() {
10349 warn!(
10350 error_count = validation_errors.len(),
10351 "Balance tracker encountered validation errors during subledger reconciliation"
10352 );
10353 for err in &validation_errors {
10354 debug!("Balance validation error: {:?}", err);
10355 }
10356 }
10357
10358 let mut engine = datasynth_generators::ReconciliationEngine::new(
10359 datasynth_generators::ReconciliationConfig::default(),
10360 );
10361
10362 let mut results = Vec::new();
10363 let company_code = self
10364 .config
10365 .companies
10366 .first()
10367 .map(|c| c.code.as_str())
10368 .unwrap_or("1000");
10369
10370 if !subledger.ar_invoices.is_empty() {
10372 let gl_balance = tracker
10373 .get_account_balance(
10374 company_code,
10375 datasynth_core::accounts::control_accounts::AR_CONTROL,
10376 )
10377 .map(|b| b.closing_balance)
10378 .unwrap_or_default();
10379 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10380 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10381 }
10382
10383 if !subledger.ap_invoices.is_empty() {
10385 let gl_balance = tracker
10386 .get_account_balance(
10387 company_code,
10388 datasynth_core::accounts::control_accounts::AP_CONTROL,
10389 )
10390 .map(|b| b.closing_balance)
10391 .unwrap_or_default();
10392 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10393 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10394 }
10395
10396 if !subledger.fa_records.is_empty() {
10398 let gl_asset_balance = tracker
10399 .get_account_balance(
10400 company_code,
10401 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10402 )
10403 .map(|b| b.closing_balance)
10404 .unwrap_or_default();
10405 let gl_accum_depr_balance = tracker
10406 .get_account_balance(
10407 company_code,
10408 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10409 )
10410 .map(|b| b.closing_balance)
10411 .unwrap_or_default();
10412 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10413 subledger.fa_records.iter().collect();
10414 let (asset_recon, depr_recon) = engine.reconcile_fa(
10415 company_code,
10416 end_date,
10417 gl_asset_balance,
10418 gl_accum_depr_balance,
10419 &fa_refs,
10420 );
10421 results.push(asset_recon);
10422 results.push(depr_recon);
10423 }
10424
10425 if !subledger.inventory_positions.is_empty() {
10427 let gl_balance = tracker
10428 .get_account_balance(
10429 company_code,
10430 datasynth_core::accounts::control_accounts::INVENTORY,
10431 )
10432 .map(|b| b.closing_balance)
10433 .unwrap_or_default();
10434 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10435 subledger.inventory_positions.iter().collect();
10436 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10437 }
10438
10439 stats.subledger_reconciliation_count = results.len();
10440 let passed = results.iter().filter(|r| r.is_balanced()).count();
10441 let failed = results.len() - passed;
10442 info!(
10443 "Subledger reconciliation: {} checks, {} passed, {} failed",
10444 results.len(),
10445 passed,
10446 failed
10447 );
10448 self.check_resources_with_log("post-subledger-reconciliation")?;
10449
10450 Ok(results)
10451 }
10452
10453 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10455 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10456
10457 let coa_framework = self.resolve_coa_framework();
10458
10459 let mut gen = ChartOfAccountsGenerator::new(
10460 self.config.chart_of_accounts.complexity,
10461 self.config.global.industry,
10462 self.seed,
10463 )
10464 .with_coa_framework(coa_framework)
10465 .with_expand_industry_subaccounts(
10467 self.config.chart_of_accounts.expand_industry_subaccounts,
10468 );
10469
10470 let mut built = gen.generate();
10471 if self.config.accounting_standards.enabled {
10475 use datasynth_config::schema::AccountingFrameworkConfig;
10476 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10477 match f {
10478 AccountingFrameworkConfig::UsGaap => "us_gaap",
10479 AccountingFrameworkConfig::Ifrs => "ifrs",
10480 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10481 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10482 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10483 }
10484 .to_string()
10485 });
10486 }
10487 let coa = Arc::new(built);
10488 self.coa = Some(Arc::clone(&coa));
10489
10490 if let Some(pb) = pb {
10491 pb.finish_with_message("Chart of Accounts complete");
10492 }
10493
10494 Ok(coa)
10495 }
10496
10497 fn generate_master_data(&mut self) -> SynthResult<()> {
10499 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10500 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10501 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10502
10503 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10505
10506 let pack = self.primary_pack().clone();
10508
10509 let vendors_per_company = self.phase_config.vendors_per_company;
10511 let customers_per_company = self.phase_config.customers_per_company;
10512 let materials_per_company = self.phase_config.materials_per_company;
10513 let assets_per_company = self.phase_config.assets_per_company;
10514 let coa_framework = self.resolve_coa_framework();
10515
10516 let per_company_results: Vec<_> = self
10519 .config
10520 .companies
10521 .par_iter()
10522 .enumerate()
10523 .map(|(i, company)| {
10524 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10525 let pack = pack.clone();
10526
10527 let mut vendor_gen = VendorGenerator::new(company_seed);
10529 vendor_gen.set_country_pack(pack.clone());
10530 vendor_gen.set_coa_framework(coa_framework);
10531 vendor_gen.set_counter_offset(i * vendors_per_company);
10532 vendor_gen.set_template_provider(self.template_provider.clone());
10535 if self.config.vendor_network.enabled {
10537 let vn = &self.config.vendor_network;
10538 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10539 enabled: true,
10540 depth: vn.depth,
10541 tier1_count: datasynth_generators::TierCountConfig::new(
10542 vn.tier1.min,
10543 vn.tier1.max,
10544 ),
10545 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10546 vn.tier2_per_parent.min,
10547 vn.tier2_per_parent.max,
10548 ),
10549 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10550 vn.tier3_per_parent.min,
10551 vn.tier3_per_parent.max,
10552 ),
10553 cluster_distribution: datasynth_generators::ClusterDistribution {
10554 reliable_strategic: vn.clusters.reliable_strategic,
10555 standard_operational: vn.clusters.standard_operational,
10556 transactional: vn.clusters.transactional,
10557 problematic: vn.clusters.problematic,
10558 },
10559 concentration_limits: datasynth_generators::ConcentrationLimits {
10560 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10561 max_top5: vn.dependencies.top_5_concentration,
10562 },
10563 ..datasynth_generators::VendorNetworkConfig::default()
10564 });
10565 }
10566 let vendor_pool =
10567 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10568
10569 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10571 customer_gen.set_country_pack(pack.clone());
10572 customer_gen.set_coa_framework(coa_framework);
10573 customer_gen.set_counter_offset(i * customers_per_company);
10574 customer_gen.set_template_provider(self.template_provider.clone());
10576 if self.config.customer_segmentation.enabled {
10578 let cs = &self.config.customer_segmentation;
10579 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10580 enabled: true,
10581 segment_distribution: datasynth_generators::SegmentDistribution {
10582 enterprise: cs.value_segments.enterprise.customer_share,
10583 mid_market: cs.value_segments.mid_market.customer_share,
10584 smb: cs.value_segments.smb.customer_share,
10585 consumer: cs.value_segments.consumer.customer_share,
10586 },
10587 referral_config: datasynth_generators::ReferralConfig {
10588 enabled: cs.networks.referrals.enabled,
10589 referral_rate: cs.networks.referrals.referral_rate,
10590 ..Default::default()
10591 },
10592 hierarchy_config: datasynth_generators::HierarchyConfig {
10593 enabled: cs.networks.corporate_hierarchies.enabled,
10594 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10595 ..Default::default()
10596 },
10597 ..Default::default()
10598 };
10599 customer_gen.set_segmentation_config(seg_cfg);
10600 }
10601 let customer_pool = customer_gen.generate_customer_pool(
10602 customers_per_company,
10603 &company.code,
10604 start_date,
10605 );
10606
10607 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10609 material_gen.set_country_pack(pack.clone());
10610 material_gen.set_counter_offset(i * materials_per_company);
10611 material_gen.set_template_provider(self.template_provider.clone());
10613 let material_pool = material_gen.generate_material_pool(
10614 materials_per_company,
10615 &company.code,
10616 start_date,
10617 );
10618
10619 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10621 asset_gen.set_template_provider(self.template_provider.clone());
10623 let asset_pool = asset_gen.generate_asset_pool(
10624 assets_per_company,
10625 &company.code,
10626 (start_date, end_date),
10627 );
10628
10629 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10631 employee_gen.set_country_pack(pack);
10632 employee_gen.set_template_provider(self.template_provider.clone());
10634 let employee_pool =
10635 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10636
10637 let employee_change_history =
10639 employee_gen.generate_all_change_history(&employee_pool, end_date);
10640
10641 let employee_ids: Vec<String> = employee_pool
10643 .employees
10644 .iter()
10645 .map(|e| e.employee_id.clone())
10646 .collect();
10647 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10648 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10649
10650 let mut pc_gen =
10653 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10654 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10655
10656 (
10657 vendor_pool.vendors,
10658 customer_pool.customers,
10659 material_pool.materials,
10660 asset_pool.assets,
10661 employee_pool.employees,
10662 employee_change_history,
10663 cost_centers,
10664 profit_centers,
10665 )
10666 })
10667 .collect();
10668
10669 for (
10671 vendors,
10672 customers,
10673 materials,
10674 assets,
10675 employees,
10676 change_history,
10677 cost_centers,
10678 profit_centers,
10679 ) in per_company_results
10680 {
10681 self.master_data.vendors.extend(vendors);
10682 self.master_data.customers.extend(customers);
10683 self.master_data.materials.extend(materials);
10684 self.master_data.assets.extend(assets);
10685 self.master_data.employees.extend(employees);
10686 self.master_data.cost_centers.extend(cost_centers);
10687 self.master_data.profit_centers.extend(profit_centers);
10688 self.master_data
10689 .employee_change_history
10690 .extend(change_history);
10691 }
10692
10693 {
10697 use datasynth_core::models::IndustrySector;
10698 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10699 let industry = match self.config.global.industry {
10700 IndustrySector::Manufacturing => "manufacturing",
10701 IndustrySector::Retail => "retail",
10702 IndustrySector::FinancialServices => "financial_services",
10703 IndustrySector::Technology => "technology",
10704 IndustrySector::Healthcare => "healthcare",
10705 _ => "other",
10706 };
10707 for (i, company) in self.config.companies.iter().enumerate() {
10708 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10709 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10710 let profile = profile_gen.generate(&company.code, industry);
10711 self.master_data.organizational_profiles.push(profile);
10712 }
10713 }
10714
10715 if let Some(pb) = &pb {
10716 pb.inc(total);
10717 }
10718 if let Some(pb) = pb {
10719 pb.finish_with_message("Master data generation complete");
10720 }
10721
10722 Ok(())
10723 }
10724
10725 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10727 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10728 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10729
10730 let months = (self.config.global.period_months as usize).max(1);
10733 let p2p_count = self
10734 .phase_config
10735 .p2p_chains
10736 .min(self.master_data.vendors.len() * 2 * months);
10737 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10738
10739 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10741 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10742 p2p_gen.set_country_pack(self.primary_pack().clone());
10743 if let Some(ctx) = &self.temporal_context {
10747 p2p_gen.set_temporal_context(Arc::clone(ctx));
10748 }
10749
10750 for i in 0..p2p_count {
10751 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10752 let materials: Vec<&Material> = self
10753 .master_data
10754 .materials
10755 .iter()
10756 .skip(i % self.master_data.materials.len().max(1))
10757 .take(2.min(self.master_data.materials.len()))
10758 .collect();
10759
10760 if materials.is_empty() {
10761 continue;
10762 }
10763
10764 let company = &self.config.companies[i % self.config.companies.len()];
10765 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10766 let fiscal_period = po_date.month() as u8;
10767 let created_by = if self.master_data.employees.is_empty() {
10768 "SYSTEM"
10769 } else {
10770 self.master_data.employees[i % self.master_data.employees.len()]
10771 .user_id
10772 .as_str()
10773 };
10774
10775 let chain = p2p_gen.generate_chain(
10776 &company.code,
10777 vendor,
10778 &materials,
10779 po_date,
10780 start_date.year() as u16,
10781 fiscal_period,
10782 created_by,
10783 );
10784
10785 flows.purchase_orders.push(chain.purchase_order.clone());
10787 flows.goods_receipts.extend(chain.goods_receipts.clone());
10788 if let Some(vi) = &chain.vendor_invoice {
10789 flows.vendor_invoices.push(vi.clone());
10790 }
10791 if let Some(payment) = &chain.payment {
10792 flows.payments.push(payment.clone());
10793 }
10794 for remainder in &chain.remainder_payments {
10795 flows.payments.push(remainder.clone());
10796 }
10797 flows.p2p_chains.push(chain);
10798
10799 if let Some(pb) = &pb {
10800 pb.inc(1);
10801 }
10802 }
10803
10804 if let Some(pb) = pb {
10805 pb.finish_with_message("P2P document flows complete");
10806 }
10807
10808 let o2c_count = self
10811 .phase_config
10812 .o2c_chains
10813 .min(self.master_data.customers.len() * 2 * months);
10814 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10815
10816 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10818 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10819 o2c_gen.set_country_pack(self.primary_pack().clone());
10820 if let Some(ctx) = &self.temporal_context {
10822 o2c_gen.set_temporal_context(Arc::clone(ctx));
10823 }
10824
10825 for i in 0..o2c_count {
10826 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10827 let materials: Vec<&Material> = self
10828 .master_data
10829 .materials
10830 .iter()
10831 .skip(i % self.master_data.materials.len().max(1))
10832 .take(2.min(self.master_data.materials.len()))
10833 .collect();
10834
10835 if materials.is_empty() {
10836 continue;
10837 }
10838
10839 let company = &self.config.companies[i % self.config.companies.len()];
10840 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10841 let fiscal_period = so_date.month() as u8;
10842 let created_by = if self.master_data.employees.is_empty() {
10843 "SYSTEM"
10844 } else {
10845 self.master_data.employees[i % self.master_data.employees.len()]
10846 .user_id
10847 .as_str()
10848 };
10849
10850 let chain = o2c_gen.generate_chain(
10851 &company.code,
10852 customer,
10853 &materials,
10854 so_date,
10855 start_date.year() as u16,
10856 fiscal_period,
10857 created_by,
10858 );
10859
10860 flows.sales_orders.push(chain.sales_order.clone());
10862 flows.deliveries.extend(chain.deliveries.clone());
10863 if let Some(ci) = &chain.customer_invoice {
10864 flows.customer_invoices.push(ci.clone());
10865 }
10866 if let Some(receipt) = &chain.customer_receipt {
10867 flows.payments.push(receipt.clone());
10868 }
10869 for receipt in &chain.remainder_receipts {
10871 flows.payments.push(receipt.clone());
10872 }
10873 flows.o2c_chains.push(chain);
10874
10875 if let Some(pb) = &pb {
10876 pb.inc(1);
10877 }
10878 }
10879
10880 if let Some(pb) = pb {
10881 pb.finish_with_message("O2C document flows complete");
10882 }
10883
10884 {
10888 let mut refs = Vec::new();
10889 for doc in &flows.purchase_orders {
10890 refs.extend(doc.header.document_references.iter().cloned());
10891 }
10892 for doc in &flows.goods_receipts {
10893 refs.extend(doc.header.document_references.iter().cloned());
10894 }
10895 for doc in &flows.vendor_invoices {
10896 refs.extend(doc.header.document_references.iter().cloned());
10897 }
10898 for doc in &flows.sales_orders {
10899 refs.extend(doc.header.document_references.iter().cloned());
10900 }
10901 for doc in &flows.deliveries {
10902 refs.extend(doc.header.document_references.iter().cloned());
10903 }
10904 for doc in &flows.customer_invoices {
10905 refs.extend(doc.header.document_references.iter().cloned());
10906 }
10907 for doc in &flows.payments {
10908 refs.extend(doc.header.document_references.iter().cloned());
10909 }
10910 debug!(
10911 "Collected {} document cross-references from document headers",
10912 refs.len()
10913 );
10914 flows.document_references = refs;
10915 }
10916
10917 Ok(())
10918 }
10919
10920 fn generate_journal_entries(
10922 &mut self,
10923 coa: &Arc<ChartOfAccounts>,
10924 ) -> SynthResult<Vec<JournalEntry>> {
10925 use datasynth_core::traits::ParallelGenerator;
10926
10927 let total = self.calculate_total_transactions();
10928 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10929
10930 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10931 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10932 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10933
10934 let company_codes: Vec<String> = self
10935 .config
10936 .companies
10937 .iter()
10938 .map(|c| c.code.clone())
10939 .collect();
10940
10941 let mut generator = JournalEntryGenerator::new_with_params(
10942 self.config.transactions.clone(),
10943 Arc::clone(coa),
10944 company_codes,
10945 start_date,
10946 end_date,
10947 self.seed,
10948 );
10949 let bp = &self.config.business_processes;
10952 generator.set_business_process_weights(
10953 bp.o2c_weight,
10954 bp.p2p_weight,
10955 bp.r2r_weight,
10956 bp.h2r_weight,
10957 bp.a2r_weight,
10958 );
10959 generator
10964 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10965 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10966 let generator = generator;
10967
10968 let je_pack = self.primary_pack();
10972
10973 let cc_pool: Vec<String> = self
10980 .master_data
10981 .cost_centers
10982 .iter()
10983 .map(|c| c.id.clone())
10984 .collect();
10985 let pc_pool: Vec<String> = self
10986 .master_data
10987 .profit_centers
10988 .iter()
10989 .map(|p| p.id.clone())
10990 .collect();
10991
10992 let user_pool_from_employees =
10998 datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
10999
11000 let mut generator = generator
11001 .with_master_data(
11002 &self.master_data.vendors,
11003 &self.master_data.customers,
11004 &self.master_data.materials,
11005 )
11006 .with_cost_center_pool(cc_pool)
11007 .with_profit_center_pool(pc_pool)
11008 .with_country_pack_names(je_pack)
11009 .with_user_pool(user_pool_from_employees)
11010 .with_country_pack_temporal(
11011 self.config.temporal_patterns.clone(),
11012 self.seed + 200,
11013 je_pack,
11014 )
11015 .with_persona_errors(true)
11016 .with_fraud_config(self.config.fraud.clone());
11017
11018 let temporal_enabled = self.config.temporal.enabled;
11023 let regimes_enabled = self.config.distributions.regime_changes.enabled;
11024 if temporal_enabled || regimes_enabled {
11025 let mut drift_config = if temporal_enabled {
11026 self.config.temporal.to_core_config()
11027 } else {
11028 datasynth_core::distributions::DriftConfig::default()
11031 };
11032 if regimes_enabled {
11033 self.config
11034 .distributions
11035 .regime_changes
11036 .apply_to(&mut drift_config, start_date);
11037 }
11038 generator = generator.with_drift_config(drift_config, self.seed + 100);
11039 }
11040
11041 self.check_memory_limit()?;
11043
11044 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11046
11047 let entries = if total >= 10_000 && num_threads > 1 {
11051 let sub_generators = generator.split(num_threads);
11054 let entries_per_thread = total as usize / num_threads;
11055 let remainder = total as usize % num_threads;
11056
11057 let batches: Vec<Vec<JournalEntry>> = sub_generators
11058 .into_par_iter()
11059 .enumerate()
11060 .map(|(i, mut gen)| {
11061 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11062 gen.generate_batch(count)
11063 })
11064 .collect();
11065
11066 let entries = JournalEntryGenerator::merge_results(batches);
11068
11069 if let Some(pb) = &pb {
11070 pb.inc(total);
11071 }
11072 entries
11073 } else {
11074 let mut entries = Vec::with_capacity(total as usize);
11076 for _ in 0..total {
11077 let entry = generator.generate();
11078 entries.push(entry);
11079 if let Some(pb) = &pb {
11080 pb.inc(1);
11081 }
11082 }
11083 entries
11084 };
11085
11086 if let Some(pb) = pb {
11087 pb.finish_with_message("Journal entries complete");
11088 }
11089
11090 Ok(entries)
11091 }
11092
11093 fn generate_jes_from_document_flows(
11098 &mut self,
11099 flows: &DocumentFlowSnapshot,
11100 ) -> SynthResult<Vec<JournalEntry>> {
11101 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11102 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11103
11104 let je_config = match self.resolve_coa_framework() {
11105 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11106 CoAFramework::GermanSkr04 => {
11107 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11108 DocumentFlowJeConfig::from(&fa)
11109 }
11110 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11111 };
11112
11113 let populate_fec = je_config.populate_fec_fields;
11114 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11115
11116 let cc_pool: Vec<String> = self
11122 .master_data
11123 .cost_centers
11124 .iter()
11125 .map(|c| c.id.clone())
11126 .collect();
11127 let pc_pool: Vec<String> = self
11128 .master_data
11129 .profit_centers
11130 .iter()
11131 .map(|p| p.id.clone())
11132 .collect();
11133 if !cc_pool.is_empty() {
11134 generator.set_cost_center_pool(cc_pool);
11135 }
11136 if !pc_pool.is_empty() {
11137 generator.set_profit_center_pool(pc_pool);
11138 }
11139
11140 if populate_fec {
11144 let mut aux_lookup = std::collections::HashMap::new();
11145 for vendor in &self.master_data.vendors {
11146 if let Some(ref aux) = vendor.auxiliary_gl_account {
11147 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11148 }
11149 }
11150 for customer in &self.master_data.customers {
11151 if let Some(ref aux) = customer.auxiliary_gl_account {
11152 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11153 }
11154 }
11155 if !aux_lookup.is_empty() {
11156 generator.set_auxiliary_account_lookup(aux_lookup);
11157 }
11158 }
11159
11160 let mut entries = Vec::new();
11161
11162 for chain in &flows.p2p_chains {
11164 let chain_entries = generator.generate_from_p2p_chain(chain);
11165 entries.extend(chain_entries);
11166 if let Some(pb) = &pb {
11167 pb.inc(1);
11168 }
11169 }
11170
11171 for chain in &flows.o2c_chains {
11173 let chain_entries = generator.generate_from_o2c_chain(chain);
11174 entries.extend(chain_entries);
11175 if let Some(pb) = &pb {
11176 pb.inc(1);
11177 }
11178 }
11179
11180 if let Some(pb) = pb {
11181 pb.finish_with_message(format!(
11182 "Generated {} JEs from document flows",
11183 entries.len()
11184 ));
11185 }
11186
11187 Ok(entries)
11188 }
11189
11190 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11196 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11197
11198 let mut jes = Vec::with_capacity(payroll_runs.len());
11199
11200 for run in payroll_runs {
11201 let mut je = JournalEntry::new_simple(
11202 format!("JE-PAYROLL-{}", run.payroll_id),
11203 run.company_code.clone(),
11204 run.run_date,
11205 format!("Payroll {}", run.payroll_id),
11206 );
11207
11208 je.add_line(JournalEntryLine {
11210 line_number: 1,
11211 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11212 debit_amount: run.total_gross,
11213 reference: Some(run.payroll_id.clone()),
11214 text: Some(format!(
11215 "Payroll {} ({} employees)",
11216 run.payroll_id, run.employee_count
11217 )),
11218 ..Default::default()
11219 });
11220
11221 je.add_line(JournalEntryLine {
11223 line_number: 2,
11224 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11225 credit_amount: run.total_gross,
11226 reference: Some(run.payroll_id.clone()),
11227 ..Default::default()
11228 });
11229
11230 jes.push(je);
11231 }
11232
11233 jes
11234 }
11235
11236 fn link_document_flows_to_subledgers(
11241 &mut self,
11242 flows: &DocumentFlowSnapshot,
11243 ) -> SynthResult<SubledgerSnapshot> {
11244 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11245 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11246
11247 let vendor_names: std::collections::HashMap<String, String> = self
11249 .master_data
11250 .vendors
11251 .iter()
11252 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11253 .collect();
11254 let customer_names: std::collections::HashMap<String, String> = self
11255 .master_data
11256 .customers
11257 .iter()
11258 .map(|c| (c.customer_id.clone(), c.name.clone()))
11259 .collect();
11260
11261 let mut linker = DocumentFlowLinker::new()
11262 .with_vendor_names(vendor_names)
11263 .with_customer_names(customer_names);
11264
11265 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11267 if let Some(pb) = &pb {
11268 pb.inc(flows.vendor_invoices.len() as u64);
11269 }
11270
11271 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11273 if let Some(pb) = &pb {
11274 pb.inc(flows.customer_invoices.len() as u64);
11275 }
11276
11277 if let Some(pb) = pb {
11278 pb.finish_with_message(format!(
11279 "Linked {} AP and {} AR invoices",
11280 ap_invoices.len(),
11281 ar_invoices.len()
11282 ));
11283 }
11284
11285 Ok(SubledgerSnapshot {
11286 ap_invoices,
11287 ar_invoices,
11288 fa_records: Vec::new(),
11289 inventory_positions: Vec::new(),
11290 inventory_movements: Vec::new(),
11291 ar_aging_reports: Vec::new(),
11293 ap_aging_reports: Vec::new(),
11294 depreciation_runs: Vec::new(),
11296 inventory_valuations: Vec::new(),
11297 dunning_runs: Vec::new(),
11299 dunning_letters: Vec::new(),
11300 })
11301 }
11302
11303 #[allow(clippy::too_many_arguments)]
11308 fn generate_ocpm_events(
11309 &mut self,
11310 flows: &DocumentFlowSnapshot,
11311 sourcing: &SourcingSnapshot,
11312 hr: &HrSnapshot,
11313 manufacturing: &ManufacturingSnapshot,
11314 banking: &BankingSnapshot,
11315 audit: &AuditSnapshot,
11316 financial_reporting: &FinancialReportingSnapshot,
11317 ) -> SynthResult<OcpmSnapshot> {
11318 let total_chains = flows.p2p_chains.len()
11319 + flows.o2c_chains.len()
11320 + sourcing.sourcing_projects.len()
11321 + hr.payroll_runs.len()
11322 + manufacturing.production_orders.len()
11323 + banking.customers.len()
11324 + audit.engagements.len()
11325 + financial_reporting.bank_reconciliations.len();
11326 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11327
11328 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11330 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11331
11332 let ocpm_config = OcpmGeneratorConfig {
11334 generate_p2p: true,
11335 generate_o2c: true,
11336 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11337 generate_h2r: !hr.payroll_runs.is_empty(),
11338 generate_mfg: !manufacturing.production_orders.is_empty(),
11339 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11340 generate_bank: !banking.customers.is_empty(),
11341 generate_audit: !audit.engagements.is_empty(),
11342 happy_path_rate: 0.75,
11343 exception_path_rate: 0.20,
11344 error_path_rate: 0.05,
11345 add_duration_variability: true,
11346 duration_std_dev_factor: 0.3,
11347 };
11348 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11349 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11350
11351 let available_users: Vec<String> = self
11353 .master_data
11354 .employees
11355 .iter()
11356 .take(20)
11357 .map(|e| e.user_id.clone())
11358 .collect();
11359
11360 let fallback_date =
11362 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11363 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11364 .unwrap_or(fallback_date);
11365 let base_midnight = base_date
11366 .and_hms_opt(0, 0, 0)
11367 .expect("midnight is always valid");
11368 let base_datetime =
11369 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11370
11371 let add_result = |event_log: &mut OcpmEventLog,
11373 result: datasynth_ocpm::CaseGenerationResult| {
11374 for event in result.events {
11375 event_log.add_event(event);
11376 }
11377 for object in result.objects {
11378 event_log.add_object(object);
11379 }
11380 for relationship in result.relationships {
11381 event_log.add_relationship(relationship);
11382 }
11383 for corr in result.correlation_events {
11384 event_log.add_correlation_event(corr);
11385 }
11386 event_log.add_case(result.case_trace);
11387 };
11388
11389 for chain in &flows.p2p_chains {
11391 let po = &chain.purchase_order;
11392 let documents = P2pDocuments::new(
11393 &po.header.document_id,
11394 &po.vendor_id,
11395 &po.header.company_code,
11396 po.total_net_amount,
11397 &po.header.currency,
11398 &ocpm_uuid_factory,
11399 )
11400 .with_goods_receipt(
11401 chain
11402 .goods_receipts
11403 .first()
11404 .map(|gr| gr.header.document_id.as_str())
11405 .unwrap_or(""),
11406 &ocpm_uuid_factory,
11407 )
11408 .with_invoice(
11409 chain
11410 .vendor_invoice
11411 .as_ref()
11412 .map(|vi| vi.header.document_id.as_str())
11413 .unwrap_or(""),
11414 &ocpm_uuid_factory,
11415 )
11416 .with_payment(
11417 chain
11418 .payment
11419 .as_ref()
11420 .map(|p| p.header.document_id.as_str())
11421 .unwrap_or(""),
11422 &ocpm_uuid_factory,
11423 );
11424
11425 let start_time =
11426 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11427 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11428 add_result(&mut event_log, result);
11429
11430 if let Some(pb) = &pb {
11431 pb.inc(1);
11432 }
11433 }
11434
11435 for chain in &flows.o2c_chains {
11437 let so = &chain.sales_order;
11438 let documents = O2cDocuments::new(
11439 &so.header.document_id,
11440 &so.customer_id,
11441 &so.header.company_code,
11442 so.total_net_amount,
11443 &so.header.currency,
11444 &ocpm_uuid_factory,
11445 )
11446 .with_delivery(
11447 chain
11448 .deliveries
11449 .first()
11450 .map(|d| d.header.document_id.as_str())
11451 .unwrap_or(""),
11452 &ocpm_uuid_factory,
11453 )
11454 .with_invoice(
11455 chain
11456 .customer_invoice
11457 .as_ref()
11458 .map(|ci| ci.header.document_id.as_str())
11459 .unwrap_or(""),
11460 &ocpm_uuid_factory,
11461 )
11462 .with_receipt(
11463 chain
11464 .customer_receipt
11465 .as_ref()
11466 .map(|r| r.header.document_id.as_str())
11467 .unwrap_or(""),
11468 &ocpm_uuid_factory,
11469 );
11470
11471 let start_time =
11472 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11473 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11474 add_result(&mut event_log, result);
11475
11476 if let Some(pb) = &pb {
11477 pb.inc(1);
11478 }
11479 }
11480
11481 for project in &sourcing.sourcing_projects {
11483 let vendor_id = sourcing
11485 .contracts
11486 .iter()
11487 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11488 .map(|c| c.vendor_id.clone())
11489 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11490 .or_else(|| {
11491 self.master_data
11492 .vendors
11493 .first()
11494 .map(|v| v.vendor_id.clone())
11495 })
11496 .unwrap_or_else(|| "V000".to_string());
11497 let mut docs = S2cDocuments::new(
11498 &project.project_id,
11499 &vendor_id,
11500 &project.company_code,
11501 project.estimated_annual_spend,
11502 &ocpm_uuid_factory,
11503 );
11504 if let Some(rfx) = sourcing
11506 .rfx_events
11507 .iter()
11508 .find(|r| r.sourcing_project_id == project.project_id)
11509 {
11510 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11511 if let Some(bid) = sourcing.bids.iter().find(|b| {
11513 b.rfx_id == rfx.rfx_id
11514 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11515 }) {
11516 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11517 }
11518 }
11519 if let Some(contract) = sourcing
11521 .contracts
11522 .iter()
11523 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11524 {
11525 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11526 }
11527 let start_time = base_datetime - chrono::Duration::days(90);
11528 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11529 add_result(&mut event_log, result);
11530
11531 if let Some(pb) = &pb {
11532 pb.inc(1);
11533 }
11534 }
11535
11536 for run in &hr.payroll_runs {
11538 let employee_id = hr
11540 .payroll_line_items
11541 .iter()
11542 .find(|li| li.payroll_id == run.payroll_id)
11543 .map(|li| li.employee_id.as_str())
11544 .unwrap_or("EMP000");
11545 let docs = H2rDocuments::new(
11546 &run.payroll_id,
11547 employee_id,
11548 &run.company_code,
11549 run.total_gross,
11550 &ocpm_uuid_factory,
11551 )
11552 .with_time_entries(
11553 hr.time_entries
11554 .iter()
11555 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11556 .take(5)
11557 .map(|t| t.entry_id.as_str())
11558 .collect(),
11559 );
11560 let start_time = base_datetime - chrono::Duration::days(30);
11561 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11562 add_result(&mut event_log, result);
11563
11564 if let Some(pb) = &pb {
11565 pb.inc(1);
11566 }
11567 }
11568
11569 for order in &manufacturing.production_orders {
11571 let mut docs = MfgDocuments::new(
11572 &order.order_id,
11573 &order.material_id,
11574 &order.company_code,
11575 order.planned_quantity,
11576 &ocpm_uuid_factory,
11577 )
11578 .with_operations(
11579 order
11580 .operations
11581 .iter()
11582 .map(|o| format!("OP-{:04}", o.operation_number))
11583 .collect::<Vec<_>>()
11584 .iter()
11585 .map(std::string::String::as_str)
11586 .collect(),
11587 );
11588 if let Some(insp) = manufacturing
11590 .quality_inspections
11591 .iter()
11592 .find(|i| i.reference_id == order.order_id)
11593 {
11594 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11595 }
11596 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11598 cc.items
11599 .iter()
11600 .any(|item| item.material_id == order.material_id)
11601 }) {
11602 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11603 }
11604 let start_time = base_datetime - chrono::Duration::days(60);
11605 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11606 add_result(&mut event_log, result);
11607
11608 if let Some(pb) = &pb {
11609 pb.inc(1);
11610 }
11611 }
11612
11613 for customer in &banking.customers {
11615 let customer_id_str = customer.customer_id.to_string();
11616 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11617 if let Some(account) = banking
11619 .accounts
11620 .iter()
11621 .find(|a| a.primary_owner_id == customer.customer_id)
11622 {
11623 let account_id_str = account.account_id.to_string();
11624 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11625 let txn_strs: Vec<String> = banking
11627 .transactions
11628 .iter()
11629 .filter(|t| t.account_id == account.account_id)
11630 .take(10)
11631 .map(|t| t.transaction_id.to_string())
11632 .collect();
11633 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11634 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11635 .transactions
11636 .iter()
11637 .filter(|t| t.account_id == account.account_id)
11638 .take(10)
11639 .map(|t| t.amount)
11640 .collect();
11641 if !txn_ids.is_empty() {
11642 docs = docs.with_transactions(txn_ids, txn_amounts);
11643 }
11644 }
11645 let start_time = base_datetime - chrono::Duration::days(180);
11646 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11647 add_result(&mut event_log, result);
11648
11649 if let Some(pb) = &pb {
11650 pb.inc(1);
11651 }
11652 }
11653
11654 for engagement in &audit.engagements {
11656 let engagement_id_str = engagement.engagement_id.to_string();
11657 let docs = AuditDocuments::new(
11658 &engagement_id_str,
11659 &engagement.client_entity_id,
11660 &ocpm_uuid_factory,
11661 )
11662 .with_workpapers(
11663 audit
11664 .workpapers
11665 .iter()
11666 .filter(|w| w.engagement_id == engagement.engagement_id)
11667 .take(10)
11668 .map(|w| w.workpaper_id.to_string())
11669 .collect::<Vec<_>>()
11670 .iter()
11671 .map(std::string::String::as_str)
11672 .collect(),
11673 )
11674 .with_evidence(
11675 audit
11676 .evidence
11677 .iter()
11678 .filter(|e| e.engagement_id == engagement.engagement_id)
11679 .take(10)
11680 .map(|e| e.evidence_id.to_string())
11681 .collect::<Vec<_>>()
11682 .iter()
11683 .map(std::string::String::as_str)
11684 .collect(),
11685 )
11686 .with_risks(
11687 audit
11688 .risk_assessments
11689 .iter()
11690 .filter(|r| r.engagement_id == engagement.engagement_id)
11691 .take(5)
11692 .map(|r| r.risk_id.to_string())
11693 .collect::<Vec<_>>()
11694 .iter()
11695 .map(std::string::String::as_str)
11696 .collect(),
11697 )
11698 .with_findings(
11699 audit
11700 .findings
11701 .iter()
11702 .filter(|f| f.engagement_id == engagement.engagement_id)
11703 .take(5)
11704 .map(|f| f.finding_id.to_string())
11705 .collect::<Vec<_>>()
11706 .iter()
11707 .map(std::string::String::as_str)
11708 .collect(),
11709 )
11710 .with_judgments(
11711 audit
11712 .judgments
11713 .iter()
11714 .filter(|j| j.engagement_id == engagement.engagement_id)
11715 .take(5)
11716 .map(|j| j.judgment_id.to_string())
11717 .collect::<Vec<_>>()
11718 .iter()
11719 .map(std::string::String::as_str)
11720 .collect(),
11721 );
11722 let start_time = base_datetime - chrono::Duration::days(120);
11723 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11724 add_result(&mut event_log, result);
11725
11726 if let Some(pb) = &pb {
11727 pb.inc(1);
11728 }
11729 }
11730
11731 for recon in &financial_reporting.bank_reconciliations {
11733 let docs = BankReconDocuments::new(
11734 &recon.reconciliation_id,
11735 &recon.bank_account_id,
11736 &recon.company_code,
11737 recon.bank_ending_balance,
11738 &ocpm_uuid_factory,
11739 )
11740 .with_statement_lines(
11741 recon
11742 .statement_lines
11743 .iter()
11744 .take(20)
11745 .map(|l| l.line_id.as_str())
11746 .collect(),
11747 )
11748 .with_reconciling_items(
11749 recon
11750 .reconciling_items
11751 .iter()
11752 .take(10)
11753 .map(|i| i.item_id.as_str())
11754 .collect(),
11755 );
11756 let start_time = base_datetime - chrono::Duration::days(30);
11757 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11758 add_result(&mut event_log, result);
11759
11760 if let Some(pb) = &pb {
11761 pb.inc(1);
11762 }
11763 }
11764
11765 event_log.compute_variants();
11767
11768 let summary = event_log.summary();
11769
11770 if let Some(pb) = pb {
11771 pb.finish_with_message(format!(
11772 "Generated {} OCPM events, {} objects",
11773 summary.event_count, summary.object_count
11774 ));
11775 }
11776
11777 Ok(OcpmSnapshot {
11778 event_count: summary.event_count,
11779 object_count: summary.object_count,
11780 case_count: summary.case_count,
11781 event_log: Some(event_log),
11782 })
11783 }
11784
11785 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11787 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11788
11789 let total_rate = if self.config.anomaly_injection.enabled {
11792 self.config.anomaly_injection.rates.total_rate
11793 } else if self.config.fraud.enabled {
11794 self.config.fraud.fraud_rate
11795 } else {
11796 0.02
11797 };
11798
11799 let fraud_rate = if self.config.anomaly_injection.enabled {
11800 self.config.anomaly_injection.rates.fraud_rate
11801 } else {
11802 AnomalyRateConfig::default().fraud_rate
11803 };
11804
11805 let error_rate = if self.config.anomaly_injection.enabled {
11806 self.config.anomaly_injection.rates.error_rate
11807 } else {
11808 AnomalyRateConfig::default().error_rate
11809 };
11810
11811 let process_issue_rate = if self.config.anomaly_injection.enabled {
11812 self.config.anomaly_injection.rates.process_rate
11813 } else {
11814 AnomalyRateConfig::default().process_issue_rate
11815 };
11816
11817 let anomaly_config = AnomalyInjectorConfig {
11818 rates: AnomalyRateConfig {
11819 total_rate,
11820 fraud_rate,
11821 error_rate,
11822 process_issue_rate,
11823 ..Default::default()
11824 },
11825 seed: self.seed + 5000,
11826 ..Default::default()
11827 };
11828
11829 let mut injector = AnomalyInjector::new(anomaly_config);
11830 let result = injector.process_entries(entries);
11831
11832 if let Some(pb) = &pb {
11833 pb.inc(entries.len() as u64);
11834 pb.finish_with_message("Anomaly injection complete");
11835 }
11836
11837 let mut by_type = HashMap::new();
11838 for label in &result.labels {
11839 *by_type
11840 .entry(format!("{:?}", label.anomaly_type))
11841 .or_insert(0) += 1;
11842 }
11843
11844 Ok(AnomalyLabels {
11845 labels: result.labels,
11846 summary: Some(result.summary),
11847 by_type,
11848 })
11849 }
11850
11851 fn validate_journal_entries(
11860 &mut self,
11861 entries: &[JournalEntry],
11862 ) -> SynthResult<BalanceValidationResult> {
11863 let clean_entries: Vec<&JournalEntry> = entries
11865 .iter()
11866 .filter(|e| {
11867 e.header
11868 .header_text
11869 .as_ref()
11870 .map(|t| !t.contains("[HUMAN_ERROR:"))
11871 .unwrap_or(true)
11872 })
11873 .collect();
11874
11875 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11876
11877 let config = BalanceTrackerConfig {
11879 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11883 };
11884 let validation_currency = self
11885 .config
11886 .companies
11887 .first()
11888 .map(|c| c.currency.clone())
11889 .unwrap_or_else(|| "USD".to_string());
11890
11891 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11892
11893 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11895 let errors = tracker.apply_entries(&clean_refs);
11896
11897 if let Some(pb) = &pb {
11898 pb.inc(entries.len() as u64);
11899 }
11900
11901 let has_unbalanced = tracker
11904 .get_validation_errors()
11905 .iter()
11906 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11907
11908 let mut all_errors = errors;
11911 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11912 let company_codes: Vec<String> = self
11913 .config
11914 .companies
11915 .iter()
11916 .map(|c| c.code.clone())
11917 .collect();
11918
11919 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11920 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11921 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11922
11923 for company_code in &company_codes {
11924 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11925 all_errors.push(e);
11926 }
11927 }
11928
11929 let stats = tracker.get_statistics();
11931
11932 let is_balanced = all_errors.is_empty();
11934
11935 if let Some(pb) = pb {
11936 let msg = if is_balanced {
11937 "Balance validation passed"
11938 } else {
11939 "Balance validation completed with errors"
11940 };
11941 pb.finish_with_message(msg);
11942 }
11943
11944 Ok(BalanceValidationResult {
11945 validated: true,
11946 is_balanced,
11947 entries_processed: stats.entries_processed,
11948 total_debits: stats.total_debits,
11949 total_credits: stats.total_credits,
11950 accounts_tracked: stats.accounts_tracked,
11951 companies_tracked: stats.companies_tracked,
11952 validation_errors: all_errors,
11953 has_unbalanced_entries: has_unbalanced,
11954 })
11955 }
11956
11957 fn inject_data_quality(
11962 &mut self,
11963 entries: &mut [JournalEntry],
11964 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11965 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11966
11967 let config = if self.config.data_quality.enabled {
11970 let dq = &self.config.data_quality;
11971 let field_rates = dq.missing_values.field_rates.clone();
11975 let mut required_fields: std::collections::HashSet<String> =
11976 dq.missing_values.protected_fields.iter().cloned().collect();
11977 for f in [
11980 "document_id",
11981 "company_code",
11982 "posting_date",
11983 "fiscal_year",
11984 "fiscal_period",
11985 "gl_account",
11986 "line_number",
11987 "transaction_id",
11988 ] {
11989 required_fields.insert(f.to_string());
11990 }
11991 DataQualityConfig {
11992 enable_missing_values: dq.missing_values.enabled,
11993 missing_values: datasynth_generators::MissingValueConfig {
11994 global_rate: dq.effective_missing_rate(),
11995 field_rates,
11996 required_fields,
11997 ..Default::default()
11998 },
11999 enable_format_variations: dq.format_variations.enabled,
12000 format_variations: datasynth_generators::FormatVariationConfig {
12001 date_variation_rate: dq.format_variations.dates.rate,
12002 amount_variation_rate: dq.format_variations.amounts.rate,
12003 identifier_variation_rate: dq.format_variations.identifiers.rate,
12004 ..Default::default()
12005 },
12006 enable_duplicates: dq.duplicates.enabled,
12007 duplicates: datasynth_generators::DuplicateConfig {
12008 duplicate_rate: dq.effective_duplicate_rate(),
12009 ..Default::default()
12010 },
12011 enable_typos: dq.typos.enabled,
12012 typos: datasynth_generators::TypoConfig {
12013 char_error_rate: dq.effective_typo_rate(),
12014 ..Default::default()
12015 },
12016 enable_encoding_issues: dq.encoding_issues.enabled,
12017 encoding_issue_rate: dq.encoding_issues.rate,
12018 seed: self.seed.wrapping_add(77), track_statistics: true,
12020 }
12021 } else {
12022 DataQualityConfig::minimal()
12023 };
12024 let mut injector = DataQualityInjector::new(config);
12025
12026 injector.set_country_pack(self.primary_pack().clone());
12028
12029 let context = HashMap::new();
12031
12032 for entry in entries.iter_mut() {
12033 if let Some(text) = &entry.header.header_text {
12035 let processed = injector.process_text_field(
12036 "header_text",
12037 text,
12038 &entry.header.document_id.to_string(),
12039 &context,
12040 );
12041 match processed {
12042 Some(new_text) if new_text != *text => {
12043 entry.header.header_text = Some(new_text);
12044 }
12045 None => {
12046 entry.header.header_text = None; }
12048 _ => {}
12049 }
12050 }
12051
12052 if let Some(ref_text) = &entry.header.reference {
12054 let processed = injector.process_text_field(
12055 "reference",
12056 ref_text,
12057 &entry.header.document_id.to_string(),
12058 &context,
12059 );
12060 match processed {
12061 Some(new_text) if new_text != *ref_text => {
12062 entry.header.reference = Some(new_text);
12063 }
12064 None => {
12065 entry.header.reference = None;
12066 }
12067 _ => {}
12068 }
12069 }
12070
12071 let user_persona = entry.header.user_persona.clone();
12073 if let Some(processed) = injector.process_text_field(
12074 "user_persona",
12075 &user_persona,
12076 &entry.header.document_id.to_string(),
12077 &context,
12078 ) {
12079 if processed != user_persona {
12080 entry.header.user_persona = processed;
12081 }
12082 }
12083
12084 for line in &mut entry.lines {
12086 if let Some(ref text) = line.line_text {
12088 let processed = injector.process_text_field(
12089 "line_text",
12090 text,
12091 &entry.header.document_id.to_string(),
12092 &context,
12093 );
12094 match processed {
12095 Some(new_text) if new_text != *text => {
12096 line.line_text = Some(new_text);
12097 }
12098 None => {
12099 line.line_text = None;
12100 }
12101 _ => {}
12102 }
12103 }
12104
12105 if let Some(cc) = &line.cost_center {
12107 let processed = injector.process_text_field(
12108 "cost_center",
12109 cc,
12110 &entry.header.document_id.to_string(),
12111 &context,
12112 );
12113 match processed {
12114 Some(new_cc) if new_cc != *cc => {
12115 line.cost_center = Some(new_cc);
12116 }
12117 None => {
12118 line.cost_center = None;
12119 }
12120 _ => {}
12121 }
12122 }
12123
12124 macro_rules! process_opt_field {
12132 ($field_name:expr, $opt:expr) => {
12133 if let Some(val) = $opt.as_ref() {
12134 match injector.process_text_field(
12135 $field_name,
12136 val,
12137 &entry.header.document_id.to_string(),
12138 &context,
12139 ) {
12140 Some(new_val) if new_val != *val => {
12141 *$opt = Some(new_val);
12142 }
12143 None => {
12144 *$opt = None;
12145 }
12146 _ => {}
12147 }
12148 }
12149 };
12150 }
12151
12152 process_opt_field!("profit_center", &mut line.profit_center);
12153 process_opt_field!("assignment", &mut line.assignment);
12154 process_opt_field!("tax_code", &mut line.tax_code);
12155 process_opt_field!("account_description", &mut line.account_description);
12156 process_opt_field!(
12157 "auxiliary_account_number",
12158 &mut line.auxiliary_account_number
12159 );
12160 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12161 process_opt_field!("lettrage", &mut line.lettrage);
12162 }
12163
12164 if let Some(pb) = &pb {
12165 pb.inc(1);
12166 }
12167 }
12168
12169 if let Some(pb) = pb {
12170 pb.finish_with_message("Data quality injection complete");
12171 }
12172
12173 let quality_issues = injector.issues().to_vec();
12174 Ok((injector.stats().clone(), quality_issues))
12175 }
12176
12177 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12188 let use_fsm = self
12190 .config
12191 .audit
12192 .fsm
12193 .as_ref()
12194 .map(|f| f.enabled)
12195 .unwrap_or(false);
12196
12197 if use_fsm {
12198 return self.generate_audit_data_with_fsm(entries);
12199 }
12200
12201 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12203 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12204 let fiscal_year = start_date.year() as u16;
12205 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12206
12207 let total_revenue: rust_decimal::Decimal = entries
12209 .iter()
12210 .flat_map(|e| e.lines.iter())
12211 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12212 .map(|l| l.credit_amount)
12213 .sum();
12214
12215 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12217
12218 let mut snapshot = AuditSnapshot::default();
12219
12220 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12222 engagement_gen.set_team_config(&self.config.audit.team);
12225
12226 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12227 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12231 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12232 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12233 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12234 finding_gen.set_template_provider(self.template_provider.clone());
12236 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12237 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12238 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12239 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12240 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12241 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12242 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12243
12244 let accounts: Vec<String> = self
12246 .coa
12247 .as_ref()
12248 .map(|coa| {
12249 coa.get_postable_accounts()
12250 .iter()
12251 .map(|acc| acc.account_code().to_string())
12252 .collect()
12253 })
12254 .unwrap_or_default();
12255
12256 for (i, company) in self.config.companies.iter().enumerate() {
12258 let company_revenue = total_revenue
12260 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12261
12262 let engagements_for_company =
12264 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12265 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12266 1
12267 } else {
12268 0
12269 };
12270
12271 for _eng_idx in 0..(engagements_for_company + extra) {
12272 let eng_type =
12277 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12278
12279 let mut engagement = engagement_gen.generate_engagement(
12281 &company.code,
12282 &company.name,
12283 fiscal_year,
12284 period_end,
12285 company_revenue,
12286 Some(eng_type),
12287 );
12288
12289 if !self.master_data.employees.is_empty() {
12291 let emp_count = self.master_data.employees.len();
12292 let base = (i * 10 + _eng_idx) % emp_count;
12294 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12295 .employee_id
12296 .clone();
12297 engagement.engagement_manager_id = self.master_data.employees
12298 [(base + 1) % emp_count]
12299 .employee_id
12300 .clone();
12301 let real_team: Vec<String> = engagement
12302 .team_member_ids
12303 .iter()
12304 .enumerate()
12305 .map(|(j, _)| {
12306 self.master_data.employees[(base + 2 + j) % emp_count]
12307 .employee_id
12308 .clone()
12309 })
12310 .collect();
12311 engagement.team_member_ids = real_team;
12312 }
12313
12314 if let Some(pb) = &pb {
12315 pb.inc(1);
12316 }
12317
12318 let team_members: Vec<String> = engagement.team_member_ids.clone();
12320
12321 let workpapers = if self.config.audit.generate_workpapers {
12327 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12328 } else {
12329 Vec::new()
12330 };
12331
12332 for wp in &workpapers {
12333 if let Some(pb) = &pb {
12334 pb.inc(1);
12335 }
12336
12337 let evidence = evidence_gen.generate_evidence_for_workpaper(
12339 wp,
12340 &team_members,
12341 wp.preparer_date,
12342 );
12343
12344 for _ in &evidence {
12345 if let Some(pb) = &pb {
12346 pb.inc(1);
12347 }
12348 }
12349
12350 snapshot.evidence.extend(evidence);
12351 }
12352
12353 let risks =
12355 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12356
12357 for _ in &risks {
12358 if let Some(pb) = &pb {
12359 pb.inc(1);
12360 }
12361 }
12362 snapshot.risk_assessments.extend(risks);
12363
12364 let findings = finding_gen.generate_findings_for_engagement(
12366 &engagement,
12367 &workpapers,
12368 &team_members,
12369 );
12370
12371 for _ in &findings {
12372 if let Some(pb) = &pb {
12373 pb.inc(1);
12374 }
12375 }
12376 snapshot.findings.extend(findings);
12377
12378 let judgments =
12380 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12381
12382 for _ in &judgments {
12383 if let Some(pb) = &pb {
12384 pb.inc(1);
12385 }
12386 }
12387 snapshot.judgments.extend(judgments);
12388
12389 let (confs, resps) =
12391 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12392 snapshot.confirmations.extend(confs);
12393 snapshot.confirmation_responses.extend(resps);
12394
12395 let team_pairs: Vec<(String, String)> = team_members
12397 .iter()
12398 .map(|id| {
12399 let name = self
12400 .master_data
12401 .employees
12402 .iter()
12403 .find(|e| e.employee_id == *id)
12404 .map(|e| e.display_name.clone())
12405 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12406 (id.clone(), name)
12407 })
12408 .collect();
12409 for wp in &workpapers {
12410 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12411 snapshot.procedure_steps.extend(steps);
12412 }
12413
12414 for wp in &workpapers {
12416 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12417 snapshot.samples.push(sample);
12418 }
12419 }
12420
12421 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12423 snapshot.analytical_results.extend(analytical);
12424
12425 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12427 snapshot.ia_functions.push(ia_func);
12428 snapshot.ia_reports.extend(ia_reports);
12429
12430 let vendor_names: Vec<String> = self
12432 .master_data
12433 .vendors
12434 .iter()
12435 .map(|v| v.name.clone())
12436 .collect();
12437 let customer_names: Vec<String> = self
12438 .master_data
12439 .customers
12440 .iter()
12441 .map(|c| c.name.clone())
12442 .collect();
12443 let (parties, rp_txns) =
12444 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12445 snapshot.related_parties.extend(parties);
12446 snapshot.related_party_transactions.extend(rp_txns);
12447
12448 snapshot.workpapers.extend(workpapers);
12450
12451 {
12453 let scope_id = format!(
12454 "SCOPE-{}-{}",
12455 engagement.engagement_id.simple(),
12456 &engagement.client_entity_id
12457 );
12458 let scope = datasynth_core::models::audit::AuditScope::new(
12459 scope_id.clone(),
12460 engagement.engagement_id.to_string(),
12461 engagement.client_entity_id.clone(),
12462 engagement.materiality,
12463 );
12464 let mut eng = engagement;
12466 eng.scope_id = Some(scope_id);
12467 snapshot.audit_scopes.push(scope);
12468 snapshot.engagements.push(eng);
12469 }
12470 }
12471 }
12472
12473 if self.config.companies.len() > 1 {
12477 let group_materiality = snapshot
12480 .engagements
12481 .first()
12482 .map(|e| e.materiality)
12483 .unwrap_or_else(|| {
12484 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12485 total_revenue * pct
12486 });
12487
12488 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12489 let group_engagement_id = snapshot
12490 .engagements
12491 .first()
12492 .map(|e| e.engagement_id.to_string())
12493 .unwrap_or_else(|| "GROUP-ENG".to_string());
12494
12495 let component_snapshot = component_gen.generate(
12496 &self.config.companies,
12497 group_materiality,
12498 &group_engagement_id,
12499 period_end,
12500 );
12501
12502 snapshot.component_auditors = component_snapshot.component_auditors;
12503 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12504 snapshot.component_instructions = component_snapshot.component_instructions;
12505 snapshot.component_reports = component_snapshot.component_reports;
12506
12507 info!(
12508 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12509 snapshot.component_auditors.len(),
12510 snapshot.component_instructions.len(),
12511 snapshot.component_reports.len(),
12512 );
12513 }
12514
12515 {
12519 let applicable_framework = self
12520 .config
12521 .accounting_standards
12522 .framework
12523 .as_ref()
12524 .map(|f| format!("{f:?}"))
12525 .unwrap_or_else(|| "IFRS".to_string());
12526
12527 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12528 let entity_count = self.config.companies.len();
12529
12530 for engagement in &snapshot.engagements {
12531 let company = self
12532 .config
12533 .companies
12534 .iter()
12535 .find(|c| c.code == engagement.client_entity_id);
12536 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12537 let letter_date = engagement.planning_start;
12538 let letter = letter_gen.generate(
12539 &engagement.engagement_id.to_string(),
12540 &engagement.client_name,
12541 entity_count,
12542 engagement.period_end_date,
12543 currency,
12544 &applicable_framework,
12545 letter_date,
12546 );
12547 snapshot.engagement_letters.push(letter);
12548 }
12549
12550 info!(
12551 "ISA 210 engagement letters: {} generated",
12552 snapshot.engagement_letters.len()
12553 );
12554 }
12555
12556 if self.phase_config.generate_legal_documents {
12560 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12561 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12562 for engagement in &snapshot.engagements {
12563 let employee_names: Vec<String> = self
12567 .master_data
12568 .employees
12569 .iter()
12570 .filter(|e| e.company_code == engagement.client_entity_id)
12571 .map(|e| e.display_name.clone())
12572 .collect();
12573 let names_to_use = if !employee_names.is_empty() {
12574 employee_names
12575 } else {
12576 self.master_data
12577 .employees
12578 .iter()
12579 .take(10)
12580 .map(|e| e.display_name.clone())
12581 .collect()
12582 };
12583 let docs = legal_gen.generate(
12584 &engagement.client_entity_id,
12585 engagement.fiscal_year as i32,
12586 &names_to_use,
12587 );
12588 snapshot.legal_documents.extend(docs);
12589 }
12590 info!(
12591 "v3.3.0 legal documents: {} emitted across {} engagements",
12592 snapshot.legal_documents.len(),
12593 snapshot.engagements.len()
12594 );
12595 }
12596
12597 if self.phase_config.generate_it_controls {
12607 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12608 use std::collections::HashMap;
12609 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12610
12611 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12614 HashMap::new();
12615 for engagement in &snapshot.engagements {
12616 let entry = by_company
12617 .entry(engagement.client_entity_id.clone())
12618 .or_insert((engagement.planning_start, engagement.period_end_date));
12619 if engagement.planning_start < entry.0 {
12620 entry.0 = engagement.planning_start;
12621 }
12622 if engagement.period_end_date > entry.1 {
12623 entry.1 = engagement.period_end_date;
12624 }
12625 }
12626
12627 let systems: Vec<String> = vec![
12631 "SAP ECC",
12632 "SAP S/4 HANA",
12633 "Oracle EBS",
12634 "Workday",
12635 "NetSuite",
12636 "Active Directory",
12637 "SharePoint",
12638 "Salesforce",
12639 "ServiceNow",
12640 "Jira",
12641 "GitHub Enterprise",
12642 "AWS Console",
12643 "Okta",
12644 ]
12645 .into_iter()
12646 .map(String::from)
12647 .collect();
12648
12649 for (company_code, (start, end)) in by_company {
12650 let emps: Vec<(String, String)> = self
12651 .master_data
12652 .employees
12653 .iter()
12654 .filter(|e| e.company_code == company_code)
12655 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12656 .collect();
12657 if emps.is_empty() {
12658 continue;
12659 }
12660 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12663 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12664 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12665 snapshot.it_controls_access_logs.extend(access_logs);
12666 snapshot.it_controls_change_records.extend(change_records);
12667 }
12668
12669 info!(
12670 "v3.3.0 IT controls: {} access logs, {} change records",
12671 snapshot.it_controls_access_logs.len(),
12672 snapshot.it_controls_change_records.len()
12673 );
12674 }
12675
12676 {
12680 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12681 let entity_codes: Vec<String> = self
12682 .config
12683 .companies
12684 .iter()
12685 .map(|c| c.code.clone())
12686 .collect();
12687 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12688 info!(
12689 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12690 subsequent.len(),
12691 subsequent
12692 .iter()
12693 .filter(|e| matches!(
12694 e.classification,
12695 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12696 ))
12697 .count(),
12698 subsequent
12699 .iter()
12700 .filter(|e| matches!(
12701 e.classification,
12702 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12703 ))
12704 .count(),
12705 );
12706 snapshot.subsequent_events = subsequent;
12707 }
12708
12709 {
12713 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12714 let entity_codes: Vec<String> = self
12715 .config
12716 .companies
12717 .iter()
12718 .map(|c| c.code.clone())
12719 .collect();
12720 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12721 info!(
12722 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12723 soc_snapshot.service_organizations.len(),
12724 soc_snapshot.soc_reports.len(),
12725 soc_snapshot.user_entity_controls.len(),
12726 );
12727 snapshot.service_organizations = soc_snapshot.service_organizations;
12728 snapshot.soc_reports = soc_snapshot.soc_reports;
12729 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12730 }
12731
12732 {
12736 use datasynth_generators::audit::going_concern_generator::{
12737 GoingConcernGenerator, GoingConcernInput,
12738 };
12739 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12740 let entity_codes: Vec<String> = self
12741 .config
12742 .companies
12743 .iter()
12744 .map(|c| c.code.clone())
12745 .collect();
12746 let assessment_date = period_end + chrono::Duration::days(75);
12748 let period_label = format!("FY{}", period_end.year());
12749
12750 let gc_inputs: Vec<GoingConcernInput> = self
12761 .config
12762 .companies
12763 .iter()
12764 .map(|company| {
12765 let code = &company.code;
12766 let mut revenue = rust_decimal::Decimal::ZERO;
12767 let mut expenses = rust_decimal::Decimal::ZERO;
12768 let mut current_assets = rust_decimal::Decimal::ZERO;
12769 let mut current_liabs = rust_decimal::Decimal::ZERO;
12770 let mut total_debt = rust_decimal::Decimal::ZERO;
12771
12772 for je in entries.iter().filter(|je| &je.header.company_code == code) {
12773 for line in &je.lines {
12774 let acct = line.gl_account.as_str();
12775 let net = line.debit_amount - line.credit_amount;
12776 if acct.starts_with('4') {
12777 revenue -= net;
12779 } else if acct.starts_with('6') {
12780 expenses += net;
12782 }
12783 if acct.starts_with('1') {
12785 if let Ok(n) = acct.parse::<u32>() {
12787 if (1000..=1499).contains(&n) {
12788 current_assets += net;
12789 }
12790 }
12791 } else if acct.starts_with('2') {
12792 if let Ok(n) = acct.parse::<u32>() {
12793 if (2000..=2499).contains(&n) {
12794 current_liabs -= net; } else if (2500..=2999).contains(&n) {
12797 total_debt -= net;
12799 }
12800 }
12801 }
12802 }
12803 }
12804
12805 let net_income = revenue - expenses;
12806 let working_capital = current_assets - current_liabs;
12807 let operating_cash_flow = net_income;
12810
12811 GoingConcernInput {
12812 entity_code: code.clone(),
12813 net_income,
12814 working_capital,
12815 operating_cash_flow,
12816 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12817 assessment_date,
12818 }
12819 })
12820 .collect();
12821
12822 let assessments = if gc_inputs.is_empty() {
12823 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12824 } else {
12825 gc_gen.generate_for_entities_with_inputs(
12826 &entity_codes,
12827 &gc_inputs,
12828 assessment_date,
12829 &period_label,
12830 )
12831 };
12832 info!(
12833 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12834 assessments.len(),
12835 assessments.iter().filter(|a| matches!(
12836 a.auditor_conclusion,
12837 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12838 )).count(),
12839 assessments.iter().filter(|a| matches!(
12840 a.auditor_conclusion,
12841 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12842 )).count(),
12843 assessments.iter().filter(|a| matches!(
12844 a.auditor_conclusion,
12845 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12846 )).count(),
12847 );
12848 snapshot.going_concern_assessments = assessments;
12849 }
12850
12851 {
12855 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12856 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12857 let entity_codes: Vec<String> = self
12858 .config
12859 .companies
12860 .iter()
12861 .map(|c| c.code.clone())
12862 .collect();
12863 let estimates = est_gen.generate_for_entities(&entity_codes);
12864 info!(
12865 "ISA 540 accounting estimates: {} estimates across {} entities \
12866 ({} with retrospective reviews, {} with auditor point estimates)",
12867 estimates.len(),
12868 entity_codes.len(),
12869 estimates
12870 .iter()
12871 .filter(|e| e.retrospective_review.is_some())
12872 .count(),
12873 estimates
12874 .iter()
12875 .filter(|e| e.auditor_point_estimate.is_some())
12876 .count(),
12877 );
12878 snapshot.accounting_estimates = estimates;
12879 }
12880
12881 {
12885 use datasynth_generators::audit::audit_opinion_generator::{
12886 AuditOpinionGenerator, AuditOpinionInput,
12887 };
12888
12889 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12890
12891 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12893 .engagements
12894 .iter()
12895 .map(|eng| {
12896 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12898 .findings
12899 .iter()
12900 .filter(|f| f.engagement_id == eng.engagement_id)
12901 .cloned()
12902 .collect();
12903
12904 let gc = snapshot
12906 .going_concern_assessments
12907 .iter()
12908 .find(|g| g.entity_code == eng.client_entity_id)
12909 .cloned();
12910
12911 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12913 snapshot.component_reports.clone();
12914
12915 let auditor = self
12916 .master_data
12917 .employees
12918 .first()
12919 .map(|e| e.display_name.clone())
12920 .unwrap_or_else(|| "Global Audit LLP".into());
12921
12922 let partner = self
12923 .master_data
12924 .employees
12925 .get(1)
12926 .map(|e| e.display_name.clone())
12927 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12928
12929 AuditOpinionInput {
12930 entity_code: eng.client_entity_id.clone(),
12931 entity_name: eng.client_name.clone(),
12932 engagement_id: eng.engagement_id,
12933 period_end: eng.period_end_date,
12934 findings: eng_findings,
12935 going_concern: gc,
12936 component_reports: comp_reports,
12937 is_us_listed: {
12939 let fw = &self.config.audit_standards.isa_compliance.framework;
12940 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12941 },
12942 auditor_name: auditor,
12943 engagement_partner: partner,
12944 }
12945 })
12946 .collect();
12947
12948 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12949
12950 for go in &generated_opinions {
12951 snapshot
12952 .key_audit_matters
12953 .extend(go.key_audit_matters.clone());
12954 }
12955 snapshot.audit_opinions = generated_opinions
12956 .into_iter()
12957 .map(|go| go.opinion)
12958 .collect();
12959
12960 info!(
12961 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12962 snapshot.audit_opinions.len(),
12963 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12964 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12965 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12966 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12967 );
12968 }
12969
12970 {
12974 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12975
12976 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12977
12978 for (i, company) in self.config.companies.iter().enumerate() {
12979 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12981 .engagements
12982 .iter()
12983 .filter(|e| e.client_entity_id == company.code)
12984 .map(|e| e.engagement_id)
12985 .collect();
12986
12987 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12988 .findings
12989 .iter()
12990 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12991 .cloned()
12992 .collect();
12993
12994 let emp_count = self.master_data.employees.len();
12996 let ceo_name = if emp_count > 0 {
12997 self.master_data.employees[i % emp_count]
12998 .display_name
12999 .clone()
13000 } else {
13001 format!("CEO of {}", company.name)
13002 };
13003 let cfo_name = if emp_count > 1 {
13004 self.master_data.employees[(i + 1) % emp_count]
13005 .display_name
13006 .clone()
13007 } else {
13008 format!("CFO of {}", company.name)
13009 };
13010
13011 let materiality = snapshot
13013 .engagements
13014 .iter()
13015 .find(|e| e.client_entity_id == company.code)
13016 .map(|e| e.materiality)
13017 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13018
13019 let input = SoxGeneratorInput {
13020 company_code: company.code.clone(),
13021 company_name: company.name.clone(),
13022 fiscal_year,
13023 period_end,
13024 findings: company_findings,
13025 ceo_name,
13026 cfo_name,
13027 materiality_threshold: materiality,
13028 revenue_percent: rust_decimal::Decimal::from(100),
13029 assets_percent: rust_decimal::Decimal::from(100),
13030 significant_accounts: vec![
13031 "Revenue".into(),
13032 "Accounts Receivable".into(),
13033 "Inventory".into(),
13034 "Fixed Assets".into(),
13035 "Accounts Payable".into(),
13036 ],
13037 };
13038
13039 let (certs, assessment) = sox_gen.generate(&input);
13040 snapshot.sox_302_certifications.extend(certs);
13041 snapshot.sox_404_assessments.push(assessment);
13042 }
13043
13044 info!(
13045 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13046 snapshot.sox_302_certifications.len(),
13047 snapshot.sox_404_assessments.len(),
13048 snapshot
13049 .sox_404_assessments
13050 .iter()
13051 .filter(|a| a.icfr_effective)
13052 .count(),
13053 snapshot
13054 .sox_404_assessments
13055 .iter()
13056 .filter(|a| !a.icfr_effective)
13057 .count(),
13058 );
13059 }
13060
13061 {
13065 use datasynth_generators::audit::materiality_generator::{
13066 MaterialityGenerator, MaterialityInput,
13067 };
13068
13069 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13070
13071 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13075
13076 for company in &self.config.companies {
13077 let company_code = company.code.clone();
13078
13079 let company_revenue: rust_decimal::Decimal = entries
13081 .iter()
13082 .filter(|e| e.company_code() == company_code)
13083 .flat_map(|e| e.lines.iter())
13084 .filter(|l| l.account_code.starts_with('4'))
13085 .map(|l| l.credit_amount)
13086 .sum();
13087
13088 let total_assets: rust_decimal::Decimal = entries
13090 .iter()
13091 .filter(|e| e.company_code() == company_code)
13092 .flat_map(|e| e.lines.iter())
13093 .filter(|l| l.account_code.starts_with('1'))
13094 .map(|l| l.debit_amount)
13095 .sum();
13096
13097 let total_expenses: rust_decimal::Decimal = entries
13099 .iter()
13100 .filter(|e| e.company_code() == company_code)
13101 .flat_map(|e| e.lines.iter())
13102 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13103 .map(|l| l.debit_amount)
13104 .sum();
13105
13106 let equity: rust_decimal::Decimal = entries
13108 .iter()
13109 .filter(|e| e.company_code() == company_code)
13110 .flat_map(|e| e.lines.iter())
13111 .filter(|l| l.account_code.starts_with('3'))
13112 .map(|l| l.credit_amount)
13113 .sum();
13114
13115 let pretax_income = company_revenue - total_expenses;
13116
13117 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13119 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13120 .unwrap_or(rust_decimal::Decimal::ONE);
13121 (
13122 total_revenue * w,
13123 total_revenue * w * rust_decimal::Decimal::from(3),
13124 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13125 total_revenue * w * rust_decimal::Decimal::from(2),
13126 )
13127 } else {
13128 (company_revenue, total_assets, pretax_income, equity)
13129 };
13130
13131 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13134 entity_code: company_code,
13135 period: format!("FY{}", fiscal_year),
13136 revenue: rev,
13137 pretax_income: pti,
13138 total_assets: assets,
13139 equity: eq,
13140 gross_profit,
13141 });
13142 }
13143
13144 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13145
13146 info!(
13147 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13148 {} total assets, {} equity benchmarks)",
13149 snapshot.materiality_calculations.len(),
13150 snapshot
13151 .materiality_calculations
13152 .iter()
13153 .filter(|m| matches!(
13154 m.benchmark,
13155 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13156 ))
13157 .count(),
13158 snapshot
13159 .materiality_calculations
13160 .iter()
13161 .filter(|m| matches!(
13162 m.benchmark,
13163 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13164 ))
13165 .count(),
13166 snapshot
13167 .materiality_calculations
13168 .iter()
13169 .filter(|m| matches!(
13170 m.benchmark,
13171 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13172 ))
13173 .count(),
13174 snapshot
13175 .materiality_calculations
13176 .iter()
13177 .filter(|m| matches!(
13178 m.benchmark,
13179 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13180 ))
13181 .count(),
13182 );
13183 }
13184
13185 {
13189 use datasynth_generators::audit::cra_generator::CraGenerator;
13190
13191 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13192
13193 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13195 .audit_scopes
13196 .iter()
13197 .map(|s| (s.entity_code.clone(), s.id.clone()))
13198 .collect();
13199
13200 for company in &self.config.companies {
13201 let cras = cra_gen.generate_for_entity(&company.code, None);
13202 let scope_id = entity_scope_map.get(&company.code).cloned();
13203 let cras_with_scope: Vec<_> = cras
13204 .into_iter()
13205 .map(|mut cra| {
13206 cra.scope_id = scope_id.clone();
13207 cra
13208 })
13209 .collect();
13210 snapshot.combined_risk_assessments.extend(cras_with_scope);
13211 }
13212
13213 let significant_count = snapshot
13214 .combined_risk_assessments
13215 .iter()
13216 .filter(|c| c.significant_risk)
13217 .count();
13218 let high_cra_count = snapshot
13219 .combined_risk_assessments
13220 .iter()
13221 .filter(|c| {
13222 matches!(
13223 c.combined_risk,
13224 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13225 )
13226 })
13227 .count();
13228
13229 info!(
13230 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13231 snapshot.combined_risk_assessments.len(),
13232 significant_count,
13233 high_cra_count,
13234 );
13235 }
13236
13237 {
13241 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13242
13243 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13244
13245 for company in &self.config.companies {
13247 let entity_code = company.code.clone();
13248
13249 let tolerable_error = snapshot
13251 .materiality_calculations
13252 .iter()
13253 .find(|m| m.entity_code == entity_code)
13254 .map(|m| m.tolerable_error);
13255
13256 let entity_cras: Vec<_> = snapshot
13258 .combined_risk_assessments
13259 .iter()
13260 .filter(|c| c.entity_code == entity_code)
13261 .cloned()
13262 .collect();
13263
13264 if !entity_cras.is_empty() {
13265 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13266 snapshot.sampling_plans.extend(plans);
13267 snapshot.sampled_items.extend(items);
13268 }
13269 }
13270
13271 let misstatement_count = snapshot
13272 .sampled_items
13273 .iter()
13274 .filter(|i| i.misstatement_found)
13275 .count();
13276
13277 info!(
13278 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13279 snapshot.sampling_plans.len(),
13280 snapshot.sampled_items.len(),
13281 misstatement_count,
13282 );
13283 }
13284
13285 {
13289 use datasynth_generators::audit::scots_generator::{
13290 ScotsGenerator, ScotsGeneratorConfig,
13291 };
13292
13293 let ic_enabled = self.config.intercompany.enabled;
13294
13295 let config = ScotsGeneratorConfig {
13296 intercompany_enabled: ic_enabled,
13297 ..ScotsGeneratorConfig::default()
13298 };
13299 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13300
13301 for company in &self.config.companies {
13302 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13303 snapshot
13304 .significant_transaction_classes
13305 .extend(entity_scots);
13306 }
13307
13308 let estimation_count = snapshot
13309 .significant_transaction_classes
13310 .iter()
13311 .filter(|s| {
13312 matches!(
13313 s.transaction_type,
13314 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13315 )
13316 })
13317 .count();
13318
13319 info!(
13320 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13321 snapshot.significant_transaction_classes.len(),
13322 estimation_count,
13323 );
13324 }
13325
13326 {
13330 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13331
13332 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13333 let entity_codes: Vec<String> = self
13334 .config
13335 .companies
13336 .iter()
13337 .map(|c| c.code.clone())
13338 .collect();
13339 let unusual_flags =
13340 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13341 info!(
13342 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13343 unusual_flags.len(),
13344 unusual_flags
13345 .iter()
13346 .filter(|f| matches!(
13347 f.severity,
13348 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13349 ))
13350 .count(),
13351 unusual_flags
13352 .iter()
13353 .filter(|f| matches!(
13354 f.severity,
13355 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13356 ))
13357 .count(),
13358 unusual_flags
13359 .iter()
13360 .filter(|f| matches!(
13361 f.severity,
13362 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13363 ))
13364 .count(),
13365 );
13366 snapshot.unusual_items = unusual_flags;
13367 }
13368
13369 {
13373 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13374
13375 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13376 let entity_codes: Vec<String> = self
13377 .config
13378 .companies
13379 .iter()
13380 .map(|c| c.code.clone())
13381 .collect();
13382 let current_period_label = format!("FY{fiscal_year}");
13383 let prior_period_label = format!("FY{}", fiscal_year - 1);
13384 let analytical_rels = ar_gen.generate_for_entities(
13385 &entity_codes,
13386 entries,
13387 ¤t_period_label,
13388 &prior_period_label,
13389 );
13390 let out_of_range = analytical_rels
13391 .iter()
13392 .filter(|r| !r.within_expected_range)
13393 .count();
13394 info!(
13395 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13396 analytical_rels.len(),
13397 out_of_range,
13398 );
13399 snapshot.analytical_relationships = analytical_rels;
13400 }
13401
13402 if let Some(pb) = pb {
13403 pb.finish_with_message(format!(
13404 "Audit data: {} engagements, {} workpapers, {} evidence, \
13405 {} confirmations, {} procedure steps, {} samples, \
13406 {} analytical, {} IA funcs, {} related parties, \
13407 {} component auditors, {} letters, {} subsequent events, \
13408 {} service orgs, {} going concern, {} accounting estimates, \
13409 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13410 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13411 {} unusual items, {} analytical relationships",
13412 snapshot.engagements.len(),
13413 snapshot.workpapers.len(),
13414 snapshot.evidence.len(),
13415 snapshot.confirmations.len(),
13416 snapshot.procedure_steps.len(),
13417 snapshot.samples.len(),
13418 snapshot.analytical_results.len(),
13419 snapshot.ia_functions.len(),
13420 snapshot.related_parties.len(),
13421 snapshot.component_auditors.len(),
13422 snapshot.engagement_letters.len(),
13423 snapshot.subsequent_events.len(),
13424 snapshot.service_organizations.len(),
13425 snapshot.going_concern_assessments.len(),
13426 snapshot.accounting_estimates.len(),
13427 snapshot.audit_opinions.len(),
13428 snapshot.key_audit_matters.len(),
13429 snapshot.sox_302_certifications.len(),
13430 snapshot.sox_404_assessments.len(),
13431 snapshot.materiality_calculations.len(),
13432 snapshot.combined_risk_assessments.len(),
13433 snapshot.sampling_plans.len(),
13434 snapshot.significant_transaction_classes.len(),
13435 snapshot.unusual_items.len(),
13436 snapshot.analytical_relationships.len(),
13437 ));
13438 }
13439
13440 {
13447 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13448 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13449 debug!(
13450 "PCAOB-ISA mappings generated: {} mappings",
13451 snapshot.isa_pcaob_mappings.len()
13452 );
13453 }
13454
13455 {
13462 use datasynth_standards::audit::isa_reference::IsaStandard;
13463 snapshot.isa_mappings = IsaStandard::standard_entries();
13464 debug!(
13465 "ISA standard entries generated: {} standards",
13466 snapshot.isa_mappings.len()
13467 );
13468 }
13469
13470 {
13473 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13474 .engagements
13475 .iter()
13476 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13477 .collect();
13478
13479 for rpt in &mut snapshot.related_party_transactions {
13480 if rpt.journal_entry_id.is_some() {
13481 continue; }
13483 let entity = engagement_by_id
13484 .get(&rpt.engagement_id.to_string())
13485 .copied()
13486 .unwrap_or("");
13487
13488 let best_je = entries
13490 .iter()
13491 .filter(|je| je.header.company_code == entity)
13492 .min_by_key(|je| {
13493 (je.header.posting_date - rpt.transaction_date)
13494 .num_days()
13495 .abs()
13496 });
13497
13498 if let Some(je) = best_je {
13499 rpt.journal_entry_id = Some(je.header.document_id.to_string());
13500 }
13501 }
13502
13503 let linked = snapshot
13504 .related_party_transactions
13505 .iter()
13506 .filter(|t| t.journal_entry_id.is_some())
13507 .count();
13508 debug!(
13509 "Linked {}/{} related party transactions to journal entries",
13510 linked,
13511 snapshot.related_party_transactions.len()
13512 );
13513 }
13514
13515 if !snapshot.engagements.is_empty() {
13521 use datasynth_generators::audit_opinion_generator::{
13522 AuditOpinionGenerator, AuditOpinionInput,
13523 };
13524
13525 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13526 let inputs: Vec<AuditOpinionInput> = snapshot
13527 .engagements
13528 .iter()
13529 .map(|eng| {
13530 let findings = snapshot
13531 .findings
13532 .iter()
13533 .filter(|f| f.engagement_id == eng.engagement_id)
13534 .cloned()
13535 .collect();
13536 let going_concern = snapshot
13537 .going_concern_assessments
13538 .iter()
13539 .find(|gc| gc.entity_code == eng.client_entity_id)
13540 .cloned();
13541 let component_reports = snapshot
13544 .component_reports
13545 .iter()
13546 .filter(|r| r.entity_code == eng.client_entity_id)
13547 .cloned()
13548 .collect();
13549
13550 AuditOpinionInput {
13551 entity_code: eng.client_entity_id.clone(),
13552 entity_name: eng.client_name.clone(),
13553 engagement_id: eng.engagement_id,
13554 period_end: eng.period_end_date,
13555 findings,
13556 going_concern,
13557 component_reports,
13558 is_us_listed: matches!(
13559 eng.engagement_type,
13560 datasynth_core::audit::EngagementType::IntegratedAudit
13561 | datasynth_core::audit::EngagementType::Sox404
13562 ),
13563 auditor_name: "DataSynth Audit LLP".to_string(),
13564 engagement_partner: "Engagement Partner".to_string(),
13565 }
13566 })
13567 .collect();
13568
13569 let generated = opinion_gen.generate_batch(&inputs);
13570 for g in generated {
13571 snapshot.key_audit_matters.extend(g.key_audit_matters);
13572 snapshot.audit_opinions.push(g.opinion);
13573 }
13574 debug!(
13575 "Generated {} audit opinions with {} key audit matters",
13576 snapshot.audit_opinions.len(),
13577 snapshot.key_audit_matters.len()
13578 );
13579 }
13580
13581 Ok(snapshot)
13582 }
13583
13584 fn generate_audit_data_with_fsm(
13591 &mut self,
13592 entries: &[JournalEntry],
13593 ) -> SynthResult<AuditSnapshot> {
13594 use datasynth_audit_fsm::{
13595 context::EngagementContext,
13596 engine::AuditFsmEngine,
13597 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13598 };
13599 use rand::SeedableRng;
13600 use rand_chacha::ChaCha8Rng;
13601
13602 info!("Audit FSM: generating audit data via FSM engine");
13603
13604 let fsm_config = self
13605 .config
13606 .audit
13607 .fsm
13608 .as_ref()
13609 .expect("FSM config must be present when FSM is enabled");
13610
13611 let bwp = match fsm_config.blueprint.as_str() {
13613 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13614 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13615 _ => {
13616 warn!(
13617 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13618 fsm_config.blueprint
13619 );
13620 BlueprintWithPreconditions::load_builtin_fsa()
13621 }
13622 }
13623 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13624
13625 let overlay = match fsm_config.overlay.as_str() {
13627 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13628 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13629 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13630 _ => {
13631 warn!(
13632 "Unknown FSM overlay '{}', falling back to builtin:default",
13633 fsm_config.overlay
13634 );
13635 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13636 }
13637 }
13638 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13639
13640 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13642 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13643 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13644
13645 let company = self.config.companies.first();
13647 let company_code = company
13648 .map(|c| c.code.clone())
13649 .unwrap_or_else(|| "UNKNOWN".to_string());
13650 let company_name = company
13651 .map(|c| c.name.clone())
13652 .unwrap_or_else(|| "Unknown Company".to_string());
13653 let currency = company
13654 .map(|c| c.currency.clone())
13655 .unwrap_or_else(|| "USD".to_string());
13656
13657 let entity_entries: Vec<_> = entries
13659 .iter()
13660 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13661 .cloned()
13662 .collect();
13663 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
13667 .iter()
13668 .flat_map(|e| e.lines.iter())
13669 .filter(|l| l.account_code.starts_with('4'))
13670 .map(|l| l.credit_amount - l.debit_amount)
13671 .sum();
13672
13673 let total_assets: rust_decimal::Decimal = entries
13674 .iter()
13675 .flat_map(|e| e.lines.iter())
13676 .filter(|l| l.account_code.starts_with('1'))
13677 .map(|l| l.debit_amount - l.credit_amount)
13678 .sum();
13679
13680 let total_expenses: rust_decimal::Decimal = entries
13681 .iter()
13682 .flat_map(|e| e.lines.iter())
13683 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13684 .map(|l| l.debit_amount)
13685 .sum();
13686
13687 let equity: rust_decimal::Decimal = entries
13688 .iter()
13689 .flat_map(|e| e.lines.iter())
13690 .filter(|l| l.account_code.starts_with('3'))
13691 .map(|l| l.credit_amount - l.debit_amount)
13692 .sum();
13693
13694 let total_debt: rust_decimal::Decimal = entries
13695 .iter()
13696 .flat_map(|e| e.lines.iter())
13697 .filter(|l| l.account_code.starts_with('2'))
13698 .map(|l| l.credit_amount - l.debit_amount)
13699 .sum();
13700
13701 let pretax_income = total_revenue - total_expenses;
13702
13703 let cogs: rust_decimal::Decimal = entries
13704 .iter()
13705 .flat_map(|e| e.lines.iter())
13706 .filter(|l| l.account_code.starts_with('5'))
13707 .map(|l| l.debit_amount)
13708 .sum();
13709 let gross_profit = total_revenue - cogs;
13710
13711 let current_assets: rust_decimal::Decimal = entries
13712 .iter()
13713 .flat_map(|e| e.lines.iter())
13714 .filter(|l| {
13715 l.account_code.starts_with("10")
13716 || l.account_code.starts_with("11")
13717 || l.account_code.starts_with("12")
13718 || l.account_code.starts_with("13")
13719 })
13720 .map(|l| l.debit_amount - l.credit_amount)
13721 .sum();
13722 let current_liabilities: rust_decimal::Decimal = entries
13723 .iter()
13724 .flat_map(|e| e.lines.iter())
13725 .filter(|l| {
13726 l.account_code.starts_with("20")
13727 || l.account_code.starts_with("21")
13728 || l.account_code.starts_with("22")
13729 })
13730 .map(|l| l.credit_amount - l.debit_amount)
13731 .sum();
13732 let working_capital = current_assets - current_liabilities;
13733
13734 let depreciation: rust_decimal::Decimal = entries
13735 .iter()
13736 .flat_map(|e| e.lines.iter())
13737 .filter(|l| l.account_code.starts_with("60"))
13738 .map(|l| l.debit_amount)
13739 .sum();
13740 let operating_cash_flow = pretax_income + depreciation;
13741
13742 let accounts: Vec<String> = self
13744 .coa
13745 .as_ref()
13746 .map(|coa| {
13747 coa.get_postable_accounts()
13748 .iter()
13749 .map(|acc| acc.account_code().to_string())
13750 .collect()
13751 })
13752 .unwrap_or_default();
13753
13754 let team_member_ids: Vec<String> = self
13756 .master_data
13757 .employees
13758 .iter()
13759 .take(8) .map(|e| e.employee_id.clone())
13761 .collect();
13762 let team_member_pairs: Vec<(String, String)> = self
13763 .master_data
13764 .employees
13765 .iter()
13766 .take(8)
13767 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13768 .collect();
13769
13770 let vendor_names: Vec<String> = self
13771 .master_data
13772 .vendors
13773 .iter()
13774 .map(|v| v.name.clone())
13775 .collect();
13776 let customer_names: Vec<String> = self
13777 .master_data
13778 .customers
13779 .iter()
13780 .map(|c| c.name.clone())
13781 .collect();
13782
13783 let entity_codes: Vec<String> = self
13784 .config
13785 .companies
13786 .iter()
13787 .map(|c| c.code.clone())
13788 .collect();
13789
13790 let journal_entry_ids: Vec<String> = entries
13792 .iter()
13793 .take(50)
13794 .map(|e| e.header.document_id.to_string())
13795 .collect();
13796
13797 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13799 for entry in entries {
13800 for line in &entry.lines {
13801 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13802 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13803 *account_balances
13804 .entry(line.account_code.clone())
13805 .or_insert(0.0) += debit_f64 - credit_f64;
13806 }
13807 }
13808
13809 let control_ids: Vec<String> = Vec::new();
13814 let anomaly_refs: Vec<String> = Vec::new();
13815
13816 let mut context = EngagementContext {
13817 company_code,
13818 company_name,
13819 fiscal_year: start_date.year(),
13820 currency,
13821 total_revenue,
13822 total_assets,
13823 engagement_start: start_date,
13824 report_date: period_end,
13825 pretax_income,
13826 equity,
13827 gross_profit,
13828 working_capital,
13829 operating_cash_flow,
13830 total_debt,
13831 team_member_ids,
13832 team_member_pairs,
13833 accounts,
13834 vendor_names,
13835 customer_names,
13836 journal_entry_ids,
13837 account_balances,
13838 control_ids,
13839 anomaly_refs,
13840 journal_entries: entries.to_vec(),
13841 is_us_listed: false,
13842 entity_codes,
13843 auditor_firm_name: "DataSynth Audit LLP".into(),
13844 accounting_framework: self
13845 .config
13846 .accounting_standards
13847 .framework
13848 .map(|f| match f {
13849 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13850 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13851 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13852 "French GAAP"
13853 }
13854 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13855 "German GAAP"
13856 }
13857 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13858 "Dual Reporting"
13859 }
13860 })
13861 .unwrap_or("IFRS")
13862 .into(),
13863 };
13864
13865 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13867 let rng = ChaCha8Rng::seed_from_u64(seed);
13868 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13869
13870 let mut result = engine
13871 .run_engagement(&context)
13872 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13873
13874 info!(
13875 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13876 {} phases completed, duration {:.1}h",
13877 result.event_log.len(),
13878 result.artifacts.total_artifacts(),
13879 result.anomalies.len(),
13880 result.phases_completed.len(),
13881 result.total_duration_hours,
13882 );
13883
13884 let tb_entity = context.company_code.clone();
13886 let tb_fy = context.fiscal_year;
13887 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13888 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13889 entries,
13890 &tb_entity,
13891 tb_fy,
13892 self.coa.as_ref().map(|c| c.as_ref()),
13893 );
13894
13895 let bag = result.artifacts;
13897 let mut snapshot = AuditSnapshot {
13898 engagements: bag.engagements,
13899 engagement_letters: bag.engagement_letters,
13900 materiality_calculations: bag.materiality_calculations,
13901 risk_assessments: bag.risk_assessments,
13902 combined_risk_assessments: bag.combined_risk_assessments,
13903 workpapers: bag.workpapers,
13904 evidence: bag.evidence,
13905 findings: bag.findings,
13906 judgments: bag.judgments,
13907 sampling_plans: bag.sampling_plans,
13908 sampled_items: bag.sampled_items,
13909 analytical_results: bag.analytical_results,
13910 going_concern_assessments: bag.going_concern_assessments,
13911 subsequent_events: bag.subsequent_events,
13912 audit_opinions: bag.audit_opinions,
13913 key_audit_matters: bag.key_audit_matters,
13914 procedure_steps: bag.procedure_steps,
13915 samples: bag.samples,
13916 confirmations: bag.confirmations,
13917 confirmation_responses: bag.confirmation_responses,
13918 fsm_event_trail: Some(result.event_log),
13920 ..Default::default()
13922 };
13923
13924 {
13926 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13927 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13928 }
13929 {
13930 use datasynth_standards::audit::isa_reference::IsaStandard;
13931 snapshot.isa_mappings = IsaStandard::standard_entries();
13932 }
13933
13934 info!(
13935 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13936 {} risk assessments, {} findings, {} materiality calcs",
13937 snapshot.engagements.len(),
13938 snapshot.workpapers.len(),
13939 snapshot.evidence.len(),
13940 snapshot.risk_assessments.len(),
13941 snapshot.findings.len(),
13942 snapshot.materiality_calculations.len(),
13943 );
13944
13945 Ok(snapshot)
13946 }
13947
13948 fn export_graphs(
13955 &mut self,
13956 entries: &[JournalEntry],
13957 _coa: &Arc<ChartOfAccounts>,
13958 stats: &mut EnhancedGenerationStatistics,
13959 ) -> SynthResult<GraphExportSnapshot> {
13960 let pb = self.create_progress_bar(100, "Exporting Graphs");
13961
13962 let mut snapshot = GraphExportSnapshot::default();
13963
13964 let output_dir = self
13966 .output_path
13967 .clone()
13968 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13969 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13970
13971 for graph_type in &self.config.graph_export.graph_types {
13973 if let Some(pb) = &pb {
13974 pb.inc(10);
13975 }
13976
13977 let graph_config = TransactionGraphConfig {
13979 include_vendors: false,
13980 include_customers: false,
13981 create_debit_credit_edges: true,
13982 include_document_nodes: graph_type.include_document_nodes,
13983 min_edge_weight: graph_type.min_edge_weight,
13984 aggregate_parallel_edges: graph_type.aggregate_edges,
13985 framework: None,
13986 };
13987
13988 let mut builder = TransactionGraphBuilder::new(graph_config);
13989 builder.add_journal_entries(entries);
13990 let graph = builder.build();
13991
13992 stats.graph_node_count += graph.node_count();
13994 stats.graph_edge_count += graph.edge_count();
13995
13996 if let Some(pb) = &pb {
13997 pb.inc(40);
13998 }
13999
14000 for format in &self.config.graph_export.formats {
14002 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14003
14004 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14006 warn!("Failed to create graph output directory: {}", e);
14007 continue;
14008 }
14009
14010 match format {
14011 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14012 let pyg_config = PyGExportConfig {
14013 common: datasynth_graph::CommonExportConfig {
14014 export_node_features: true,
14015 export_edge_features: true,
14016 export_node_labels: true,
14017 export_edge_labels: true,
14018 export_masks: true,
14019 train_ratio: self.config.graph_export.train_ratio,
14020 val_ratio: self.config.graph_export.validation_ratio,
14021 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14022 },
14023 one_hot_categoricals: false,
14024 };
14025
14026 let exporter = PyGExporter::new(pyg_config);
14027 match exporter.export(&graph, &format_dir) {
14028 Ok(metadata) => {
14029 snapshot.exports.insert(
14030 format!("{}_{}", graph_type.name, "pytorch_geometric"),
14031 GraphExportInfo {
14032 name: graph_type.name.clone(),
14033 format: "pytorch_geometric".to_string(),
14034 output_path: format_dir.clone(),
14035 node_count: metadata.num_nodes,
14036 edge_count: metadata.num_edges,
14037 },
14038 );
14039 snapshot.graph_count += 1;
14040 }
14041 Err(e) => {
14042 warn!("Failed to export PyTorch Geometric graph: {}", e);
14043 }
14044 }
14045 }
14046 datasynth_config::schema::GraphExportFormat::Neo4j => {
14047 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14048
14049 let neo4j_config = Neo4jExportConfig {
14050 export_node_properties: true,
14051 export_edge_properties: true,
14052 export_features: true,
14053 generate_cypher: true,
14054 generate_admin_import: true,
14055 database_name: "synth".to_string(),
14056 cypher_batch_size: 1000,
14057 };
14058
14059 let exporter = Neo4jExporter::new(neo4j_config);
14060 match exporter.export(&graph, &format_dir) {
14061 Ok(metadata) => {
14062 snapshot.exports.insert(
14063 format!("{}_{}", graph_type.name, "neo4j"),
14064 GraphExportInfo {
14065 name: graph_type.name.clone(),
14066 format: "neo4j".to_string(),
14067 output_path: format_dir.clone(),
14068 node_count: metadata.num_nodes,
14069 edge_count: metadata.num_edges,
14070 },
14071 );
14072 snapshot.graph_count += 1;
14073 }
14074 Err(e) => {
14075 warn!("Failed to export Neo4j graph: {}", e);
14076 }
14077 }
14078 }
14079 datasynth_config::schema::GraphExportFormat::Dgl => {
14080 use datasynth_graph::{DGLExportConfig, DGLExporter};
14081
14082 let dgl_config = DGLExportConfig {
14083 common: datasynth_graph::CommonExportConfig {
14084 export_node_features: true,
14085 export_edge_features: true,
14086 export_node_labels: true,
14087 export_edge_labels: true,
14088 export_masks: true,
14089 train_ratio: self.config.graph_export.train_ratio,
14090 val_ratio: self.config.graph_export.validation_ratio,
14091 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14092 },
14093 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14094 include_pickle_script: true, };
14096
14097 let exporter = DGLExporter::new(dgl_config);
14098 match exporter.export(&graph, &format_dir) {
14099 Ok(metadata) => {
14100 snapshot.exports.insert(
14101 format!("{}_{}", graph_type.name, "dgl"),
14102 GraphExportInfo {
14103 name: graph_type.name.clone(),
14104 format: "dgl".to_string(),
14105 output_path: format_dir.clone(),
14106 node_count: metadata.common.num_nodes,
14107 edge_count: metadata.common.num_edges,
14108 },
14109 );
14110 snapshot.graph_count += 1;
14111 }
14112 Err(e) => {
14113 warn!("Failed to export DGL graph: {}", e);
14114 }
14115 }
14116 }
14117 datasynth_config::schema::GraphExportFormat::RustGraph => {
14118 use datasynth_graph::{
14119 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14120 };
14121
14122 let rustgraph_config = RustGraphExportConfig {
14123 include_features: true,
14124 include_temporal: true,
14125 include_labels: true,
14126 source_name: "datasynth".to_string(),
14127 batch_id: None,
14128 output_format: RustGraphOutputFormat::JsonLines,
14129 export_node_properties: true,
14130 export_edge_properties: true,
14131 pretty_print: false,
14132 };
14133
14134 let exporter = RustGraphExporter::new(rustgraph_config);
14135 match exporter.export(&graph, &format_dir) {
14136 Ok(metadata) => {
14137 snapshot.exports.insert(
14138 format!("{}_{}", graph_type.name, "rustgraph"),
14139 GraphExportInfo {
14140 name: graph_type.name.clone(),
14141 format: "rustgraph".to_string(),
14142 output_path: format_dir.clone(),
14143 node_count: metadata.num_nodes,
14144 edge_count: metadata.num_edges,
14145 },
14146 );
14147 snapshot.graph_count += 1;
14148 }
14149 Err(e) => {
14150 warn!("Failed to export RustGraph: {}", e);
14151 }
14152 }
14153 }
14154 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14155 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14157 }
14158 }
14159 }
14160
14161 if let Some(pb) = &pb {
14162 pb.inc(40);
14163 }
14164 }
14165
14166 stats.graph_export_count = snapshot.graph_count;
14167 snapshot.exported = snapshot.graph_count > 0;
14168
14169 if let Some(pb) = pb {
14170 pb.finish_with_message(format!(
14171 "Graphs exported: {} graphs ({} nodes, {} edges)",
14172 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14173 ));
14174 }
14175
14176 Ok(snapshot)
14177 }
14178
14179 fn build_additional_graphs(
14184 &self,
14185 banking: &BankingSnapshot,
14186 intercompany: &IntercompanySnapshot,
14187 entries: &[JournalEntry],
14188 stats: &mut EnhancedGenerationStatistics,
14189 ) {
14190 let output_dir = self
14191 .output_path
14192 .clone()
14193 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14194 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14195
14196 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14198 info!("Phase 10c: Building banking network graph");
14199 let config = BankingGraphConfig::default();
14200 let mut builder = BankingGraphBuilder::new(config);
14201 builder.add_customers(&banking.customers);
14202 builder.add_accounts(&banking.accounts, &banking.customers);
14203 builder.add_transactions(&banking.transactions);
14204 let graph = builder.build();
14205
14206 let node_count = graph.node_count();
14207 let edge_count = graph.edge_count();
14208 stats.graph_node_count += node_count;
14209 stats.graph_edge_count += edge_count;
14210
14211 for format in &self.config.graph_export.formats {
14213 if matches!(
14214 format,
14215 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14216 ) {
14217 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14218 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14219 warn!("Failed to create banking graph output dir: {}", e);
14220 continue;
14221 }
14222 let pyg_config = PyGExportConfig::default();
14223 let exporter = PyGExporter::new(pyg_config);
14224 if let Err(e) = exporter.export(&graph, &format_dir) {
14225 warn!("Failed to export banking graph as PyG: {}", e);
14226 } else {
14227 info!(
14228 "Banking network graph exported: {} nodes, {} edges",
14229 node_count, edge_count
14230 );
14231 }
14232 }
14233 }
14234 }
14235
14236 let approval_entries: Vec<_> = entries
14238 .iter()
14239 .filter(|je| je.header.approval_workflow.is_some())
14240 .collect();
14241
14242 if !approval_entries.is_empty() {
14243 info!(
14244 "Phase 10c: Building approval network graph ({} entries with approvals)",
14245 approval_entries.len()
14246 );
14247 let config = ApprovalGraphConfig::default();
14248 let mut builder = ApprovalGraphBuilder::new(config);
14249
14250 for je in &approval_entries {
14251 if let Some(ref wf) = je.header.approval_workflow {
14252 for action in &wf.actions {
14253 let record = datasynth_core::models::ApprovalRecord {
14254 approval_id: format!(
14255 "APR-{}-{}",
14256 je.header.document_id, action.approval_level
14257 ),
14258 document_number: je.header.document_id.to_string(),
14259 document_type: "JE".to_string(),
14260 company_code: je.company_code().to_string(),
14261 requester_id: wf.preparer_id.clone(),
14262 requester_name: Some(wf.preparer_name.clone()),
14263 approver_id: action.actor_id.clone(),
14264 approver_name: action.actor_name.clone(),
14265 approval_date: je.posting_date(),
14266 action: format!("{:?}", action.action),
14267 amount: wf.amount,
14268 approval_limit: None,
14269 comments: action.comments.clone(),
14270 delegation_from: None,
14271 is_auto_approved: false,
14272 };
14273 builder.add_approval(&record);
14274 }
14275 }
14276 }
14277
14278 let graph = builder.build();
14279 let node_count = graph.node_count();
14280 let edge_count = graph.edge_count();
14281 stats.graph_node_count += node_count;
14282 stats.graph_edge_count += edge_count;
14283
14284 for format in &self.config.graph_export.formats {
14286 if matches!(
14287 format,
14288 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14289 ) {
14290 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14291 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14292 warn!("Failed to create approval graph output dir: {}", e);
14293 continue;
14294 }
14295 let pyg_config = PyGExportConfig::default();
14296 let exporter = PyGExporter::new(pyg_config);
14297 if let Err(e) = exporter.export(&graph, &format_dir) {
14298 warn!("Failed to export approval graph as PyG: {}", e);
14299 } else {
14300 info!(
14301 "Approval network graph exported: {} nodes, {} edges",
14302 node_count, edge_count
14303 );
14304 }
14305 }
14306 }
14307 }
14308
14309 if self.config.companies.len() >= 2 {
14311 info!(
14312 "Phase 10c: Building entity relationship graph ({} companies)",
14313 self.config.companies.len()
14314 );
14315
14316 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14317 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14318
14319 let parent_code = &self.config.companies[0].code;
14321 let mut companies: Vec<datasynth_core::models::Company> =
14322 Vec::with_capacity(self.config.companies.len());
14323
14324 let first = &self.config.companies[0];
14326 companies.push(datasynth_core::models::Company::parent(
14327 &first.code,
14328 &first.name,
14329 &first.country,
14330 &first.currency,
14331 ));
14332
14333 for cc in self.config.companies.iter().skip(1) {
14335 companies.push(datasynth_core::models::Company::subsidiary(
14336 &cc.code,
14337 &cc.name,
14338 &cc.country,
14339 &cc.currency,
14340 parent_code,
14341 rust_decimal::Decimal::from(100),
14342 ));
14343 }
14344
14345 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14347 self.config
14348 .companies
14349 .iter()
14350 .skip(1)
14351 .enumerate()
14352 .map(|(i, cc)| {
14353 let mut rel =
14354 datasynth_core::models::intercompany::IntercompanyRelationship::new(
14355 format!("REL{:03}", i + 1),
14356 parent_code.clone(),
14357 cc.code.clone(),
14358 rust_decimal::Decimal::from(100),
14359 start_date,
14360 );
14361 rel.functional_currency = cc.currency.clone();
14362 rel
14363 })
14364 .collect();
14365
14366 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14367 builder.add_companies(&companies);
14368 builder.add_ownership_relationships(&relationships);
14369
14370 for pair in &intercompany.matched_pairs {
14372 builder.add_intercompany_edge(
14373 &pair.seller_company,
14374 &pair.buyer_company,
14375 pair.amount,
14376 &format!("{:?}", pair.transaction_type),
14377 );
14378 }
14379
14380 let graph = builder.build();
14381 let node_count = graph.node_count();
14382 let edge_count = graph.edge_count();
14383 stats.graph_node_count += node_count;
14384 stats.graph_edge_count += edge_count;
14385
14386 for format in &self.config.graph_export.formats {
14388 if matches!(
14389 format,
14390 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14391 ) {
14392 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14393 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14394 warn!("Failed to create entity graph output dir: {}", e);
14395 continue;
14396 }
14397 let pyg_config = PyGExportConfig::default();
14398 let exporter = PyGExporter::new(pyg_config);
14399 if let Err(e) = exporter.export(&graph, &format_dir) {
14400 warn!("Failed to export entity graph as PyG: {}", e);
14401 } else {
14402 info!(
14403 "Entity relationship graph exported: {} nodes, {} edges",
14404 node_count, edge_count
14405 );
14406 }
14407 }
14408 }
14409 } else {
14410 debug!(
14411 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14412 self.config.companies.len()
14413 );
14414 }
14415 }
14416
14417 #[allow(clippy::too_many_arguments)]
14424 fn export_hypergraph(
14425 &self,
14426 coa: &Arc<ChartOfAccounts>,
14427 entries: &[JournalEntry],
14428 document_flows: &DocumentFlowSnapshot,
14429 sourcing: &SourcingSnapshot,
14430 hr: &HrSnapshot,
14431 manufacturing: &ManufacturingSnapshot,
14432 banking: &BankingSnapshot,
14433 audit: &AuditSnapshot,
14434 financial_reporting: &FinancialReportingSnapshot,
14435 ocpm: &OcpmSnapshot,
14436 compliance: &ComplianceRegulationsSnapshot,
14437 stats: &mut EnhancedGenerationStatistics,
14438 ) -> SynthResult<HypergraphExportInfo> {
14439 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14440 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14441 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14442 use datasynth_graph::models::hypergraph::AggregationStrategy;
14443
14444 let hg_settings = &self.config.graph_export.hypergraph;
14445
14446 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14448 "truncate" => AggregationStrategy::Truncate,
14449 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14450 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14451 "importance_sample" => AggregationStrategy::ImportanceSample,
14452 _ => AggregationStrategy::PoolByCounterparty,
14453 };
14454
14455 let builder_config = HypergraphConfig {
14456 max_nodes: hg_settings.max_nodes,
14457 aggregation_strategy,
14458 include_coso: hg_settings.governance_layer.include_coso,
14459 include_controls: hg_settings.governance_layer.include_controls,
14460 include_sox: hg_settings.governance_layer.include_sox,
14461 include_vendors: hg_settings.governance_layer.include_vendors,
14462 include_customers: hg_settings.governance_layer.include_customers,
14463 include_employees: hg_settings.governance_layer.include_employees,
14464 include_p2p: hg_settings.process_layer.include_p2p,
14465 include_o2c: hg_settings.process_layer.include_o2c,
14466 include_s2c: hg_settings.process_layer.include_s2c,
14467 include_h2r: hg_settings.process_layer.include_h2r,
14468 include_mfg: hg_settings.process_layer.include_mfg,
14469 include_bank: hg_settings.process_layer.include_bank,
14470 include_audit: hg_settings.process_layer.include_audit,
14471 include_r2r: hg_settings.process_layer.include_r2r,
14472 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14473 docs_per_counterparty_threshold: hg_settings
14474 .process_layer
14475 .docs_per_counterparty_threshold,
14476 include_accounts: hg_settings.accounting_layer.include_accounts,
14477 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14478 include_cross_layer_edges: hg_settings.cross_layer.enabled,
14479 include_compliance: self.config.compliance_regulations.enabled,
14480 include_tax: true,
14481 include_treasury: true,
14482 include_esg: true,
14483 include_project: true,
14484 include_intercompany: true,
14485 include_temporal_events: true,
14486 };
14487
14488 let mut builder = HypergraphBuilder::new(builder_config);
14489
14490 builder.add_coso_framework();
14492
14493 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14496 let controls = InternalControl::standard_controls();
14497 builder.add_controls(&controls);
14498 }
14499
14500 builder.add_vendors(&self.master_data.vendors);
14502 builder.add_customers(&self.master_data.customers);
14503 builder.add_employees(&self.master_data.employees);
14504
14505 builder.add_p2p_documents(
14507 &document_flows.purchase_orders,
14508 &document_flows.goods_receipts,
14509 &document_flows.vendor_invoices,
14510 &document_flows.payments,
14511 );
14512 builder.add_o2c_documents(
14513 &document_flows.sales_orders,
14514 &document_flows.deliveries,
14515 &document_flows.customer_invoices,
14516 );
14517 builder.add_s2c_documents(
14518 &sourcing.sourcing_projects,
14519 &sourcing.qualifications,
14520 &sourcing.rfx_events,
14521 &sourcing.bids,
14522 &sourcing.bid_evaluations,
14523 &sourcing.contracts,
14524 );
14525 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14526 builder.add_mfg_documents(
14527 &manufacturing.production_orders,
14528 &manufacturing.quality_inspections,
14529 &manufacturing.cycle_counts,
14530 );
14531 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14532 builder.add_audit_documents(
14533 &audit.engagements,
14534 &audit.workpapers,
14535 &audit.findings,
14536 &audit.evidence,
14537 &audit.risk_assessments,
14538 &audit.judgments,
14539 &audit.materiality_calculations,
14540 &audit.audit_opinions,
14541 &audit.going_concern_assessments,
14542 );
14543 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14544
14545 if let Some(ref event_log) = ocpm.event_log {
14547 builder.add_ocpm_events(event_log);
14548 }
14549
14550 if self.config.compliance_regulations.enabled
14552 && hg_settings.governance_layer.include_controls
14553 {
14554 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14556 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14557 .standard_records
14558 .iter()
14559 .filter_map(|r| {
14560 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14561 registry.get(&sid).cloned()
14562 })
14563 .collect();
14564
14565 builder.add_compliance_regulations(
14566 &standards,
14567 &compliance.findings,
14568 &compliance.filings,
14569 );
14570 }
14571
14572 builder.add_accounts(coa);
14574 builder.add_journal_entries_as_hyperedges(entries);
14575
14576 let hypergraph = builder.build();
14578
14579 let output_dir = self
14581 .output_path
14582 .clone()
14583 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14584 let hg_dir = output_dir
14585 .join(&self.config.graph_export.output_subdirectory)
14586 .join(&hg_settings.output_subdirectory);
14587
14588 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14590 "unified" => {
14591 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14592 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14593 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14594 })?;
14595 (
14596 metadata.num_nodes,
14597 metadata.num_edges,
14598 metadata.num_hyperedges,
14599 )
14600 }
14601 _ => {
14602 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14604 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14605 SynthError::generation(format!("Hypergraph export failed: {e}"))
14606 })?;
14607 (
14608 metadata.num_nodes,
14609 metadata.num_edges,
14610 metadata.num_hyperedges,
14611 )
14612 }
14613 };
14614
14615 #[cfg(feature = "streaming")]
14617 if let Some(ref target_url) = hg_settings.stream_target {
14618 use crate::stream_client::{StreamClient, StreamConfig};
14619 use std::io::Write as _;
14620
14621 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14622 let stream_config = StreamConfig {
14623 target_url: target_url.clone(),
14624 batch_size: hg_settings.stream_batch_size,
14625 api_key,
14626 ..StreamConfig::default()
14627 };
14628
14629 match StreamClient::new(stream_config) {
14630 Ok(mut client) => {
14631 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14632 match exporter.export_to_writer(&hypergraph, &mut client) {
14633 Ok(_) => {
14634 if let Err(e) = client.flush() {
14635 warn!("Failed to flush stream client: {}", e);
14636 } else {
14637 info!("Streamed {} records to {}", client.total_sent(), target_url);
14638 }
14639 }
14640 Err(e) => {
14641 warn!("Streaming export failed: {}", e);
14642 }
14643 }
14644 }
14645 Err(e) => {
14646 warn!("Failed to create stream client: {}", e);
14647 }
14648 }
14649 }
14650
14651 stats.graph_node_count += num_nodes;
14653 stats.graph_edge_count += num_edges;
14654 stats.graph_export_count += 1;
14655
14656 Ok(HypergraphExportInfo {
14657 node_count: num_nodes,
14658 edge_count: num_edges,
14659 hyperedge_count: num_hyperedges,
14660 output_path: hg_dir,
14661 })
14662 }
14663
14664 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14669 let pb = self.create_progress_bar(100, "Generating Banking Data");
14670
14671 let orchestrator = BankingOrchestratorBuilder::new()
14673 .config(self.config.banking.clone())
14674 .seed(self.seed + 9000)
14675 .country_pack(self.primary_pack().clone())
14676 .build();
14677
14678 if let Some(pb) = &pb {
14679 pb.inc(10);
14680 }
14681
14682 let result = orchestrator.generate();
14684
14685 if let Some(pb) = &pb {
14686 pb.inc(90);
14687 pb.finish_with_message(format!(
14688 "Banking: {} customers, {} transactions",
14689 result.customers.len(),
14690 result.transactions.len()
14691 ));
14692 }
14693
14694 let mut banking_customers = result.customers;
14699 let core_customers = &self.master_data.customers;
14700 if !core_customers.is_empty() {
14701 for (i, bc) in banking_customers.iter_mut().enumerate() {
14702 let core = &core_customers[i % core_customers.len()];
14703 bc.name = CustomerName::business(&core.name);
14704 bc.residence_country = core.country.clone();
14705 bc.enterprise_customer_id = Some(core.customer_id.clone());
14706 }
14707 debug!(
14708 "Cross-referenced {} banking customers with {} core customers",
14709 banking_customers.len(),
14710 core_customers.len()
14711 );
14712 }
14713
14714 Ok(BankingSnapshot {
14715 customers: banking_customers,
14716 accounts: result.accounts,
14717 transactions: result.transactions,
14718 transaction_labels: result.transaction_labels,
14719 customer_labels: result.customer_labels,
14720 account_labels: result.account_labels,
14721 relationship_labels: result.relationship_labels,
14722 narratives: result.narratives,
14723 suspicious_count: result.stats.suspicious_count,
14724 scenario_count: result.scenarios.len(),
14725 })
14726 }
14727
14728 fn calculate_total_transactions(&self) -> u64 {
14730 let months = self.config.global.period_months as f64;
14731 self.config
14732 .companies
14733 .iter()
14734 .map(|c| {
14735 let annual = c.annual_transaction_volume.count() as f64;
14736 let weighted = annual * c.volume_weight;
14737 (weighted * months / 12.0) as u64
14738 })
14739 .sum()
14740 }
14741
14742 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14744 if !self.phase_config.show_progress {
14745 return None;
14746 }
14747
14748 let pb = if let Some(mp) = &self.multi_progress {
14749 mp.add(ProgressBar::new(total))
14750 } else {
14751 ProgressBar::new(total)
14752 };
14753
14754 pb.set_style(
14755 ProgressStyle::default_bar()
14756 .template(&format!(
14757 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14758 ))
14759 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14760 .progress_chars("#>-"),
14761 );
14762
14763 Some(pb)
14764 }
14765
14766 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14768 self.coa.clone()
14769 }
14770
14771 pub fn get_master_data(&self) -> &MasterDataSnapshot {
14773 &self.master_data
14774 }
14775
14776 fn phase_compliance_regulations(
14778 &mut self,
14779 _stats: &mut EnhancedGenerationStatistics,
14780 ) -> SynthResult<ComplianceRegulationsSnapshot> {
14781 if !self.phase_config.generate_compliance_regulations {
14782 return Ok(ComplianceRegulationsSnapshot::default());
14783 }
14784
14785 info!("Phase: Generating Compliance Regulations Data");
14786
14787 let cr_config = &self.config.compliance_regulations;
14788
14789 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14791 self.config
14792 .companies
14793 .iter()
14794 .map(|c| c.country.clone())
14795 .collect::<std::collections::HashSet<_>>()
14796 .into_iter()
14797 .collect()
14798 } else {
14799 cr_config.jurisdictions.clone()
14800 };
14801
14802 let fallback_date =
14804 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14805 let reference_date = cr_config
14806 .reference_date
14807 .as_ref()
14808 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14809 .unwrap_or_else(|| {
14810 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14811 .unwrap_or(fallback_date)
14812 });
14813
14814 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14816 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14817 let cross_reference_records = reg_gen.generate_cross_reference_records();
14818 let jurisdiction_records =
14819 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14820
14821 info!(
14822 " Standards: {} records, {} cross-references, {} jurisdictions",
14823 standard_records.len(),
14824 cross_reference_records.len(),
14825 jurisdiction_records.len()
14826 );
14827
14828 let audit_procedures = if cr_config.audit_procedures.enabled {
14830 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14831 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14832 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14833 confidence_level: cr_config.audit_procedures.confidence_level,
14834 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14835 };
14836 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14837 self.seed + 9000,
14838 proc_config,
14839 );
14840 let registry = reg_gen.registry();
14841 let mut all_procs = Vec::new();
14842 for jurisdiction in &jurisdictions {
14843 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14844 all_procs.extend(procs);
14845 }
14846 info!(" Audit procedures: {}", all_procs.len());
14847 all_procs
14848 } else {
14849 Vec::new()
14850 };
14851
14852 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14854 let finding_config =
14855 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14856 finding_rate: cr_config.findings.finding_rate,
14857 material_weakness_rate: cr_config.findings.material_weakness_rate,
14858 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14859 generate_remediation: cr_config.findings.generate_remediation,
14860 };
14861 let mut finding_gen =
14862 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14863 self.seed + 9100,
14864 finding_config,
14865 );
14866 let mut all_findings = Vec::new();
14867 for company in &self.config.companies {
14868 let company_findings =
14869 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14870 all_findings.extend(company_findings);
14871 }
14872 info!(" Compliance findings: {}", all_findings.len());
14873 all_findings
14874 } else {
14875 Vec::new()
14876 };
14877
14878 let filings = if cr_config.filings.enabled {
14880 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14881 filing_types: cr_config.filings.filing_types.clone(),
14882 generate_status_progression: cr_config.filings.generate_status_progression,
14883 };
14884 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14885 self.seed + 9200,
14886 filing_config,
14887 );
14888 let company_codes: Vec<String> = self
14889 .config
14890 .companies
14891 .iter()
14892 .map(|c| c.code.clone())
14893 .collect();
14894 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14895 .unwrap_or(fallback_date);
14896 let filings = filing_gen.generate_filings(
14897 &company_codes,
14898 &jurisdictions,
14899 start_date,
14900 self.config.global.period_months,
14901 );
14902 info!(" Regulatory filings: {}", filings.len());
14903 filings
14904 } else {
14905 Vec::new()
14906 };
14907
14908 let compliance_graph = if cr_config.graph.enabled {
14910 let graph_config = datasynth_graph::ComplianceGraphConfig {
14911 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14912 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14913 include_cross_references: cr_config.graph.include_cross_references,
14914 include_supersession_edges: cr_config.graph.include_supersession_edges,
14915 include_account_links: cr_config.graph.include_account_links,
14916 include_control_links: cr_config.graph.include_control_links,
14917 include_company_links: cr_config.graph.include_company_links,
14918 };
14919 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14920
14921 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14923 .iter()
14924 .map(|r| datasynth_graph::StandardNodeInput {
14925 standard_id: r.standard_id.clone(),
14926 title: r.title.clone(),
14927 category: r.category.clone(),
14928 domain: r.domain.clone(),
14929 is_active: r.is_active,
14930 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14931 applicable_account_types: r.applicable_account_types.clone(),
14932 applicable_processes: r.applicable_processes.clone(),
14933 })
14934 .collect();
14935 builder.add_standards(&standard_inputs);
14936
14937 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14939 jurisdiction_records
14940 .iter()
14941 .map(|r| datasynth_graph::JurisdictionNodeInput {
14942 country_code: r.country_code.clone(),
14943 country_name: r.country_name.clone(),
14944 framework: r.accounting_framework.clone(),
14945 standard_count: r.standard_count,
14946 tax_rate: r.statutory_tax_rate,
14947 })
14948 .collect();
14949 builder.add_jurisdictions(&jurisdiction_inputs);
14950
14951 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14953 cross_reference_records
14954 .iter()
14955 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14956 from_standard: r.from_standard.clone(),
14957 to_standard: r.to_standard.clone(),
14958 relationship: r.relationship.clone(),
14959 convergence_level: r.convergence_level,
14960 })
14961 .collect();
14962 builder.add_cross_references(&xref_inputs);
14963
14964 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14966 .iter()
14967 .map(|r| datasynth_graph::JurisdictionMappingInput {
14968 country_code: r.jurisdiction.clone(),
14969 standard_id: r.standard_id.clone(),
14970 })
14971 .collect();
14972 builder.add_jurisdiction_mappings(&mapping_inputs);
14973
14974 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14976 .iter()
14977 .map(|p| datasynth_graph::ProcedureNodeInput {
14978 procedure_id: p.procedure_id.clone(),
14979 standard_id: p.standard_id.clone(),
14980 procedure_type: p.procedure_type.clone(),
14981 sample_size: p.sample_size,
14982 confidence_level: p.confidence_level,
14983 })
14984 .collect();
14985 builder.add_procedures(&proc_inputs);
14986
14987 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14989 .iter()
14990 .map(|f| datasynth_graph::FindingNodeInput {
14991 finding_id: f.finding_id.to_string(),
14992 standard_id: f
14993 .related_standards
14994 .first()
14995 .map(|s| s.as_str().to_string())
14996 .unwrap_or_default(),
14997 severity: f.severity.to_string(),
14998 deficiency_level: f.deficiency_level.to_string(),
14999 severity_score: f.deficiency_level.severity_score(),
15000 control_id: f.control_id.clone(),
15001 affected_accounts: f.affected_accounts.clone(),
15002 })
15003 .collect();
15004 builder.add_findings(&finding_inputs);
15005
15006 if cr_config.graph.include_account_links {
15008 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15009 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15010 for std_record in &standard_records {
15011 if let Some(std_obj) =
15012 registry.get(&datasynth_core::models::compliance::StandardId::parse(
15013 &std_record.standard_id,
15014 ))
15015 {
15016 for acct_type in &std_obj.applicable_account_types {
15017 account_links.push(datasynth_graph::AccountLinkInput {
15018 standard_id: std_record.standard_id.clone(),
15019 account_code: acct_type.clone(),
15020 account_name: acct_type.clone(),
15021 });
15022 }
15023 }
15024 }
15025 builder.add_account_links(&account_links);
15026 }
15027
15028 if cr_config.graph.include_control_links {
15030 let mut control_links = Vec::new();
15031 let sox_like_ids: Vec<String> = standard_records
15033 .iter()
15034 .filter(|r| {
15035 r.standard_id.starts_with("SOX")
15036 || r.standard_id.starts_with("PCAOB-AS-2201")
15037 })
15038 .map(|r| r.standard_id.clone())
15039 .collect();
15040 let control_ids = [
15042 ("C001", "Cash Controls"),
15043 ("C002", "Large Transaction Approval"),
15044 ("C010", "PO Approval"),
15045 ("C011", "Three-Way Match"),
15046 ("C020", "Revenue Recognition"),
15047 ("C021", "Credit Check"),
15048 ("C030", "Manual JE Approval"),
15049 ("C031", "Period Close Review"),
15050 ("C032", "Account Reconciliation"),
15051 ("C040", "Payroll Processing"),
15052 ("C050", "Fixed Asset Capitalization"),
15053 ("C060", "Intercompany Elimination"),
15054 ];
15055 for sox_id in &sox_like_ids {
15056 for (ctrl_id, ctrl_name) in &control_ids {
15057 control_links.push(datasynth_graph::ControlLinkInput {
15058 standard_id: sox_id.clone(),
15059 control_id: ctrl_id.to_string(),
15060 control_name: ctrl_name.to_string(),
15061 });
15062 }
15063 }
15064 builder.add_control_links(&control_links);
15065 }
15066
15067 if cr_config.graph.include_company_links {
15069 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15070 .iter()
15071 .enumerate()
15072 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15073 filing_id: format!("F{:04}", i + 1),
15074 filing_type: f.filing_type.to_string(),
15075 company_code: f.company_code.clone(),
15076 jurisdiction: f.jurisdiction.clone(),
15077 status: format!("{:?}", f.status),
15078 })
15079 .collect();
15080 builder.add_filings(&filing_inputs);
15081 }
15082
15083 let graph = builder.build();
15084 info!(
15085 " Compliance graph: {} nodes, {} edges",
15086 graph.nodes.len(),
15087 graph.edges.len()
15088 );
15089 Some(graph)
15090 } else {
15091 None
15092 };
15093
15094 self.check_resources_with_log("post-compliance-regulations")?;
15095
15096 Ok(ComplianceRegulationsSnapshot {
15097 standard_records,
15098 cross_reference_records,
15099 jurisdiction_records,
15100 audit_procedures,
15101 findings,
15102 filings,
15103 compliance_graph,
15104 })
15105 }
15106
15107 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15109 use super::lineage::LineageGraphBuilder;
15110
15111 let mut builder = LineageGraphBuilder::new();
15112
15113 builder.add_config_section("config:global", "Global Config");
15115 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15116 builder.add_config_section("config:transactions", "Transaction Config");
15117
15118 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15120 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15121
15122 builder.configured_by("phase:coa", "config:chart_of_accounts");
15124 builder.configured_by("phase:je", "config:transactions");
15125
15126 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15128 builder.produced_by("output:je", "phase:je");
15129
15130 if self.phase_config.generate_master_data {
15132 builder.add_config_section("config:master_data", "Master Data Config");
15133 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15134 builder.configured_by("phase:master_data", "config:master_data");
15135 builder.input_to("phase:master_data", "phase:je");
15136 }
15137
15138 if self.phase_config.generate_document_flows {
15139 builder.add_config_section("config:document_flows", "Document Flow Config");
15140 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15141 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15142 builder.configured_by("phase:p2p", "config:document_flows");
15143 builder.configured_by("phase:o2c", "config:document_flows");
15144
15145 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15146 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15147 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15148 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15149 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15150
15151 builder.produced_by("output:po", "phase:p2p");
15152 builder.produced_by("output:gr", "phase:p2p");
15153 builder.produced_by("output:vi", "phase:p2p");
15154 builder.produced_by("output:so", "phase:o2c");
15155 builder.produced_by("output:ci", "phase:o2c");
15156 }
15157
15158 if self.phase_config.inject_anomalies {
15159 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15160 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15161 builder.configured_by("phase:anomaly", "config:fraud");
15162 builder.add_output_file(
15163 "output:labels",
15164 "Anomaly Labels",
15165 "labels/anomaly_labels.csv",
15166 );
15167 builder.produced_by("output:labels", "phase:anomaly");
15168 }
15169
15170 if self.phase_config.generate_audit {
15171 builder.add_config_section("config:audit", "Audit Config");
15172 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15173 builder.configured_by("phase:audit", "config:audit");
15174 }
15175
15176 if self.phase_config.generate_banking {
15177 builder.add_config_section("config:banking", "Banking Config");
15178 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15179 builder.configured_by("phase:banking", "config:banking");
15180 }
15181
15182 if self.config.llm.enabled {
15183 builder.add_config_section("config:llm", "LLM Enrichment Config");
15184 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15185 builder.configured_by("phase:llm_enrichment", "config:llm");
15186 }
15187
15188 if self.config.diffusion.enabled {
15189 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15190 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15191 builder.configured_by("phase:diffusion", "config:diffusion");
15192 }
15193
15194 if self.config.causal.enabled {
15195 builder.add_config_section("config:causal", "Causal Generation Config");
15196 builder.add_generator_phase("phase:causal", "Causal Overlay");
15197 builder.configured_by("phase:causal", "config:causal");
15198 }
15199
15200 builder.build()
15201 }
15202
15203 fn compute_company_revenue(
15212 entries: &[JournalEntry],
15213 company_code: &str,
15214 ) -> rust_decimal::Decimal {
15215 use rust_decimal::Decimal;
15216 let mut revenue = Decimal::ZERO;
15217 for je in entries {
15218 if je.header.company_code != company_code {
15219 continue;
15220 }
15221 for line in &je.lines {
15222 if line.gl_account.starts_with('4') {
15223 revenue += line.credit_amount - line.debit_amount;
15225 }
15226 }
15227 }
15228 revenue.max(Decimal::ZERO)
15229 }
15230
15231 fn compute_entity_net_assets(
15235 entries: &[JournalEntry],
15236 entity_code: &str,
15237 ) -> rust_decimal::Decimal {
15238 use rust_decimal::Decimal;
15239 let mut asset_net = Decimal::ZERO;
15240 let mut liability_net = Decimal::ZERO;
15241 for je in entries {
15242 if je.header.company_code != entity_code {
15243 continue;
15244 }
15245 for line in &je.lines {
15246 if line.gl_account.starts_with('1') {
15247 asset_net += line.debit_amount - line.credit_amount;
15248 } else if line.gl_account.starts_with('2') {
15249 liability_net += line.credit_amount - line.debit_amount;
15250 }
15251 }
15252 }
15253 asset_net - liability_net
15254 }
15255
15256 fn phase_statistical_validation(
15267 &self,
15268 entries: &[JournalEntry],
15269 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15270 use datasynth_config::schema::StatisticalTestConfig;
15271 use datasynth_core::distributions::{
15272 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15273 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15274 };
15275 use rust_decimal::prelude::ToPrimitive;
15276
15277 let cfg = &self.config.distributions.validation;
15278 if !cfg.enabled {
15279 return Ok(None);
15280 }
15281
15282 let amounts: Vec<rust_decimal::Decimal> = entries
15285 .iter()
15286 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15287 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15288 .collect();
15289
15290 let paired_amount_linecount: Vec<(f64, f64)> = entries
15294 .iter()
15295 .filter_map(|je| {
15296 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15297 if amt > rust_decimal::Decimal::ZERO {
15298 amt.to_f64().map(|a| (a, je.lines.len() as f64))
15299 } else {
15300 None
15301 }
15302 })
15303 .collect();
15304
15305 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15306 for test_cfg in &cfg.tests {
15307 match test_cfg {
15308 StatisticalTestConfig::BenfordFirstDigit {
15309 threshold_mad,
15310 warning_mad,
15311 } => {
15312 results.push(run_benford_first_digit(
15313 &amounts,
15314 *threshold_mad,
15315 *warning_mad,
15316 ));
15317 }
15318 StatisticalTestConfig::ChiSquared { bins, significance } => {
15319 results.push(run_chi_squared(&amounts, *bins, *significance));
15320 }
15321 StatisticalTestConfig::DistributionFit {
15322 target: _,
15323 ks_significance,
15324 method: _,
15325 } => {
15326 results.push(run_ks_uniform_log(&amounts, *ks_significance));
15329 }
15330 StatisticalTestConfig::AndersonDarling {
15331 target: _,
15332 significance,
15333 } => {
15334 results.push(run_anderson_darling(&amounts, *significance));
15337 }
15338 StatisticalTestConfig::CorrelationCheck {
15339 expected_correlations,
15340 } => {
15341 if expected_correlations.is_empty() {
15345 results.push(StatisticalTestResult {
15346 name: "correlation_check".to_string(),
15347 outcome: TestOutcome::Skipped,
15348 statistic: 0.0,
15349 threshold: 0.0,
15350 message: "no expected correlations declared".to_string(),
15351 });
15352 } else {
15353 for ec in expected_correlations {
15354 let pair_key = format!("{}_{}", ec.field1, ec.field2);
15355 let is_amount_linecount = (ec.field1 == "amount"
15356 && ec.field2 == "line_count")
15357 || (ec.field1 == "line_count" && ec.field2 == "amount");
15358 if is_amount_linecount {
15359 let xs: Vec<f64> =
15360 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15361 let ys: Vec<f64> =
15362 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15363 results.push(run_correlation_check(
15364 &pair_key,
15365 &xs,
15366 &ys,
15367 ec.expected_r,
15368 ec.tolerance,
15369 ));
15370 } else {
15371 results.push(StatisticalTestResult {
15372 name: format!("correlation_check_{pair_key}"),
15373 outcome: TestOutcome::Skipped,
15374 statistic: 0.0,
15375 threshold: ec.tolerance,
15376 message: format!(
15377 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15378 ec.field1, ec.field2
15379 ),
15380 });
15381 }
15382 }
15383 }
15384 }
15385 }
15386 }
15387
15388 let report = StatisticalValidationReport {
15389 sample_count: amounts.len(),
15390 results,
15391 };
15392
15393 if cfg.reporting.fail_on_error && !report.all_passed() {
15394 let failed = report.failed_names().join(", ");
15395 return Err(SynthError::validation(format!(
15396 "statistical validation failed: {failed}"
15397 )));
15398 }
15399
15400 Ok(Some(report))
15401 }
15402
15403 fn phase_analytics_metadata(
15416 &mut self,
15417 entries: &[JournalEntry],
15418 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15419 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15420 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15421 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15422 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15423 use std::collections::BTreeMap;
15424
15425 let mut snap = AnalyticsMetadataSnapshot::default();
15426
15427 if !self.phase_config.generate_analytics_metadata {
15428 return Ok(snap);
15429 }
15430
15431 let cfg = &self.config.analytics_metadata;
15432 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15433 .map(|d| d.year())
15434 .unwrap_or(2025);
15435
15436 if cfg.prior_year {
15438 let mut gen = PriorYearGenerator::new(self.seed + 9100);
15439 for company in &self.config.companies {
15440 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15443 BTreeMap::new();
15444 for je in entries {
15445 if je.header.company_code != company.code {
15446 continue;
15447 }
15448 for line in &je.lines {
15449 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15450 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15451 });
15452 entry.1 += line.debit_amount - line.credit_amount;
15453 }
15454 }
15455 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15456 .into_iter()
15457 .filter(|(_, (_, bal))| !bal.is_zero())
15458 .map(|(code, (name, bal))| (code, name, bal))
15459 .collect();
15460 if !current.is_empty() {
15461 let comparatives =
15462 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
15463 snap.prior_year_comparatives.extend(comparatives);
15464 }
15465 }
15466 info!(
15467 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15468 snap.prior_year_comparatives.len(),
15469 self.config.companies.len()
15470 );
15471 }
15472
15473 if cfg.industry_benchmark {
15475 use datasynth_core::models::IndustrySector;
15476 let industry = match self.config.global.industry {
15477 IndustrySector::Manufacturing => "manufacturing",
15478 IndustrySector::Retail => "retail",
15479 IndustrySector::FinancialServices => "financial_services",
15480 IndustrySector::Technology => "technology",
15481 IndustrySector::Healthcare => "healthcare",
15482 _ => "other",
15483 };
15484 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15485 let benchmarks = gen.generate(industry, fiscal_year);
15486 info!(
15487 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15488 benchmarks.len()
15489 );
15490 snap.industry_benchmarks = benchmarks;
15491 }
15492
15493 if cfg.management_reports {
15495 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15496 let period_months = self.config.global.period_months;
15497 for company in &self.config.companies {
15498 let reports =
15499 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15500 snap.management_reports.extend(reports);
15501 }
15502 info!(
15503 "v3.3.0 analytics: {} management reports across {} companies",
15504 snap.management_reports.len(),
15505 self.config.companies.len()
15506 );
15507 }
15508
15509 if cfg.drift_events {
15511 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15512 .expect("hardcoded NaiveDate 2025-01-01 is valid");
15513 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15514 .unwrap_or(fallback_start);
15515 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15516 let mut gen = DriftEventGenerator::new(self.seed + 9400);
15517 let drifts = gen.generate_standalone_drifts(start_date, end_date);
15518 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15519 snap.drift_events = drifts;
15520 }
15521 let _ = entries;
15523
15524 Ok(snap)
15525 }
15526}
15527
15528fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15530 match format {
15531 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15532 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15533 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15534 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15535 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15536 }
15537}
15538
15539fn compute_trial_balance_entries(
15544 entries: &[JournalEntry],
15545 entity_code: &str,
15546 fiscal_year: i32,
15547 coa: Option<&ChartOfAccounts>,
15548) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15549 use std::collections::BTreeMap;
15550
15551 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15552 BTreeMap::new();
15553
15554 for je in entries {
15555 for line in &je.lines {
15556 let entry = balances.entry(line.account_code.clone()).or_default();
15557 entry.0 += line.debit_amount;
15558 entry.1 += line.credit_amount;
15559 }
15560 }
15561
15562 balances
15563 .into_iter()
15564 .map(
15565 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15566 account_description: coa
15567 .and_then(|c| c.get_account(&account_code))
15568 .map(|a| a.description().to_string())
15569 .unwrap_or_else(|| account_code.clone()),
15570 account_code,
15571 debit_balance: debit,
15572 credit_balance: credit,
15573 net_balance: debit - credit,
15574 entity_code: entity_code.to_string(),
15575 period: format!("FY{}", fiscal_year),
15576 },
15577 )
15578 .collect()
15579}
15580
15581#[cfg(test)]
15582#[allow(clippy::unwrap_used)]
15583mod tests {
15584 use super::*;
15585 use datasynth_config::schema::*;
15586
15587 fn create_test_config() -> GeneratorConfig {
15588 GeneratorConfig {
15589 global: GlobalConfig {
15590 industry: IndustrySector::Manufacturing,
15591 start_date: "2024-01-01".to_string(),
15592 period_months: 1,
15593 seed: Some(42),
15594 parallel: false,
15595 group_currency: "USD".to_string(),
15596 presentation_currency: None,
15597 worker_threads: 0,
15598 memory_limit_mb: 0,
15599 fiscal_year_months: None,
15600 },
15601 companies: vec![CompanyConfig {
15602 code: "1000".to_string(),
15603 name: "Test Company".to_string(),
15604 currency: "USD".to_string(),
15605 functional_currency: None,
15606 country: "US".to_string(),
15607 annual_transaction_volume: TransactionVolume::TenK,
15608 volume_weight: 1.0,
15609 fiscal_year_variant: "K4".to_string(),
15610 }],
15611 chart_of_accounts: ChartOfAccountsConfig {
15612 complexity: CoAComplexity::Small,
15613 industry_specific: true,
15614 custom_accounts: None,
15615 min_hierarchy_depth: 2,
15616 max_hierarchy_depth: 4,
15617 expand_industry_subaccounts: false,
15618 },
15619 transactions: TransactionConfig::default(),
15620 output: OutputConfig::default(),
15621 fraud: FraudConfig::default(),
15622 internal_controls: InternalControlsConfig::default(),
15623 business_processes: BusinessProcessConfig::default(),
15624 user_personas: UserPersonaConfig::default(),
15625 templates: TemplateConfig::default(),
15626 approval: ApprovalConfig::default(),
15627 departments: DepartmentConfig::default(),
15628 master_data: MasterDataConfig::default(),
15629 document_flows: DocumentFlowConfig::default(),
15630 intercompany: IntercompanyConfig::default(),
15631 balance: BalanceConfig::default(),
15632 ocpm: OcpmConfig::default(),
15633 audit: AuditGenerationConfig::default(),
15634 banking: datasynth_banking::BankingConfig::default(),
15635 data_quality: DataQualitySchemaConfig::default(),
15636 scenario: ScenarioConfig::default(),
15637 temporal: TemporalDriftConfig::default(),
15638 graph_export: GraphExportConfig::default(),
15639 streaming: StreamingSchemaConfig::default(),
15640 rate_limit: RateLimitSchemaConfig::default(),
15641 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15642 relationships: RelationshipSchemaConfig::default(),
15643 accounting_standards: AccountingStandardsConfig::default(),
15644 audit_standards: AuditStandardsConfig::default(),
15645 distributions: Default::default(),
15646 temporal_patterns: Default::default(),
15647 vendor_network: VendorNetworkSchemaConfig::default(),
15648 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15649 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15650 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15651 organizational_events: OrganizationalEventsSchemaConfig::default(),
15652 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15653 market_drift: MarketDriftSchemaConfig::default(),
15654 drift_labeling: DriftLabelingSchemaConfig::default(),
15655 anomaly_injection: Default::default(),
15656 industry_specific: Default::default(),
15657 fingerprint_privacy: Default::default(),
15658 quality_gates: Default::default(),
15659 compliance: Default::default(),
15660 webhooks: Default::default(),
15661 llm: Default::default(),
15662 diffusion: Default::default(),
15663 causal: Default::default(),
15664 source_to_pay: Default::default(),
15665 financial_reporting: Default::default(),
15666 hr: Default::default(),
15667 manufacturing: Default::default(),
15668 sales_quotes: Default::default(),
15669 tax: Default::default(),
15670 treasury: Default::default(),
15671 project_accounting: Default::default(),
15672 esg: Default::default(),
15673 country_packs: None,
15674 scenarios: Default::default(),
15675 session: Default::default(),
15676 compliance_regulations: Default::default(),
15677 analytics_metadata: Default::default(),
15678 }
15679 }
15680
/// The orchestrator can be constructed from the test config with default phases.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
15687
/// With only journal-entry generation enabled, a run succeeds and yields
/// at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = orchestrator.generate();

    // Check success explicitly before unwrapping.
    assert!(outcome.is_ok());
    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
15707
/// With only master-data generation enabled, vendors, customers and
/// materials are all produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
15732
/// With master data and document flows enabled (journal entries, anomalies,
/// data-quality injection, validation and OCPM events disabled), both P2P and
/// O2C chains and their leading documents are produced.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document lists are populated too.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
15767
/// Journal entries with anomaly injection enabled: entries must exist and the
/// anomaly label summary must be present.
#[test]
fn test_anomaly_injection() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        // The switch under test.
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    // Only the presence of a summary is checked, not its contents.
    assert!(output.anomaly_labels.summary.is_some());
}
15790
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that every stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Enabled phases.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        // Disabled phases.
        inject_anomalies: false,
        inject_data_quality: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Per-company entity counts and chain counts.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Master data, document flows, journal entries, account statistics.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices on both the AP and AR side.
    assert!(!output.subledger.ap_invoices.is_empty());
    assert!(!output.subledger.ar_invoices.is_empty());

    // Balance validation ran and processed at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
15833
/// Subledger invoices must line up one-to-one (by count) with the invoices in
/// the document flows, and the statistics must report the same counts.
#[test]
fn test_subledger_linking() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Enabled phases.
        generate_master_data: true,
        generate_document_flows: true,
        // Disabled phases.
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Per-company entity counts and chain counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // AP subledger count equals vendor invoice count.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // AR subledger count equals customer invoice count.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics agree with the actual collections.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
15890
/// Balance validation over generated journal entries: validation must have
/// run, found no unbalanced entries, and reported equal debit/credit totals.
#[test]
fn test_balance_validation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        // The phase under test.
        validate_balances: true,
        validate_coa_coverage_strict: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation ran over a non-empty set of entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Aggregate debits and credits match exactly.
    assert_eq!(validation.total_debits, validation.total_credits);
}
15921
/// The counters in `result.statistics` must equal the lengths of the
/// collections they describe.
#[test]
fn test_statistics_accuracy() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Distinct counts per entity kind.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    // `total_entries` is cast to `usize` to compare against `len()`.
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
15960
/// Pins the values `PhaseConfig::default()` is expected to produce for the
/// core phase switches and the vendor/customer counts.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Core generation phases are on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is off by default.
    assert!(!defaults.inject_anomalies);

    // Validation and progress display are on by default.
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default per-company counts are non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
15973
/// A freshly constructed orchestrator on which `generate()` has not been
/// called must return `None` from `get_coa()`.
#[test]
fn test_get_coa_before_generation() {
    let cfg = create_test_config();
    let engine = EnhancedOrchestrator::with_defaults(cfg).unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_none());
}
15982
/// After a successful `generate()` call, `get_coa()` must return `Some`.
#[test]
fn test_get_coa_after_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    // The generation result itself is not inspected; only the state left on
    // the orchestrator matters here.
    let _ = engine.generate().unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_some());
}
16001
/// Master-data-only run with five of each entity kind per company; the
/// returned result must contain vendors.
#[test]
fn test_get_master_data() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
16025
/// `with_progress(false)` must leave `phase_config.show_progress` set to
/// `false` on the returned orchestrator.
#[test]
fn test_with_progress_builder() {
    let cfg = create_test_config();
    let built = EnhancedOrchestrator::with_defaults(cfg).unwrap();
    let quiet = built.with_progress(false);

    let progress_enabled = quiet.phase_config.show_progress;
    assert!(!progress_enabled);
}
16036
/// Adds a second company to the test configuration and checks that the
/// statistics reflect two companies and correspondingly larger vendor and
/// customer totals.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across the two companies asserted below => at least 10.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
16073
/// Requesting document flows while master data generation is disabled must
/// yield no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // No master data is generated...
        generate_master_data: false,
        // ...but document flows are requested anyway.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
16093
/// `statistics.total_line_items` must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total independently of the statistics block.
    let mut expected_total: u64 = 0;
    for entry in output.journal_entries.iter() {
        expected_total += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, expected_total);
}
16117
/// Enables audit generation with two engagements and verifies that the audit
/// artifact collections are populated and that every audit counter in the
/// statistics matches the length of its collection.
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Audit phase and its sizing parameters.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Exactly the requested number of engagements, with the core artifacts
    // populated.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Additional artifact collections; the messages name the standard each
    // collection is associated with.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics counters must mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
16237
/// The test configuration leaves the LLM, diffusion and causal phases
/// disabled, so their statistics must stay at zero / `None` and no
/// counterfactual pairs may be produced.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Precondition: none of the three phases is enabled in the config.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal generation.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // Counterfactuals.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16269
/// With counterfactual generation enabled, there must be exactly one
/// counterfactual pair per journal entry, the statistics must report the same
/// count, and all pair IDs must be distinct.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // NOTE(review): the checks below only run when journal entries exist, so
    // this test passes vacuously if generation yields none — confirm that is
    // intended.
    if !result.journal_entries.is_empty() {
        assert_eq!(
            result.counterfactual_pairs.len(),
            result.journal_entries.len()
        );
        assert_eq!(
            result.statistics.counterfactual_pair_count,
            result.journal_entries.len()
        );

        // Uniqueness check: borrow the IDs instead of cloning each one; a set
        // of references deduplicates by value just the same.
        let ids: std::collections::HashSet<_> = result
            .counterfactual_pairs
            .iter()
            .map(|p| &p.pair_id)
            .collect();
        assert_eq!(ids.len(), result.counterfactual_pairs.len());
    }
}
16306
/// With LLM enrichment enabled and capped at three vendors, the number of
/// enriched vendors must be between 1 and 3 inclusive.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // More vendors are requested than the enrichment cap allows.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
16334
/// With diffusion enabled and `sample_size = 20`, the statistics must report
/// exactly 20 generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Matches the configured `sample_size`.
    let generated = output.statistics.diffusion_samples_generated;
    assert_eq!(generated, 20);
}
16357
/// Causal generation with the `fraud_detection` template and validation on:
/// 100 samples must be reported and a validation outcome must be recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Matches the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
16383
/// Causal generation with the `revenue_cycle` template and validation off:
/// 50 samples must be reported and no validation outcome recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Matches the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
16409
/// Enables LLM enrichment, diffusion and causal generation in a single run
/// and checks that each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal generation with validation.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
16445
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that
/// the LLM, diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    // LLM fields.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);

    // Diffusion fields.
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);

    // Causal fields.
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
16472
/// A statistics JSON document that lacks the LLM, diffusion and causal keys
/// must still deserialize, with those fields taking zero / `None` values.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any of the llm_*, diffusion_* or causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Every absent field must come back as zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
16522}