1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Switches and volume settings that control which generation phases run.
///
/// Boolean fields turn a phase on or off; `usize` fields set how many records a phase is
/// asked to produce.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data.
    pub generate_master_data: bool,
    /// Generate document flows.
    pub generate_document_flows: bool,
    /// Generate OCPM events.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data-quality issues.
    pub inject_data_quality: bool,
    /// Validate balances.
    pub validate_balances: bool,
    /// Show progress output.
    pub show_progress: bool,
    /// Vendors per company.
    pub vendors_per_company: usize,
    /// Customers per company.
    pub customers_per_company: usize,
    /// Materials per company.
    pub materials_per_company: usize,
    /// Assets per company.
    pub assets_per_company: usize,
    /// Employees per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Generate audit data.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risks per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking data.
    pub generate_banking: bool,
    /// Generate graph exports.
    pub generate_graph_export: bool,
    /// Generate sourcing data.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements.
    pub generate_financial_statements: bool,
    /// Generate accounting-standards data.
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data.
    pub generate_manufacturing: bool,
    /// Generate sales quotes, KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax data.
    pub generate_tax: bool,
    /// Generate ESG data.
    pub generate_esg: bool,
    /// Generate intercompany data.
    pub generate_intercompany: bool,
    /// Generate evolution events.
    pub generate_evolution_events: bool,
    /// Generate counterfactuals.
    pub generate_counterfactuals: bool,
    /// Generate compliance-regulations data.
    pub generate_compliance_regulations: bool,
    /// Generate period-close data.
    pub generate_period_close: bool,
    /// Generate HR data.
    pub generate_hr: bool,
    /// Generate treasury data.
    pub generate_treasury: bool,
    /// Generate project-accounting data.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: a small set of phases switched on, every other
    /// phase off, and fixed volume settings.
    fn default() -> Self {
        Self {
            // Phases that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // Phases that are off by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // Master-data and document-flow volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
/// Master-data records grouped into a single snapshot.
///
/// `Default` is derived, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Summary of a hypergraph export: element counts plus an output path.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    // NOTE(review): whether this is a file or a directory is not visible here — confirm.
    pub output_path: PathBuf,
}
474
/// Document-flow records grouped into a single snapshot: the P2P and O2C chains together
/// with per-document-type vectors.
///
/// `Default` is derived, so a default snapshot has every vector empty.
// NOTE(review): presumably the per-type vectors are the flattened contents of the chains;
// nothing in this type enforces that — confirm with the producer.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Subledger records (AP, AR, fixed assets, inventory) grouped into a single snapshot.
///
/// `Default` is derived, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// OCPM output: an optional event log plus summary counts.
///
/// `Default` is derived: no event log and all counts zero.
// NOTE(review): the counts are stored separately from `event_log`; presumably they describe
// its contents, but this type does not enforce that — confirm with the producer.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, or `None` when there is none.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
540
/// Audit-related records grouped into a single snapshot.
///
/// `Default` is derived: every vector is empty and every `Option` is `None`.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, or `None` when there is none.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mappings.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, or `None` when there is none.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
/// Banking records and their labels grouped into a single snapshot.
///
/// `Default` is derived: every vector is empty and both counts are zero.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item count.
    // NOTE(review): what is counted (transactions, customers, ...) is not visible here — confirm.
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
678
/// Summary of graph exports. Serializable via `serde::Serialize`.
///
/// `Default` is derived: `exported` is `false`, the count is zero and the map is empty.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details, keyed by a string.
    // NOTE(review): the key is presumably the export name or format — confirm with the producer.
    pub exports: HashMap<String, GraphExportInfo>,
}
689
/// Details of a single graph export. Serializable via `serde::Serialize`.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Format of the export, as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
704
/// Sourcing records grouped into a single snapshot.
///
/// `Default` is derived, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
727
/// Trial balance for one fiscal period. Serializable and deserializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period within the year.
    // NOTE(review): numbering base (0- or 1-based) is not visible here — confirm.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period.
    // NOTE(review): presumably inclusive — confirm with the producer.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
742
/// Financial-reporting records grouped into a single snapshot.
///
/// `Default` is derived, so a default snapshot has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements, grouped under a string key.
    // NOTE(review): the key is presumably an entity/company code — confirm with the producer.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
767
/// HR records (payroll, time, expenses, benefits, pensions, stock compensation) grouped
/// into a single snapshot, plus summary counts.
///
/// `Default` is derived: every vector is empty and every count is zero.
// NOTE(review): the `*_count` fields are stored independently of the vectors; presumably
// they mirror the vector lengths, but this type does not enforce that — confirm.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries for pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries for stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Number of payroll runs.
    pub payroll_run_count: usize,
    /// Number of payroll line items.
    pub payroll_line_item_count: usize,
    /// Number of time entries.
    pub time_entry_count: usize,
    /// Number of expense reports.
    pub expense_report_count: usize,
    /// Number of benefit enrollments.
    pub benefit_enrollment_count: usize,
    /// Number of pension plans.
    pub pension_plan_count: usize,
    /// Number of stock grants.
    pub stock_grant_count: usize,
}
812
/// Accounting-standards records grouped into a single snapshot, plus summary counts.
///
/// `Default` is derived: every vector is empty and every count is zero.
// NOTE(review): the `*_count` fields are stored independently of the vectors; presumably
// they mirror the vector lengths, but this type does not enforce that — confirm.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries for ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries for provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Number of revenue contracts.
    pub revenue_contract_count: usize,
    /// Number of impairment tests.
    pub impairment_test_count: usize,
    /// Number of business combinations.
    pub business_combination_count: usize,
    /// Number of ECL models.
    pub ecl_model_count: usize,
    /// Number of provisions.
    pub provision_count: usize,
    /// Number of currency translations.
    pub currency_translation_count: usize,
}
856
/// Compliance and regulatory records grouped into a single snapshot.
///
/// `Default` is derived: every vector is empty and `compliance_graph` is `None`.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, or `None` when there is none.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
875
/// Manufacturing records grouped into a single snapshot, plus summary counts.
///
/// `Default` is derived: every vector is empty and every count is zero.
// NOTE(review): the `*_count` fields are stored independently of the vectors; presumably
// they mirror the vector lengths, but this type does not enforce that — confirm.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Number of production orders.
    pub production_order_count: usize,
    /// Number of quality inspections.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// Number of BOM components.
    pub bom_component_count: usize,
    /// Number of inventory movements.
    pub inventory_movement_count: usize,
}
900
/// Sales quotes, management KPIs and budgets grouped into a single snapshot, plus summary
/// counts.
///
/// `Default` is derived: every vector is empty and every count is zero.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Number of sales quotes.
    pub sales_quote_count: usize,
    /// Number of KPIs.
    pub kpi_count: usize,
    /// Number of budget lines.
    // NOTE(review): named for budget *lines*, so this is likely not `budgets.len()` — confirm.
    pub budget_line_count: usize,
}
917
/// Anomaly labels together with an optional summary and a per-type tally.
///
/// `Default` is derived: no labels, no summary, empty tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, or `None` when there is none.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string.
    // NOTE(review): the key is presumably the anomaly type name — confirm with the producer.
    pub by_type: HashMap<String, usize>,
}
928
/// Outcome of balance validation.
///
/// `Default` is derived: all flags `false`, all counters zero, no errors. A default value
/// therefore has `validated == false`.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the books balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
951
/// Tax records grouped into a single snapshot, plus summary counts.
///
/// `Default` is derived: every vector is empty, counts are zero and `deferred_tax` takes
/// its own default.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Number of jurisdictions.
    pub jurisdiction_count: usize,
    /// Number of tax codes.
    pub code_count: usize,
    /// Deferred-tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
978
/// Intercompany records grouped into a single snapshot, plus summary figures.
///
/// Serializable and deserializable via serde, except for `ic_document_chains`, which is
/// skipped. `Default` is derived: collections empty, options `None`, numbers zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, or `None` when there is none.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains, or `None` when there are none.
    ///
    /// Marked `#[serde(skip)]`: never serialized, and set to its default (`None`) when the
    /// snapshot is deserialized.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Number of matched pairs.
    pub matched_pair_count: usize,
    /// Number of elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate.
    // NOTE(review): scale (fraction 0..1 vs. percentage) is not visible here — confirm.
    pub match_rate: f64,
}
1004
/// ESG records grouped into a single snapshot, plus summary counts.
///
/// `Default` is derived: every vector is empty and every count is zero.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Number of emission records.
    pub emission_count: usize,
    /// Number of disclosures.
    pub disclosure_count: usize,
}
1041
/// Treasury records grouped into a single snapshot.
///
/// `Default` is derived, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries held with the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1069
/// Project-accounting records grouped into a single snapshot.
///
/// `Default` is derived, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1086
/// Aggregate result of a generation run: the chart of accounts, every per-domain snapshot,
/// the journal entries, and supporting labels, statistics and diagnostics.
///
/// Derives `Debug` and `Default` only; it is not `Clone`. A default value has every
/// collection empty, every `Option` set to `None`, and each snapshot at its own default.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph-export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Run statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, or `None` when there is none.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality-gate result, or `None` when there is none.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains of vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, or `None` when there is none.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, or `None` when there is none.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1178
/// Counters and timings describing a single generation run.
///
/// `EnhancedOrchestrator::generate` creates one instance (seeding
/// `companies_count` and `period_months` from the configuration, everything
/// else from `Default`) and passes it as `&mut` to each phase method.
///
/// Fields annotated with `#[serde(default)]` may be missing from serialized
/// input; they then deserialize to their type's default (`0` / `None`).
/// Fields without the attribute are required when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Number of journal entries; `generate` sets this to the length of the
    /// combined entry list once all JE-producing phases have run.
    pub total_entries: u64,
    /// Sum of `line_count()` over the same journal entries.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Number of companies in the configuration (`config.companies.len()`).
    pub companies_count: usize,
    /// Copied from `config.global.period_months`.
    pub period_months: u32,

    // Master-data counters.
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // Document-flow and subledger invoice counters.
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // OCPM (object-centric process mining) counters.
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // Audit counters.
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // Anomaly and data-quality injection counters.
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // Banking counters.
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // Graph export counters.
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // LLM enrichment, diffusion enhancement and causal overlay.
    // The `_ms` suffix suggests milliseconds -- TODO confirm against the phase code.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` by default; the meaning of `Some(true/false)` is decided by the
    /// causal phase, which is not visible next to this definition.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // Sourcing counters.
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // Financial reporting counters.
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // HR counters.
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // Accounting-standards counters.
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // Manufacturing counters.
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // Sales quotes, KPIs and budgets.
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // Tax master-data counters.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // ESG counters.
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // Intercompany counters.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // Fixed-asset and inventory subledger counters.
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // Treasury instrument counters.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // Project accounting counters.
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // Further tax, opening-balance and reconciliation counters.
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // Cash management counters.
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // Process / organizational event counters.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // Counterfactual and fraud-signal counters.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // Temporal versioning, entity graph and cross-process links.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // Disruption, industry-specific and period-close counters.
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1429
/// Drives the multi-phase generation workflow (`generate`) and holds the
/// state shared between phases.
pub struct EnhancedOrchestrator {
    /// Generator configuration; checked with `validate_config` in `new`.
    config: GeneratorConfig,
    /// Phase settings; `with_progress` writes its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Chart of accounts; starts as `None` in `new`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// `config.global.seed` when set, otherwise a random value picked in `new`.
    seed: u64,
    /// Progress container created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource monitor produced by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output directory recorded by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; filled by `from_fingerprint_data`, empty otherwise.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry loaded in `new` (external directory plus
    /// overrides when configured, built-in packs otherwise).
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink set via `with_phase_sink` and used by
    /// `emit_phase_items`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453 datasynth_config::validate_config(&config)?;
1454
1455 let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457 let resource_guard = Self::build_resource_guard(&config, None);
1459
1460 let country_pack_registry = match &config.country_packs {
1462 Some(cp) => {
1463 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464 .map_err(|e| SynthError::config(e.to_string()))?
1465 }
1466 None => datasynth_core::CountryPackRegistry::builtin_only()
1467 .map_err(|e| SynthError::config(e.to_string()))?,
1468 };
1469
1470 Ok(Self {
1471 config,
1472 phase_config,
1473 coa: None,
1474 master_data: MasterDataSnapshot::default(),
1475 seed,
1476 multi_progress: None,
1477 resource_guard,
1478 output_path: None,
1479 copula_generators: Vec::new(),
1480 country_pack_registry,
1481 phase_sink: None,
1482 })
1483 }
1484
1485 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487 Self::new(config, PhaseConfig::default())
1488 }
1489
1490 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492 self.phase_sink = Some(sink);
1493 self
1494 }
1495
1496 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1498 if let Some(ref sink) = self.phase_sink {
1499 for item in items {
1500 if let Ok(value) = serde_json::to_value(item) {
1501 if let Err(e) = sink.emit(phase, type_name, &value) {
1502 warn!(
1503 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1504 );
1505 }
1506 }
1507 }
1508 if let Err(e) = sink.phase_complete(phase) {
1509 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1510 }
1511 }
1512 }
1513
1514 pub fn with_progress(mut self, show: bool) -> Self {
1516 self.phase_config.show_progress = show;
1517 if show {
1518 self.multi_progress = Some(MultiProgress::new());
1519 }
1520 self
1521 }
1522
1523 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1525 let path = path.into();
1526 self.output_path = Some(path.clone());
1527 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1529 self
1530 }
1531
1532 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1534 &self.country_pack_registry
1535 }
1536
1537 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1539 self.country_pack_registry.get_by_str(country)
1540 }
1541
1542 fn primary_country_code(&self) -> &str {
1545 self.config
1546 .companies
1547 .first()
1548 .map(|c| c.country.as_str())
1549 .unwrap_or("US")
1550 }
1551
1552 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1554 self.country_pack_for(self.primary_country_code())
1555 }
1556
1557 fn resolve_coa_framework(&self) -> CoAFramework {
1559 if self.config.accounting_standards.enabled {
1560 match self.config.accounting_standards.framework {
1561 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1562 return CoAFramework::FrenchPcg;
1563 }
1564 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1565 return CoAFramework::GermanSkr04;
1566 }
1567 _ => {}
1568 }
1569 }
1570 let pack = self.primary_pack();
1572 match pack.accounting.framework.as_str() {
1573 "french_gaap" => CoAFramework::FrenchPcg,
1574 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1575 _ => CoAFramework::UsGaap,
1576 }
1577 }
1578
1579 pub fn has_copulas(&self) -> bool {
1584 !self.copula_generators.is_empty()
1585 }
1586
1587 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1593 &self.copula_generators
1594 }
1595
1596 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1600 &mut self.copula_generators
1601 }
1602
1603 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1607 self.copula_generators
1608 .iter_mut()
1609 .find(|c| c.name == copula_name)
1610 .map(|c| c.generator.sample())
1611 }
1612
1613 pub fn from_fingerprint(
1636 fingerprint_path: &std::path::Path,
1637 phase_config: PhaseConfig,
1638 scale: f64,
1639 ) -> SynthResult<Self> {
1640 info!("Loading fingerprint from: {}", fingerprint_path.display());
1641
1642 let reader = FingerprintReader::new();
1644 let fingerprint = reader
1645 .read_from_file(fingerprint_path)
1646 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1647
1648 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1649 }
1650
1651 pub fn from_fingerprint_data(
1658 fingerprint: Fingerprint,
1659 phase_config: PhaseConfig,
1660 scale: f64,
1661 ) -> SynthResult<Self> {
1662 info!(
1663 "Synthesizing config from fingerprint (version: {}, tables: {})",
1664 fingerprint.manifest.version,
1665 fingerprint.schema.tables.len()
1666 );
1667
1668 let seed: u64 = rand::random();
1670 info!("Fingerprint synthesis seed: {}", seed);
1671
1672 let options = SynthesisOptions {
1674 scale,
1675 seed: Some(seed),
1676 preserve_correlations: true,
1677 inject_anomalies: true,
1678 };
1679 let synthesizer = ConfigSynthesizer::with_options(options);
1680
1681 let synthesis_result = synthesizer
1683 .synthesize_full(&fingerprint, seed)
1684 .map_err(|e| {
1685 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1686 })?;
1687
1688 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1690 Self::base_config_for_industry(industry)
1691 } else {
1692 Self::base_config_for_industry("manufacturing")
1693 };
1694
1695 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1697
1698 info!(
1700 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1701 fingerprint.schema.tables.len(),
1702 scale,
1703 synthesis_result.copula_generators.len()
1704 );
1705
1706 if !synthesis_result.copula_generators.is_empty() {
1707 for spec in &synthesis_result.copula_generators {
1708 info!(
1709 " Copula '{}' for table '{}': {} columns",
1710 spec.name,
1711 spec.table,
1712 spec.columns.len()
1713 );
1714 }
1715 }
1716
1717 let mut orchestrator = Self::new(config, phase_config)?;
1719
1720 orchestrator.copula_generators = synthesis_result.copula_generators;
1722
1723 Ok(orchestrator)
1724 }
1725
1726 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1728 use datasynth_config::presets::create_preset;
1729 use datasynth_config::TransactionVolume;
1730 use datasynth_core::models::{CoAComplexity, IndustrySector};
1731
1732 let sector = match industry.to_lowercase().as_str() {
1733 "manufacturing" => IndustrySector::Manufacturing,
1734 "retail" => IndustrySector::Retail,
1735 "financial" | "financial_services" => IndustrySector::FinancialServices,
1736 "healthcare" => IndustrySector::Healthcare,
1737 "technology" | "tech" => IndustrySector::Technology,
1738 _ => IndustrySector::Manufacturing,
1739 };
1740
1741 create_preset(
1743 sector,
1744 1, 12, CoAComplexity::Medium,
1747 TransactionVolume::TenK,
1748 )
1749 }
1750
1751 fn apply_config_patch(
1753 mut config: GeneratorConfig,
1754 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1755 ) -> GeneratorConfig {
1756 use datasynth_fingerprint::synthesis::ConfigValue;
1757
1758 for (key, value) in patch.values() {
1759 match (key.as_str(), value) {
1760 ("transactions.count", ConfigValue::Integer(n)) => {
1763 info!(
1764 "Fingerprint suggests {} transactions (apply via company volumes)",
1765 n
1766 );
1767 }
1768 ("global.period_months", ConfigValue::Integer(n)) => {
1769 config.global.period_months = (*n).clamp(1, 120) as u32;
1770 }
1771 ("global.start_date", ConfigValue::String(s)) => {
1772 config.global.start_date = s.clone();
1773 }
1774 ("global.seed", ConfigValue::Integer(n)) => {
1775 config.global.seed = Some(*n as u64);
1776 }
1777 ("fraud.enabled", ConfigValue::Bool(b)) => {
1778 config.fraud.enabled = *b;
1779 }
1780 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1781 config.fraud.fraud_rate = *f;
1782 }
1783 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1784 config.data_quality.enabled = *b;
1785 }
1786 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1788 config.fraud.enabled = *b;
1789 }
1790 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1791 config.fraud.fraud_rate = *f;
1792 }
1793 _ => {
1794 debug!("Ignoring unknown config patch key: {}", key);
1795 }
1796 }
1797 }
1798
1799 config
1800 }
1801
1802 fn build_resource_guard(
1804 config: &GeneratorConfig,
1805 output_path: Option<PathBuf>,
1806 ) -> ResourceGuard {
1807 let mut builder = ResourceGuardBuilder::new();
1808
1809 if config.global.memory_limit_mb > 0 {
1811 builder = builder.memory_limit(config.global.memory_limit_mb);
1812 }
1813
1814 if let Some(path) = output_path {
1816 builder = builder.output_path(path).min_free_disk(100); }
1818
1819 builder = builder.conservative();
1821
1822 builder.build()
1823 }
1824
1825 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1830 self.resource_guard.check()
1831 }
1832
1833 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1835 let level = self.resource_guard.check()?;
1836
1837 if level != DegradationLevel::Normal {
1838 warn!(
1839 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1840 phase,
1841 level,
1842 self.resource_guard.current_memory_mb(),
1843 self.resource_guard.available_disk_mb()
1844 );
1845 }
1846
1847 Ok(level)
1848 }
1849
1850 fn get_degradation_actions(&self) -> DegradationActions {
1852 self.resource_guard.get_actions()
1853 }
1854
1855 fn check_memory_limit(&self) -> SynthResult<()> {
1857 self.check_resources()?;
1858 Ok(())
1859 }
1860
1861 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1863 info!("Starting enhanced generation workflow");
1864 info!(
1865 "Config: industry={:?}, period_months={}, companies={}",
1866 self.config.global.industry,
1867 self.config.global.period_months,
1868 self.config.companies.len()
1869 );
1870
1871 let initial_level = self.check_resources_with_log("initial")?;
1873 if initial_level == DegradationLevel::Emergency {
1874 return Err(SynthError::resource(
1875 "Insufficient resources to start generation",
1876 ));
1877 }
1878
1879 let mut stats = EnhancedGenerationStatistics {
1880 companies_count: self.config.companies.len(),
1881 period_months: self.config.global.period_months,
1882 ..Default::default()
1883 };
1884
1885 let coa = self.phase_chart_of_accounts(&mut stats)?;
1887
1888 self.phase_master_data(&mut stats)?;
1890
1891 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1893 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1894 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1895
1896 let (mut document_flows, mut subledger, fa_journal_entries) =
1898 self.phase_document_flows(&mut stats)?;
1899
1900 self.emit_phase_items(
1902 "document_flows",
1903 "PurchaseOrder",
1904 &document_flows.purchase_orders,
1905 );
1906 self.emit_phase_items(
1907 "document_flows",
1908 "GoodsReceipt",
1909 &document_flows.goods_receipts,
1910 );
1911 self.emit_phase_items(
1912 "document_flows",
1913 "VendorInvoice",
1914 &document_flows.vendor_invoices,
1915 );
1916 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1917 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1918
1919 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1921
1922 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1927 .iter()
1928 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1929 .collect();
1930 if !opening_balance_jes.is_empty() {
1931 debug!(
1932 "Prepending {} opening balance JEs to entries",
1933 opening_balance_jes.len()
1934 );
1935 }
1936
1937 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1939
1940 if !opening_balance_jes.is_empty() {
1943 let mut combined = opening_balance_jes;
1944 combined.extend(entries);
1945 entries = combined;
1946 }
1947
1948 if !fa_journal_entries.is_empty() {
1950 debug!(
1951 "Appending {} FA acquisition JEs to main entries",
1952 fa_journal_entries.len()
1953 );
1954 entries.extend(fa_journal_entries);
1955 }
1956
1957 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1959
1960 let actions = self.get_degradation_actions();
1962
1963 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1965
1966 if !sourcing.contracts.is_empty() {
1969 let mut linked_count = 0usize;
1970 let po_vendor_pairs: Vec<(String, String)> = document_flows
1972 .p2p_chains
1973 .iter()
1974 .map(|chain| {
1975 (
1976 chain.purchase_order.vendor_id.clone(),
1977 chain.purchase_order.header.document_id.clone(),
1978 )
1979 })
1980 .collect();
1981
1982 for chain in &mut document_flows.p2p_chains {
1983 if chain.purchase_order.contract_id.is_none() {
1984 if let Some(contract) = sourcing
1985 .contracts
1986 .iter()
1987 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1988 {
1989 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1990 linked_count += 1;
1991 }
1992 }
1993 }
1994
1995 for contract in &mut sourcing.contracts {
1997 let po_ids: Vec<String> = po_vendor_pairs
1998 .iter()
1999 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2000 .map(|(_, po_id)| po_id.clone())
2001 .collect();
2002 if !po_ids.is_empty() {
2003 contract.purchase_order_ids = po_ids;
2004 }
2005 }
2006
2007 if linked_count > 0 {
2008 debug!(
2009 "Linked {} purchase orders to S2C contracts by vendor match",
2010 linked_count
2011 );
2012 }
2013 }
2014
2015 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2017
2018 if !intercompany.seller_journal_entries.is_empty()
2020 || !intercompany.buyer_journal_entries.is_empty()
2021 {
2022 let ic_je_count = intercompany.seller_journal_entries.len()
2023 + intercompany.buyer_journal_entries.len();
2024 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2025 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2026 debug!(
2027 "Appended {} IC journal entries to main entries",
2028 ic_je_count
2029 );
2030 }
2031
2032 if !intercompany.elimination_entries.is_empty() {
2034 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2035 &intercompany.elimination_entries,
2036 );
2037 if !elim_jes.is_empty() {
2038 debug!(
2039 "Appended {} elimination journal entries to main entries",
2040 elim_jes.len()
2041 );
2042 let elim_debit: rust_decimal::Decimal =
2044 elim_jes.iter().map(|je| je.total_debit()).sum();
2045 let elim_credit: rust_decimal::Decimal =
2046 elim_jes.iter().map(|je| je.total_credit()).sum();
2047 if elim_debit != elim_credit {
2048 warn!(
2049 "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2050 elim_debit,
2051 elim_credit,
2052 elim_debit - elim_credit
2053 );
2054 }
2055 entries.extend(elim_jes);
2056 }
2057 }
2058
2059 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2061 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2062 document_flows
2063 .customer_invoices
2064 .extend(ic_docs.seller_invoices.iter().cloned());
2065 document_flows
2066 .purchase_orders
2067 .extend(ic_docs.buyer_orders.iter().cloned());
2068 document_flows
2069 .goods_receipts
2070 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2071 document_flows
2072 .vendor_invoices
2073 .extend(ic_docs.buyer_invoices.iter().cloned());
2074 debug!(
2075 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2076 ic_docs.seller_invoices.len(),
2077 ic_docs.buyer_orders.len(),
2078 ic_docs.buyer_goods_receipts.len(),
2079 ic_docs.buyer_invoices.len(),
2080 );
2081 }
2082 }
2083
2084 let hr = self.phase_hr_data(&mut stats)?;
2086
2087 if !hr.payroll_runs.is_empty() {
2089 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2090 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2091 entries.extend(payroll_jes);
2092 }
2093
2094 if !hr.pension_journal_entries.is_empty() {
2096 debug!(
2097 "Generated {} JEs from pension plans",
2098 hr.pension_journal_entries.len()
2099 );
2100 entries.extend(hr.pension_journal_entries.iter().cloned());
2101 }
2102
2103 if !hr.stock_comp_journal_entries.is_empty() {
2105 debug!(
2106 "Generated {} JEs from stock-based compensation",
2107 hr.stock_comp_journal_entries.len()
2108 );
2109 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2110 }
2111
2112 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2114
2115 if !manufacturing_snap.production_orders.is_empty() {
2117 let currency = self
2118 .config
2119 .companies
2120 .first()
2121 .map(|c| c.currency.as_str())
2122 .unwrap_or("USD");
2123 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2124 &manufacturing_snap.production_orders,
2125 &manufacturing_snap.quality_inspections,
2126 currency,
2127 );
2128 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2129 entries.extend(mfg_jes);
2130 }
2131
2132 if !manufacturing_snap.quality_inspections.is_empty() {
2134 let framework = match self.config.accounting_standards.framework {
2135 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2136 _ => "US_GAAP",
2137 };
2138 for company in &self.config.companies {
2139 let company_orders: Vec<_> = manufacturing_snap
2140 .production_orders
2141 .iter()
2142 .filter(|o| o.company_code == company.code)
2143 .cloned()
2144 .collect();
2145 let company_inspections: Vec<_> = manufacturing_snap
2146 .quality_inspections
2147 .iter()
2148 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2149 .cloned()
2150 .collect();
2151 if company_inspections.is_empty() {
2152 continue;
2153 }
2154 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2155 let warranty_result = warranty_gen.generate(
2156 &company.code,
2157 &company_orders,
2158 &company_inspections,
2159 &company.currency,
2160 framework,
2161 );
2162 if !warranty_result.journal_entries.is_empty() {
2163 debug!(
2164 "Generated {} warranty provision JEs for {}",
2165 warranty_result.journal_entries.len(),
2166 company.code
2167 );
2168 entries.extend(warranty_result.journal_entries);
2169 }
2170 }
2171 }
2172
2173 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2175 {
2176 let cogs_currency = self
2177 .config
2178 .companies
2179 .first()
2180 .map(|c| c.currency.as_str())
2181 .unwrap_or("USD");
2182 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2183 &document_flows.deliveries,
2184 &manufacturing_snap.production_orders,
2185 cogs_currency,
2186 );
2187 if !cogs_jes.is_empty() {
2188 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2189 entries.extend(cogs_jes);
2190 }
2191 }
2192
2193 if !manufacturing_snap.inventory_movements.is_empty()
2199 && !subledger.inventory_positions.is_empty()
2200 {
2201 use datasynth_core::models::MovementType as MfgMovementType;
2202 let mut receipt_count = 0usize;
2203 let mut issue_count = 0usize;
2204 for movement in &manufacturing_snap.inventory_movements {
2205 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2207 p.material_id == movement.material_code
2208 && p.company_code == movement.entity_code
2209 }) {
2210 match movement.movement_type {
2211 MfgMovementType::GoodsReceipt => {
2212 pos.add_quantity(
2214 movement.quantity,
2215 movement.value,
2216 movement.movement_date,
2217 );
2218 receipt_count += 1;
2219 }
2220 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2221 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2223 issue_count += 1;
2224 }
2225 _ => {}
2226 }
2227 }
2228 }
2229 debug!(
2230 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2231 manufacturing_snap.inventory_movements.len(),
2232 receipt_count,
2233 issue_count,
2234 );
2235 }
2236
2237 if !entries.is_empty() {
2240 stats.total_entries = entries.len() as u64;
2241 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2242 debug!(
2243 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2244 stats.total_entries, stats.total_line_items
2245 );
2246 }
2247
2248 if self.config.internal_controls.enabled && !entries.is_empty() {
2250 info!("Phase 7b: Applying internal controls to journal entries");
2251 let control_config = ControlGeneratorConfig {
2252 exception_rate: self.config.internal_controls.exception_rate,
2253 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2254 enable_sox_marking: true,
2255 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2256 self.config.internal_controls.sox_materiality_threshold,
2257 )
2258 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2259 ..Default::default()
2260 };
2261 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2262 for entry in &mut entries {
2263 control_gen.apply_controls(entry, &coa);
2264 }
2265 let with_controls = entries
2266 .iter()
2267 .filter(|e| !e.header.control_ids.is_empty())
2268 .count();
2269 info!(
2270 "Applied controls to {} entries ({} with control IDs assigned)",
2271 entries.len(),
2272 with_controls
2273 );
2274 }
2275
2276 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2280 .iter()
2281 .filter(|e| e.header.sod_violation)
2282 .filter_map(|e| {
2283 e.header.sod_conflict_type.map(|ct| {
2284 use datasynth_core::models::{RiskLevel, SodViolation};
2285 let severity = match ct {
2286 datasynth_core::models::SodConflictType::PaymentReleaser
2287 | datasynth_core::models::SodConflictType::RequesterApprover => {
2288 RiskLevel::Critical
2289 }
2290 datasynth_core::models::SodConflictType::PreparerApprover
2291 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2292 | datasynth_core::models::SodConflictType::JournalEntryPoster
2293 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2294 RiskLevel::High
2295 }
2296 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2297 RiskLevel::Medium
2298 }
2299 };
2300 let action = format!(
2301 "SoD conflict {:?} on entry {} ({})",
2302 ct, e.header.document_id, e.header.company_code
2303 );
2304 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2305 })
2306 })
2307 .collect();
2308 if !sod_violations.is_empty() {
2309 info!(
2310 "Phase 7c: Extracted {} SoD violations from {} entries",
2311 sod_violations.len(),
2312 entries.len()
2313 );
2314 }
2315
2316 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2318
2319 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2321
2322 self.emit_phase_items(
2324 "anomaly_injection",
2325 "LabeledAnomaly",
2326 &anomaly_labels.labels,
2327 );
2328
2329 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2331
2332 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2334
2335 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2337
2338 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2340
2341 let balance_validation = self.phase_balance_validation(&entries)?;
2343
2344 let subledger_reconciliation =
2346 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2347
2348 let (data_quality_stats, quality_issues) =
2350 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2351
2352 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2354
2355 let audit = self.phase_audit_data(&entries, &mut stats)?;
2357
2358 let banking = self.phase_banking_data(&mut stats)?;
2360
2361 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2363
2364 self.phase_llm_enrichment(&mut stats);
2366
2367 self.phase_diffusion_enhancement(&mut stats);
2369
2370 self.phase_causal_overlay(&mut stats);
2372
2373 let mut financial_reporting = self.phase_financial_reporting(
2377 &document_flows,
2378 &entries,
2379 &coa,
2380 &hr,
2381 &audit,
2382 &mut stats,
2383 )?;
2384
2385 {
2387 use datasynth_core::models::StatementType;
2388 for stmt in &financial_reporting.consolidated_statements {
2389 if stmt.statement_type == StatementType::BalanceSheet {
2390 let total_assets: rust_decimal::Decimal = stmt
2391 .line_items
2392 .iter()
2393 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2394 .map(|li| li.amount)
2395 .sum();
2396 let total_le: rust_decimal::Decimal = stmt
2397 .line_items
2398 .iter()
2399 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2400 .map(|li| li.amount)
2401 .sum();
2402 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2403 warn!(
2404 "BS equation imbalance: assets={}, L+E={}",
2405 total_assets, total_le
2406 );
2407 }
2408 }
2409 }
2410 }
2411
2412 let accounting_standards =
2414 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2415
2416 if !accounting_standards.ecl_journal_entries.is_empty() {
2418 debug!(
2419 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2420 accounting_standards.ecl_journal_entries.len()
2421 );
2422 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2423 }
2424
2425 if !accounting_standards.provision_journal_entries.is_empty() {
2427 debug!(
2428 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2429 accounting_standards.provision_journal_entries.len()
2430 );
2431 entries.extend(
2432 accounting_standards
2433 .provision_journal_entries
2434 .iter()
2435 .cloned(),
2436 );
2437 }
2438
2439 let ocpm = self.phase_ocpm_events(
2441 &document_flows,
2442 &sourcing,
2443 &hr,
2444 &manufacturing_snap,
2445 &banking,
2446 &audit,
2447 &financial_reporting,
2448 &mut stats,
2449 )?;
2450
2451 if let Some(ref event_log) = ocpm.event_log {
2453 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2454 }
2455
2456 let sales_kpi_budgets =
2458 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2459
2460 let treasury =
2464 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2465
2466 if !treasury.journal_entries.is_empty() {
2468 debug!(
2469 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2470 treasury.journal_entries.len()
2471 );
2472 entries.extend(treasury.journal_entries.iter().cloned());
2473 }
2474
2475 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2477
2478 if !tax.tax_posting_journal_entries.is_empty() {
2480 debug!(
2481 "Merging {} tax posting JEs into GL",
2482 tax.tax_posting_journal_entries.len()
2483 );
2484 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2485 }
2486
2487 {
2491 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2492
2493 let framework_str = {
2494 use datasynth_config::schema::AccountingFrameworkConfig;
2495 match self
2496 .config
2497 .accounting_standards
2498 .framework
2499 .unwrap_or_default()
2500 {
2501 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2502 "IFRS"
2503 }
2504 _ => "US_GAAP",
2505 }
2506 };
2507
2508 let depreciation_total: rust_decimal::Decimal = entries
2510 .iter()
2511 .filter(|je| je.header.document_type == "CL")
2512 .flat_map(|je| je.lines.iter())
2513 .filter(|l| l.gl_account.starts_with("6000"))
2514 .map(|l| l.debit_amount)
2515 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2516
2517 let interest_paid: rust_decimal::Decimal = entries
2519 .iter()
2520 .flat_map(|je| je.lines.iter())
2521 .filter(|l| l.gl_account.starts_with("7100"))
2522 .map(|l| l.debit_amount)
2523 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2524
2525 let tax_paid: rust_decimal::Decimal = entries
2527 .iter()
2528 .flat_map(|je| je.lines.iter())
2529 .filter(|l| l.gl_account.starts_with("8000"))
2530 .map(|l| l.debit_amount)
2531 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2532
2533 let capex: rust_decimal::Decimal = entries
2535 .iter()
2536 .flat_map(|je| je.lines.iter())
2537 .filter(|l| l.gl_account.starts_with("1500"))
2538 .map(|l| l.debit_amount)
2539 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2540
2541 let dividends_paid: rust_decimal::Decimal = entries
2543 .iter()
2544 .flat_map(|je| je.lines.iter())
2545 .filter(|l| l.gl_account == "2170")
2546 .map(|l| l.debit_amount)
2547 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2548
2549 let cf_data = CashFlowSourceData {
2550 depreciation_total,
2551 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
2553 delta_ap: rust_decimal::Decimal::ZERO,
2554 delta_inventory: rust_decimal::Decimal::ZERO,
2555 capex,
2556 debt_issuance: rust_decimal::Decimal::ZERO,
2557 debt_repayment: rust_decimal::Decimal::ZERO,
2558 interest_paid,
2559 tax_paid,
2560 dividends_paid,
2561 framework: framework_str.to_string(),
2562 };
2563
2564 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2565 if !enhanced_cf_items.is_empty() {
2566 use datasynth_core::models::StatementType;
2568 let merge_count = enhanced_cf_items.len();
2569 for stmt in financial_reporting
2570 .financial_statements
2571 .iter_mut()
2572 .chain(financial_reporting.consolidated_statements.iter_mut())
2573 .chain(
2574 financial_reporting
2575 .standalone_statements
2576 .values_mut()
2577 .flat_map(|v| v.iter_mut()),
2578 )
2579 {
2580 if stmt.statement_type == StatementType::CashFlowStatement {
2581 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2582 }
2583 }
2584 info!(
2585 "Enhanced cash flow: {} supplementary items merged into CF statements",
2586 merge_count
2587 );
2588 }
2589 }
2590
2591 self.generate_notes_to_financial_statements(
2594 &mut financial_reporting,
2595 &accounting_standards,
2596 &tax,
2597 &hr,
2598 &audit,
2599 &treasury,
2600 );
2601
2602 if self.config.companies.len() >= 2 && !entries.is_empty() {
2606 let companies: Vec<(String, String)> = self
2607 .config
2608 .companies
2609 .iter()
2610 .map(|c| (c.code.clone(), c.name.clone()))
2611 .collect();
2612 let ic_elim: rust_decimal::Decimal =
2613 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2614 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2615 .unwrap_or(NaiveDate::MIN);
2616 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2617 let period_label = format!(
2618 "{}-{:02}",
2619 end_date.year(),
2620 (end_date - chrono::Days::new(1)).month()
2621 );
2622
2623 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2624 let (je_segments, je_recon) =
2625 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2626 if !je_segments.is_empty() {
2627 info!(
2628 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2629 je_segments.len(),
2630 ic_elim,
2631 );
2632 if financial_reporting.segment_reports.is_empty() {
2634 financial_reporting.segment_reports = je_segments;
2635 financial_reporting.segment_reconciliations = vec![je_recon];
2636 } else {
2637 financial_reporting.segment_reports.extend(je_segments);
2638 financial_reporting.segment_reconciliations.push(je_recon);
2639 }
2640 }
2641 }
2642
2643 let esg_snap =
2645 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2646
2647 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2649
2650 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2652
2653 let disruption_events = self.phase_disruption_events(&mut stats)?;
2655
2656 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2658
2659 let (entity_relationship_graph, cross_process_links) =
2661 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2662
2663 let industry_output = self.phase_industry_data(&mut stats);
2665
2666 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2668
2669 self.phase_hypergraph_export(
2671 &coa,
2672 &entries,
2673 &document_flows,
2674 &sourcing,
2675 &hr,
2676 &manufacturing_snap,
2677 &banking,
2678 &audit,
2679 &financial_reporting,
2680 &ocpm,
2681 &compliance_regulations,
2682 &mut stats,
2683 )?;
2684
2685 if self.phase_config.generate_graph_export {
2688 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2689 }
2690
2691 if self.config.streaming.enabled {
2693 info!("Note: streaming config is enabled but batch mode does not use it");
2694 }
2695 if self.config.vendor_network.enabled {
2696 debug!("Vendor network config available; relationship graph generation is partial");
2697 }
2698 if self.config.customer_segmentation.enabled {
2699 debug!("Customer segmentation config available; segment-aware generation is partial");
2700 }
2701
2702 let resource_stats = self.resource_guard.stats();
2704 info!(
2705 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2706 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2707 resource_stats.disk.estimated_bytes_written,
2708 resource_stats.degradation_level
2709 );
2710
2711 if let Some(ref sink) = self.phase_sink {
2713 if let Err(e) = sink.flush() {
2714 warn!("Stream sink flush failed: {e}");
2715 }
2716 }
2717
2718 let lineage = self.build_lineage_graph();
2720
2721 let gate_result = if self.config.quality_gates.enabled {
2723 let profile_name = &self.config.quality_gates.profile;
2724 match datasynth_eval::gates::get_profile(profile_name) {
2725 Some(profile) => {
2726 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2728
2729 if balance_validation.validated {
2731 eval.coherence.balance =
2732 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2733 equation_balanced: balance_validation.is_balanced,
2734 max_imbalance: (balance_validation.total_debits
2735 - balance_validation.total_credits)
2736 .abs(),
2737 periods_evaluated: 1,
2738 periods_imbalanced: if balance_validation.is_balanced {
2739 0
2740 } else {
2741 1
2742 },
2743 period_results: Vec::new(),
2744 companies_evaluated: self.config.companies.len(),
2745 });
2746 }
2747
2748 eval.coherence.passes = balance_validation.is_balanced;
2750 if !balance_validation.is_balanced {
2751 eval.coherence
2752 .failures
2753 .push("Balance sheet equation not satisfied".to_string());
2754 }
2755
2756 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2758 eval.statistical.passes = !entries.is_empty();
2759
2760 eval.quality.overall_score = 0.9; eval.quality.passes = true;
2763
2764 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2765 info!(
2766 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2767 profile_name, result.gates_passed, result.gates_total, result.summary
2768 );
2769 Some(result)
2770 }
2771 None => {
2772 warn!(
2773 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2774 profile_name
2775 );
2776 None
2777 }
2778 }
2779 } else {
2780 None
2781 };
2782
2783 let internal_controls = if self.config.internal_controls.enabled {
2785 InternalControl::standard_controls()
2786 } else {
2787 Vec::new()
2788 };
2789
2790 Ok(EnhancedGenerationResult {
2791 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2792 master_data: std::mem::take(&mut self.master_data),
2793 document_flows,
2794 subledger,
2795 ocpm,
2796 audit,
2797 banking,
2798 graph_export,
2799 sourcing,
2800 financial_reporting,
2801 hr,
2802 accounting_standards,
2803 manufacturing: manufacturing_snap,
2804 sales_kpi_budgets,
2805 tax,
2806 esg: esg_snap,
2807 treasury,
2808 project_accounting,
2809 process_evolution,
2810 organizational_events,
2811 disruption_events,
2812 intercompany,
2813 journal_entries: entries,
2814 anomaly_labels,
2815 balance_validation,
2816 data_quality_stats,
2817 quality_issues,
2818 statistics: stats,
2819 lineage: Some(lineage),
2820 gate_result,
2821 internal_controls,
2822 sod_violations,
2823 opening_balances,
2824 subledger_reconciliation,
2825 counterfactual_pairs,
2826 red_flags,
2827 collusion_rings,
2828 temporal_vendor_chains,
2829 entity_relationship_graph,
2830 cross_process_links,
2831 industry_output,
2832 compliance_regulations,
2833 })
2834 }
2835
2836 fn phase_chart_of_accounts(
2842 &mut self,
2843 stats: &mut EnhancedGenerationStatistics,
2844 ) -> SynthResult<Arc<ChartOfAccounts>> {
2845 info!("Phase 1: Generating Chart of Accounts");
2846 let coa = self.generate_coa()?;
2847 stats.accounts_count = coa.account_count();
2848 info!(
2849 "Chart of Accounts generated: {} accounts",
2850 stats.accounts_count
2851 );
2852 self.check_resources_with_log("post-coa")?;
2853 Ok(coa)
2854 }
2855
2856 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2858 if self.phase_config.generate_master_data {
2859 info!("Phase 2: Generating Master Data");
2860 self.generate_master_data()?;
2861 stats.vendor_count = self.master_data.vendors.len();
2862 stats.customer_count = self.master_data.customers.len();
2863 stats.material_count = self.master_data.materials.len();
2864 stats.asset_count = self.master_data.assets.len();
2865 stats.employee_count = self.master_data.employees.len();
2866 info!(
2867 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2868 stats.vendor_count, stats.customer_count, stats.material_count,
2869 stats.asset_count, stats.employee_count
2870 );
2871 self.check_resources_with_log("post-master-data")?;
2872 } else {
2873 debug!("Phase 2: Skipped (master data generation disabled)");
2874 }
2875 Ok(())
2876 }
2877
2878 fn phase_document_flows(
2880 &mut self,
2881 stats: &mut EnhancedGenerationStatistics,
2882 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2883 let mut document_flows = DocumentFlowSnapshot::default();
2884 let mut subledger = SubledgerSnapshot::default();
2885 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
2888
2889 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2890 info!("Phase 3: Generating Document Flows");
2891 self.generate_document_flows(&mut document_flows)?;
2892 stats.p2p_chain_count = document_flows.p2p_chains.len();
2893 stats.o2c_chain_count = document_flows.o2c_chains.len();
2894 info!(
2895 "Document flows generated: {} P2P chains, {} O2C chains",
2896 stats.p2p_chain_count, stats.o2c_chain_count
2897 );
2898
2899 debug!("Phase 3b: Linking document flows to subledgers");
2901 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2902 stats.ap_invoice_count = subledger.ap_invoices.len();
2903 stats.ar_invoice_count = subledger.ar_invoices.len();
2904 debug!(
2905 "Subledgers linked: {} AP invoices, {} AR invoices",
2906 stats.ap_invoice_count, stats.ar_invoice_count
2907 );
2908
2909 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2914 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2915 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2916 debug!("Payment settlements applied to AP and AR subledgers");
2917
2918 if let Ok(start_date) =
2921 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2922 {
2923 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2924 - chrono::Days::new(1);
2925 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2926 for company in &self.config.companies {
2933 let ar_report = ARAgingReport::from_invoices(
2934 company.code.clone(),
2935 &subledger.ar_invoices,
2936 as_of_date,
2937 );
2938 subledger.ar_aging_reports.push(ar_report);
2939
2940 let ap_report = APAgingReport::from_invoices(
2941 company.code.clone(),
2942 &subledger.ap_invoices,
2943 as_of_date,
2944 );
2945 subledger.ap_aging_reports.push(ap_report);
2946 }
2947 debug!(
2948 "AR/AP aging reports built: {} AR, {} AP",
2949 subledger.ar_aging_reports.len(),
2950 subledger.ap_aging_reports.len()
2951 );
2952
2953 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
2955 {
2956 use datasynth_generators::DunningGenerator;
2957 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
2958 for company in &self.config.companies {
2959 let currency = company.currency.as_str();
2960 let mut company_invoices: Vec<
2963 datasynth_core::models::subledger::ar::ARInvoice,
2964 > = subledger
2965 .ar_invoices
2966 .iter()
2967 .filter(|inv| inv.company_code == company.code)
2968 .cloned()
2969 .collect();
2970
2971 if company_invoices.is_empty() {
2972 continue;
2973 }
2974
2975 let result = dunning_gen.execute_dunning_run(
2976 &company.code,
2977 as_of_date,
2978 &mut company_invoices,
2979 currency,
2980 );
2981
2982 for updated in &company_invoices {
2984 if let Some(orig) = subledger
2985 .ar_invoices
2986 .iter_mut()
2987 .find(|i| i.invoice_number == updated.invoice_number)
2988 {
2989 orig.dunning_info = updated.dunning_info.clone();
2990 }
2991 }
2992
2993 subledger.dunning_runs.push(result.dunning_run);
2994 subledger.dunning_letters.extend(result.letters);
2995 dunning_journal_entries.extend(result.journal_entries);
2997 }
2998 debug!(
2999 "Dunning runs complete: {} runs, {} letters",
3000 subledger.dunning_runs.len(),
3001 subledger.dunning_letters.len()
3002 );
3003 }
3004 }
3005
3006 self.check_resources_with_log("post-document-flows")?;
3007 } else {
3008 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3009 }
3010
3011 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3013 if !self.master_data.assets.is_empty() {
3014 debug!("Generating FA subledger records");
3015 let company_code = self
3016 .config
3017 .companies
3018 .first()
3019 .map(|c| c.code.as_str())
3020 .unwrap_or("1000");
3021 let currency = self
3022 .config
3023 .companies
3024 .first()
3025 .map(|c| c.currency.as_str())
3026 .unwrap_or("USD");
3027
3028 let mut fa_gen = datasynth_generators::FAGenerator::new(
3029 datasynth_generators::FAGeneratorConfig::default(),
3030 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3031 );
3032
3033 for asset in &self.master_data.assets {
3034 let (record, je) = fa_gen.generate_asset_acquisition(
3035 company_code,
3036 &format!("{:?}", asset.asset_class),
3037 &asset.description,
3038 asset.acquisition_date,
3039 currency,
3040 asset.cost_center.as_deref(),
3041 );
3042 subledger.fa_records.push(record);
3043 fa_journal_entries.push(je);
3044 }
3045
3046 stats.fa_subledger_count = subledger.fa_records.len();
3047 debug!(
3048 "FA subledger records generated: {} (with {} acquisition JEs)",
3049 stats.fa_subledger_count,
3050 fa_journal_entries.len()
3051 );
3052 }
3053
3054 if !self.master_data.materials.is_empty() {
3056 debug!("Generating Inventory subledger records");
3057 let first_company = self.config.companies.first();
3058 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3059 let inv_currency = first_company
3060 .map(|c| c.currency.clone())
3061 .unwrap_or_else(|| "USD".to_string());
3062
3063 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3064 datasynth_generators::InventoryGeneratorConfig::default(),
3065 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3066 inv_currency.clone(),
3067 );
3068
3069 for (i, material) in self.master_data.materials.iter().enumerate() {
3070 let plant = format!("PLANT{:02}", (i % 3) + 1);
3071 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3072 let initial_qty = rust_decimal::Decimal::from(
3073 material
3074 .safety_stock
3075 .to_string()
3076 .parse::<i64>()
3077 .unwrap_or(100),
3078 );
3079
3080 let position = inv_gen.generate_position(
3081 company_code,
3082 &plant,
3083 &storage_loc,
3084 &material.material_id,
3085 &material.description,
3086 initial_qty,
3087 Some(material.standard_cost),
3088 &inv_currency,
3089 );
3090 subledger.inventory_positions.push(position);
3091 }
3092
3093 stats.inventory_subledger_count = subledger.inventory_positions.len();
3094 debug!(
3095 "Inventory subledger records generated: {}",
3096 stats.inventory_subledger_count
3097 );
3098 }
3099
3100 if !subledger.fa_records.is_empty() {
3102 if let Ok(start_date) =
3103 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3104 {
3105 let company_code = self
3106 .config
3107 .companies
3108 .first()
3109 .map(|c| c.code.as_str())
3110 .unwrap_or("1000");
3111 let fiscal_year = start_date.year();
3112 let start_period = start_date.month();
3113 let end_period =
3114 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3115
3116 let depr_cfg = FaDepreciationScheduleConfig {
3117 fiscal_year,
3118 start_period,
3119 end_period,
3120 seed_offset: 800,
3121 };
3122 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3123 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3124 let run_count = runs.len();
3125 subledger.depreciation_runs = runs;
3126 debug!(
3127 "Depreciation runs generated: {} runs for {} periods",
3128 run_count, self.config.global.period_months
3129 );
3130 }
3131 }
3132
3133 if !subledger.inventory_positions.is_empty() {
3135 if let Ok(start_date) =
3136 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3137 {
3138 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3139 - chrono::Days::new(1);
3140
3141 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3142 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3143
3144 for company in &self.config.companies {
3145 let result = inv_val_gen.generate(
3146 &company.code,
3147 &subledger.inventory_positions,
3148 as_of_date,
3149 );
3150 subledger.inventory_valuations.push(result);
3151 }
3152 debug!(
3153 "Inventory valuations generated: {} company reports",
3154 subledger.inventory_valuations.len()
3155 );
3156 }
3157 }
3158
3159 Ok((document_flows, subledger, fa_journal_entries))
3160 }
3161
3162 #[allow(clippy::too_many_arguments)]
3164 fn phase_ocpm_events(
3165 &mut self,
3166 document_flows: &DocumentFlowSnapshot,
3167 sourcing: &SourcingSnapshot,
3168 hr: &HrSnapshot,
3169 manufacturing: &ManufacturingSnapshot,
3170 banking: &BankingSnapshot,
3171 audit: &AuditSnapshot,
3172 financial_reporting: &FinancialReportingSnapshot,
3173 stats: &mut EnhancedGenerationStatistics,
3174 ) -> SynthResult<OcpmSnapshot> {
3175 let degradation = self.check_resources()?;
3176 if degradation >= DegradationLevel::Reduced {
3177 debug!(
3178 "Phase skipped due to resource pressure (degradation: {:?})",
3179 degradation
3180 );
3181 return Ok(OcpmSnapshot::default());
3182 }
3183 if self.phase_config.generate_ocpm_events {
3184 info!("Phase 3c: Generating OCPM Events");
3185 let ocpm_snapshot = self.generate_ocpm_events(
3186 document_flows,
3187 sourcing,
3188 hr,
3189 manufacturing,
3190 banking,
3191 audit,
3192 financial_reporting,
3193 )?;
3194 stats.ocpm_event_count = ocpm_snapshot.event_count;
3195 stats.ocpm_object_count = ocpm_snapshot.object_count;
3196 stats.ocpm_case_count = ocpm_snapshot.case_count;
3197 info!(
3198 "OCPM events generated: {} events, {} objects, {} cases",
3199 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3200 );
3201 self.check_resources_with_log("post-ocpm")?;
3202 Ok(ocpm_snapshot)
3203 } else {
3204 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3205 Ok(OcpmSnapshot::default())
3206 }
3207 }
3208
3209 fn phase_journal_entries(
3211 &mut self,
3212 coa: &Arc<ChartOfAccounts>,
3213 document_flows: &DocumentFlowSnapshot,
3214 _stats: &mut EnhancedGenerationStatistics,
3215 ) -> SynthResult<Vec<JournalEntry>> {
3216 let mut entries = Vec::new();
3217
3218 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3220 debug!("Phase 4a: Generating JEs from document flows");
3221 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3222 debug!("Generated {} JEs from document flows", flow_entries.len());
3223 entries.extend(flow_entries);
3224 }
3225
3226 if self.phase_config.generate_journal_entries {
3228 info!("Phase 4: Generating Journal Entries");
3229 let je_entries = self.generate_journal_entries(coa)?;
3230 info!("Generated {} standalone journal entries", je_entries.len());
3231 entries.extend(je_entries);
3232 } else {
3233 debug!("Phase 4: Skipped (journal entry generation disabled)");
3234 }
3235
3236 if !entries.is_empty() {
3237 self.check_resources_with_log("post-journal-entries")?;
3240 }
3241
3242 Ok(entries)
3243 }
3244
3245 fn phase_anomaly_injection(
3247 &mut self,
3248 entries: &mut [JournalEntry],
3249 actions: &DegradationActions,
3250 stats: &mut EnhancedGenerationStatistics,
3251 ) -> SynthResult<AnomalyLabels> {
3252 if self.phase_config.inject_anomalies
3253 && !entries.is_empty()
3254 && !actions.skip_anomaly_injection
3255 {
3256 info!("Phase 5: Injecting Anomalies");
3257 let result = self.inject_anomalies(entries)?;
3258 stats.anomalies_injected = result.labels.len();
3259 info!("Injected {} anomalies", stats.anomalies_injected);
3260 self.check_resources_with_log("post-anomaly-injection")?;
3261 Ok(result)
3262 } else if actions.skip_anomaly_injection {
3263 warn!("Phase 5: Skipped due to resource degradation");
3264 Ok(AnomalyLabels::default())
3265 } else {
3266 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3267 Ok(AnomalyLabels::default())
3268 }
3269 }
3270
3271 fn phase_balance_validation(
3273 &mut self,
3274 entries: &[JournalEntry],
3275 ) -> SynthResult<BalanceValidationResult> {
3276 if self.phase_config.validate_balances && !entries.is_empty() {
3277 debug!("Phase 6: Validating Balances");
3278 let balance_validation = self.validate_journal_entries(entries)?;
3279 if balance_validation.is_balanced {
3280 debug!("Balance validation passed");
3281 } else {
3282 warn!(
3283 "Balance validation found {} errors",
3284 balance_validation.validation_errors.len()
3285 );
3286 }
3287 Ok(balance_validation)
3288 } else {
3289 Ok(BalanceValidationResult::default())
3290 }
3291 }
3292
3293 fn phase_data_quality_injection(
3295 &mut self,
3296 entries: &mut [JournalEntry],
3297 actions: &DegradationActions,
3298 stats: &mut EnhancedGenerationStatistics,
3299 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3300 if self.phase_config.inject_data_quality
3301 && !entries.is_empty()
3302 && !actions.skip_data_quality
3303 {
3304 info!("Phase 7: Injecting Data Quality Variations");
3305 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3306 stats.data_quality_issues = dq_stats.records_with_issues;
3307 info!("Injected {} data quality issues", stats.data_quality_issues);
3308 self.check_resources_with_log("post-data-quality")?;
3309 Ok((dq_stats, quality_issues))
3310 } else if actions.skip_data_quality {
3311 warn!("Phase 7: Skipped due to resource degradation");
3312 Ok((DataQualityStats::default(), Vec::new()))
3313 } else {
3314 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3315 Ok((DataQualityStats::default(), Vec::new()))
3316 }
3317 }
3318
3319 fn phase_period_close(
3329 &mut self,
3330 entries: &mut Vec<JournalEntry>,
3331 subledger: &SubledgerSnapshot,
3332 stats: &mut EnhancedGenerationStatistics,
3333 ) -> SynthResult<()> {
3334 if !self.phase_config.generate_period_close || entries.is_empty() {
3335 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3336 return Ok(());
3337 }
3338
3339 info!("Phase 10b: Generating period-close journal entries");
3340
3341 use datasynth_core::accounts::{
3342 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3343 };
3344 use rust_decimal::Decimal;
3345
3346 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3347 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3348 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3349 let close_date = end_date - chrono::Days::new(1);
3351
3352 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3357 .config
3358 .companies
3359 .iter()
3360 .map(|c| c.code.clone())
3361 .collect();
3362
3363 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3365 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3366
3367 let period_months = self.config.global.period_months;
3371 for asset in &subledger.fa_records {
3372 use datasynth_core::models::subledger::fa::AssetStatus;
3374 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3375 continue;
3376 }
3377 let useful_life_months = asset.useful_life_months();
3378 if useful_life_months == 0 {
3379 continue;
3381 }
3382 let salvage_value = asset.salvage_value();
3383 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3384 if depreciable_base == Decimal::ZERO {
3385 continue;
3386 }
3387 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3388 * Decimal::from(period_months))
3389 .round_dp(2);
3390 if period_depr <= Decimal::ZERO {
3391 continue;
3392 }
3393
3394 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3395 depr_header.document_type = "CL".to_string();
3396 depr_header.header_text = Some(format!(
3397 "Depreciation - {} {}",
3398 asset.asset_number, asset.description
3399 ));
3400 depr_header.created_by = "CLOSE_ENGINE".to_string();
3401 depr_header.source = TransactionSource::Automated;
3402 depr_header.business_process = Some(BusinessProcess::R2R);
3403
3404 let doc_id = depr_header.document_id;
3405 let mut depr_je = JournalEntry::new(depr_header);
3406
3407 depr_je.add_line(JournalEntryLine::debit(
3409 doc_id,
3410 1,
3411 expense_accounts::DEPRECIATION.to_string(),
3412 period_depr,
3413 ));
3414 depr_je.add_line(JournalEntryLine::credit(
3416 doc_id,
3417 2,
3418 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3419 period_depr,
3420 ));
3421
3422 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3423 close_jes.push(depr_je);
3424 }
3425
3426 if !subledger.fa_records.is_empty() {
3427 debug!(
3428 "Generated {} depreciation JEs from {} FA records",
3429 close_jes.len(),
3430 subledger.fa_records.len()
3431 );
3432 }
3433
3434 {
3438 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3439 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3440
3441 let accrual_items: &[(&str, &str, &str)] = &[
3443 ("Accrued Utilities", "6200", "2100"),
3444 ("Accrued Rent", "6300", "2100"),
3445 ("Accrued Interest", "6100", "2150"),
3446 ];
3447
3448 for company_code in &company_codes {
3449 let company_revenue: Decimal = entries
3451 .iter()
3452 .filter(|e| e.header.company_code == *company_code)
3453 .flat_map(|e| e.lines.iter())
3454 .filter(|l| l.gl_account.starts_with('4'))
3455 .map(|l| l.credit_amount - l.debit_amount)
3456 .fold(Decimal::ZERO, |acc, v| acc + v);
3457
3458 if company_revenue <= Decimal::ZERO {
3459 continue;
3460 }
3461
3462 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3464 if accrual_base <= Decimal::ZERO {
3465 continue;
3466 }
3467
3468 for (description, expense_acct, liability_acct) in accrual_items {
3469 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3470 company_code,
3471 description,
3472 accrual_base,
3473 expense_acct,
3474 liability_acct,
3475 close_date,
3476 None,
3477 );
3478 close_jes.push(accrual_je);
3479 if let Some(rev_je) = reversal_je {
3480 close_jes.push(rev_je);
3481 }
3482 }
3483 }
3484
3485 debug!(
3486 "Generated accrual entries for {} companies",
3487 company_codes.len()
3488 );
3489 }
3490
3491 for company_code in &company_codes {
3492 let mut total_revenue = Decimal::ZERO;
3497 let mut total_expenses = Decimal::ZERO;
3498
3499 for entry in entries.iter() {
3500 if entry.header.company_code != *company_code {
3501 continue;
3502 }
3503 for line in &entry.lines {
3504 let category = AccountCategory::from_account(&line.gl_account);
3505 match category {
3506 AccountCategory::Revenue => {
3507 total_revenue += line.credit_amount - line.debit_amount;
3509 }
3510 AccountCategory::Cogs
3511 | AccountCategory::OperatingExpense
3512 | AccountCategory::OtherIncomeExpense
3513 | AccountCategory::Tax => {
3514 total_expenses += line.debit_amount - line.credit_amount;
3516 }
3517 _ => {}
3518 }
3519 }
3520 }
3521
3522 let pre_tax_income = total_revenue - total_expenses;
3523
3524 if pre_tax_income == Decimal::ZERO {
3526 debug!(
3527 "Company {}: no pre-tax income, skipping period close",
3528 company_code
3529 );
3530 continue;
3531 }
3532
3533 if pre_tax_income > Decimal::ZERO {
3535 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3537
3538 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3539 tax_header.document_type = "CL".to_string();
3540 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3541 tax_header.created_by = "CLOSE_ENGINE".to_string();
3542 tax_header.source = TransactionSource::Automated;
3543 tax_header.business_process = Some(BusinessProcess::R2R);
3544
3545 let doc_id = tax_header.document_id;
3546 let mut tax_je = JournalEntry::new(tax_header);
3547
3548 tax_je.add_line(JournalEntryLine::debit(
3550 doc_id,
3551 1,
3552 tax_accounts::TAX_EXPENSE.to_string(),
3553 tax_amount,
3554 ));
3555 tax_je.add_line(JournalEntryLine::credit(
3557 doc_id,
3558 2,
3559 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3560 tax_amount,
3561 ));
3562
3563 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3564 close_jes.push(tax_je);
3565 } else {
3566 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3569 if dta_amount > Decimal::ZERO {
3570 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3571 dta_header.document_type = "CL".to_string();
3572 dta_header.header_text =
3573 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3574 dta_header.created_by = "CLOSE_ENGINE".to_string();
3575 dta_header.source = TransactionSource::Automated;
3576 dta_header.business_process = Some(BusinessProcess::R2R);
3577
3578 let doc_id = dta_header.document_id;
3579 let mut dta_je = JournalEntry::new(dta_header);
3580
3581 dta_je.add_line(JournalEntryLine::debit(
3583 doc_id,
3584 1,
3585 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3586 dta_amount,
3587 ));
3588 dta_je.add_line(JournalEntryLine::credit(
3591 doc_id,
3592 2,
3593 tax_accounts::TAX_EXPENSE.to_string(),
3594 dta_amount,
3595 ));
3596
3597 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3598 close_jes.push(dta_je);
3599 debug!(
3600 "Company {}: loss year — recognised DTA of {}",
3601 company_code, dta_amount
3602 );
3603 }
3604 }
3605
3606 let tax_provision = if pre_tax_income > Decimal::ZERO {
3612 (pre_tax_income * tax_rate).round_dp(2)
3613 } else {
3614 Decimal::ZERO
3615 };
3616 let net_income = pre_tax_income - tax_provision;
3617
3618 if net_income > Decimal::ZERO {
3619 use datasynth_generators::DividendGenerator;
3620 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
3622 let currency_str = self
3623 .config
3624 .companies
3625 .iter()
3626 .find(|c| c.code == *company_code)
3627 .map(|c| c.currency.as_str())
3628 .unwrap_or("USD");
3629 let div_result = div_gen.generate(
3630 company_code,
3631 close_date,
3632 Decimal::new(1, 0), dividend_amount,
3634 currency_str,
3635 );
3636 let div_je_count = div_result.journal_entries.len();
3637 close_jes.extend(div_result.journal_entries);
3638 debug!(
3639 "Company {}: declared dividend of {} ({} JEs)",
3640 company_code, dividend_amount, div_je_count
3641 );
3642 }
3643
3644 if net_income != Decimal::ZERO {
3649 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3650 close_header.document_type = "CL".to_string();
3651 close_header.header_text =
3652 Some(format!("Income statement close - {}", company_code));
3653 close_header.created_by = "CLOSE_ENGINE".to_string();
3654 close_header.source = TransactionSource::Automated;
3655 close_header.business_process = Some(BusinessProcess::R2R);
3656
3657 let doc_id = close_header.document_id;
3658 let mut close_je = JournalEntry::new(close_header);
3659
3660 let abs_net_income = net_income.abs();
3661
3662 if net_income > Decimal::ZERO {
3663 close_je.add_line(JournalEntryLine::debit(
3665 doc_id,
3666 1,
3667 equity_accounts::INCOME_SUMMARY.to_string(),
3668 abs_net_income,
3669 ));
3670 close_je.add_line(JournalEntryLine::credit(
3671 doc_id,
3672 2,
3673 equity_accounts::RETAINED_EARNINGS.to_string(),
3674 abs_net_income,
3675 ));
3676 } else {
3677 close_je.add_line(JournalEntryLine::debit(
3679 doc_id,
3680 1,
3681 equity_accounts::RETAINED_EARNINGS.to_string(),
3682 abs_net_income,
3683 ));
3684 close_je.add_line(JournalEntryLine::credit(
3685 doc_id,
3686 2,
3687 equity_accounts::INCOME_SUMMARY.to_string(),
3688 abs_net_income,
3689 ));
3690 }
3691
3692 debug_assert!(
3693 close_je.is_balanced(),
3694 "Income statement closing JE must be balanced"
3695 );
3696 close_jes.push(close_je);
3697 }
3698 }
3699
3700 let close_count = close_jes.len();
3701 if close_count > 0 {
3702 info!("Generated {} period-close journal entries", close_count);
3703 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3704 entries.extend(close_jes);
3705 stats.period_close_je_count = close_count;
3706
3707 stats.total_entries = entries.len() as u64;
3709 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3710 } else {
3711 debug!("No period-close entries generated (no income statement activity)");
3712 }
3713
3714 Ok(())
3715 }
3716
3717 fn phase_audit_data(
3719 &mut self,
3720 entries: &[JournalEntry],
3721 stats: &mut EnhancedGenerationStatistics,
3722 ) -> SynthResult<AuditSnapshot> {
3723 if self.phase_config.generate_audit {
3724 info!("Phase 8: Generating Audit Data");
3725 let audit_snapshot = self.generate_audit_data(entries)?;
3726 stats.audit_engagement_count = audit_snapshot.engagements.len();
3727 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3728 stats.audit_evidence_count = audit_snapshot.evidence.len();
3729 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3730 stats.audit_finding_count = audit_snapshot.findings.len();
3731 stats.audit_judgment_count = audit_snapshot.judgments.len();
3732 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3733 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3734 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3735 stats.audit_sample_count = audit_snapshot.samples.len();
3736 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3737 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3738 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3739 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3740 stats.audit_related_party_transaction_count =
3741 audit_snapshot.related_party_transactions.len();
3742 info!(
3743 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3744 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3745 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3746 {} RP transactions",
3747 stats.audit_engagement_count,
3748 stats.audit_workpaper_count,
3749 stats.audit_evidence_count,
3750 stats.audit_risk_count,
3751 stats.audit_finding_count,
3752 stats.audit_judgment_count,
3753 stats.audit_confirmation_count,
3754 stats.audit_procedure_step_count,
3755 stats.audit_sample_count,
3756 stats.audit_analytical_result_count,
3757 stats.audit_ia_function_count,
3758 stats.audit_ia_report_count,
3759 stats.audit_related_party_count,
3760 stats.audit_related_party_transaction_count,
3761 );
3762 self.check_resources_with_log("post-audit")?;
3763 Ok(audit_snapshot)
3764 } else {
3765 debug!("Phase 8: Skipped (audit generation disabled)");
3766 Ok(AuditSnapshot::default())
3767 }
3768 }
3769
3770 fn phase_banking_data(
3772 &mut self,
3773 stats: &mut EnhancedGenerationStatistics,
3774 ) -> SynthResult<BankingSnapshot> {
3775 if self.phase_config.generate_banking {
3776 info!("Phase 9: Generating Banking KYC/AML Data");
3777 let banking_snapshot = self.generate_banking_data()?;
3778 stats.banking_customer_count = banking_snapshot.customers.len();
3779 stats.banking_account_count = banking_snapshot.accounts.len();
3780 stats.banking_transaction_count = banking_snapshot.transactions.len();
3781 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3782 info!(
3783 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3784 stats.banking_customer_count, stats.banking_account_count,
3785 stats.banking_transaction_count, stats.banking_suspicious_count
3786 );
3787 self.check_resources_with_log("post-banking")?;
3788 Ok(banking_snapshot)
3789 } else {
3790 debug!("Phase 9: Skipped (banking generation disabled)");
3791 Ok(BankingSnapshot::default())
3792 }
3793 }
3794
3795 fn phase_graph_export(
3797 &mut self,
3798 entries: &[JournalEntry],
3799 coa: &Arc<ChartOfAccounts>,
3800 stats: &mut EnhancedGenerationStatistics,
3801 ) -> SynthResult<GraphExportSnapshot> {
3802 if self.phase_config.generate_graph_export && !entries.is_empty() {
3803 info!("Phase 10: Exporting Accounting Network Graphs");
3804 match self.export_graphs(entries, coa, stats) {
3805 Ok(snapshot) => {
3806 info!(
3807 "Graph export complete: {} graphs ({} nodes, {} edges)",
3808 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3809 );
3810 Ok(snapshot)
3811 }
3812 Err(e) => {
3813 warn!("Phase 10: Graph export failed: {}", e);
3814 Ok(GraphExportSnapshot::default())
3815 }
3816 }
3817 } else {
3818 debug!("Phase 10: Skipped (graph export disabled or no entries)");
3819 Ok(GraphExportSnapshot::default())
3820 }
3821 }
3822
3823 #[allow(clippy::too_many_arguments)]
3825 fn phase_hypergraph_export(
3826 &self,
3827 coa: &Arc<ChartOfAccounts>,
3828 entries: &[JournalEntry],
3829 document_flows: &DocumentFlowSnapshot,
3830 sourcing: &SourcingSnapshot,
3831 hr: &HrSnapshot,
3832 manufacturing: &ManufacturingSnapshot,
3833 banking: &BankingSnapshot,
3834 audit: &AuditSnapshot,
3835 financial_reporting: &FinancialReportingSnapshot,
3836 ocpm: &OcpmSnapshot,
3837 compliance: &ComplianceRegulationsSnapshot,
3838 stats: &mut EnhancedGenerationStatistics,
3839 ) -> SynthResult<()> {
3840 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3841 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3842 match self.export_hypergraph(
3843 coa,
3844 entries,
3845 document_flows,
3846 sourcing,
3847 hr,
3848 manufacturing,
3849 banking,
3850 audit,
3851 financial_reporting,
3852 ocpm,
3853 compliance,
3854 stats,
3855 ) {
3856 Ok(info) => {
3857 info!(
3858 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3859 info.node_count, info.edge_count, info.hyperedge_count
3860 );
3861 }
3862 Err(e) => {
3863 warn!("Phase 10b: Hypergraph export failed: {}", e);
3864 }
3865 }
3866 } else {
3867 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3868 }
3869 Ok(())
3870 }
3871
3872 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3878 if !self.config.llm.enabled {
3879 debug!("Phase 11: Skipped (LLM enrichment disabled)");
3880 return;
3881 }
3882
3883 info!("Phase 11: Starting LLM Enrichment");
3884 let start = std::time::Instant::now();
3885
3886 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3887 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
3890 let schema_provider = &self.config.llm.provider;
3891 let api_key_env = match schema_provider.as_str() {
3892 "openai" => Some("OPENAI_API_KEY"),
3893 "anthropic" => Some("ANTHROPIC_API_KEY"),
3894 "custom" => Some("LLM_API_KEY"),
3895 _ => None,
3896 };
3897 if let Some(key_env) = api_key_env {
3898 if std::env::var(key_env).is_ok() {
3899 let llm_config = datasynth_core::llm::LlmConfig {
3900 model: self.config.llm.model.clone(),
3901 api_key_env: key_env.to_string(),
3902 ..datasynth_core::llm::LlmConfig::default()
3903 };
3904 match HttpLlmProvider::new(llm_config) {
3905 Ok(p) => Arc::new(p),
3906 Err(e) => {
3907 warn!(
3908 "Failed to create HttpLlmProvider: {}; falling back to mock",
3909 e
3910 );
3911 Arc::new(MockLlmProvider::new(self.seed))
3912 }
3913 }
3914 } else {
3915 Arc::new(MockLlmProvider::new(self.seed))
3916 }
3917 } else {
3918 Arc::new(MockLlmProvider::new(self.seed))
3919 }
3920 };
3921 let enricher = VendorLlmEnricher::new(provider);
3922
3923 let industry = format!("{:?}", self.config.global.industry);
3924 let max_enrichments = self
3925 .config
3926 .llm
3927 .max_vendor_enrichments
3928 .min(self.master_data.vendors.len());
3929
3930 let mut enriched_count = 0usize;
3931 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3932 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3933 Ok(name) => {
3934 vendor.name = name;
3935 enriched_count += 1;
3936 }
3937 Err(e) => {
3938 warn!(
3939 "LLM vendor enrichment failed for {}: {}",
3940 vendor.vendor_id, e
3941 );
3942 }
3943 }
3944 }
3945
3946 enriched_count
3947 }));
3948
3949 match result {
3950 Ok(enriched_count) => {
3951 stats.llm_vendors_enriched = enriched_count;
3952 let elapsed = start.elapsed();
3953 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3954 info!(
3955 "Phase 11 complete: {} vendors enriched in {}ms",
3956 enriched_count, stats.llm_enrichment_ms
3957 );
3958 }
3959 Err(_) => {
3960 let elapsed = start.elapsed();
3961 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3962 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3963 }
3964 }
3965 }
3966
3967 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3973 if !self.config.diffusion.enabled {
3974 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3975 return;
3976 }
3977
3978 info!("Phase 12: Starting Diffusion Enhancement");
3979 let start = std::time::Instant::now();
3980
3981 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3982 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
3985
3986 let diffusion_config = DiffusionConfig {
3987 n_steps: self.config.diffusion.n_steps,
3988 seed: self.seed,
3989 ..Default::default()
3990 };
3991
3992 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3993
3994 let n_samples = self.config.diffusion.sample_size;
3995 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
3997
3998 samples.len()
3999 }));
4000
4001 match result {
4002 Ok(sample_count) => {
4003 stats.diffusion_samples_generated = sample_count;
4004 let elapsed = start.elapsed();
4005 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4006 info!(
4007 "Phase 12 complete: {} diffusion samples generated in {}ms",
4008 sample_count, stats.diffusion_enhancement_ms
4009 );
4010 }
4011 Err(_) => {
4012 let elapsed = start.elapsed();
4013 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4014 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4015 }
4016 }
4017 }
4018
4019 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4026 if !self.config.causal.enabled {
4027 debug!("Phase 13: Skipped (causal generation disabled)");
4028 return;
4029 }
4030
4031 info!("Phase 13: Starting Causal Overlay");
4032 let start = std::time::Instant::now();
4033
4034 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4035 let graph = match self.config.causal.template.as_str() {
4037 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4038 _ => CausalGraph::fraud_detection_template(),
4039 };
4040
4041 let scm = StructuralCausalModel::new(graph.clone())
4042 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4043
4044 let n_samples = self.config.causal.sample_size;
4045 let samples = scm
4046 .generate(n_samples, self.seed)
4047 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4048
4049 let validation_passed = if self.config.causal.validate {
4051 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4052 if report.valid {
4053 info!(
4054 "Causal validation passed: all {} checks OK",
4055 report.checks.len()
4056 );
4057 } else {
4058 warn!(
4059 "Causal validation: {} violations detected: {:?}",
4060 report.violations.len(),
4061 report.violations
4062 );
4063 }
4064 Some(report.valid)
4065 } else {
4066 None
4067 };
4068
4069 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4070 }));
4071
4072 match result {
4073 Ok(Ok((sample_count, validation_passed))) => {
4074 stats.causal_samples_generated = sample_count;
4075 stats.causal_validation_passed = validation_passed;
4076 let elapsed = start.elapsed();
4077 stats.causal_generation_ms = elapsed.as_millis() as u64;
4078 info!(
4079 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4080 sample_count, stats.causal_generation_ms, validation_passed,
4081 );
4082 }
4083 Ok(Err(e)) => {
4084 let elapsed = start.elapsed();
4085 stats.causal_generation_ms = elapsed.as_millis() as u64;
4086 warn!("Phase 13: Causal generation failed: {}", e);
4087 }
4088 Err(_) => {
4089 let elapsed = start.elapsed();
4090 stats.causal_generation_ms = elapsed.as_millis() as u64;
4091 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4092 }
4093 }
4094 }
4095
4096 fn phase_sourcing_data(
4098 &mut self,
4099 stats: &mut EnhancedGenerationStatistics,
4100 ) -> SynthResult<SourcingSnapshot> {
4101 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4102 debug!("Phase 14: Skipped (sourcing generation disabled)");
4103 return Ok(SourcingSnapshot::default());
4104 }
4105 let degradation = self.check_resources()?;
4106 if degradation >= DegradationLevel::Reduced {
4107 debug!(
4108 "Phase skipped due to resource pressure (degradation: {:?})",
4109 degradation
4110 );
4111 return Ok(SourcingSnapshot::default());
4112 }
4113
4114 info!("Phase 14: Generating S2C Sourcing Data");
4115 let seed = self.seed;
4116
4117 let vendor_ids: Vec<String> = self
4119 .master_data
4120 .vendors
4121 .iter()
4122 .map(|v| v.vendor_id.clone())
4123 .collect();
4124 if vendor_ids.is_empty() {
4125 debug!("Phase 14: Skipped (no vendors available)");
4126 return Ok(SourcingSnapshot::default());
4127 }
4128
4129 let categories: Vec<(String, String)> = vec![
4130 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4131 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4132 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4133 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4134 ("CAT-LOG".to_string(), "Logistics".to_string()),
4135 ];
4136 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4137 .iter()
4138 .map(|(id, name)| {
4139 (
4140 id.clone(),
4141 name.clone(),
4142 rust_decimal::Decimal::from(100_000),
4143 )
4144 })
4145 .collect();
4146
4147 let company_code = self
4148 .config
4149 .companies
4150 .first()
4151 .map(|c| c.code.as_str())
4152 .unwrap_or("1000");
4153 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4154 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4155 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4156 let fiscal_year = start_date.year() as u16;
4157 let owner_ids: Vec<String> = self
4158 .master_data
4159 .employees
4160 .iter()
4161 .take(5)
4162 .map(|e| e.employee_id.clone())
4163 .collect();
4164 let owner_id = owner_ids
4165 .first()
4166 .map(std::string::String::as_str)
4167 .unwrap_or("BUYER-001");
4168
4169 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4171 let spend_analyses =
4172 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4173
4174 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4176 let sourcing_projects = if owner_ids.is_empty() {
4177 Vec::new()
4178 } else {
4179 project_gen.generate(
4180 company_code,
4181 &categories_with_spend,
4182 &owner_ids,
4183 start_date,
4184 self.config.global.period_months,
4185 )
4186 };
4187 stats.sourcing_project_count = sourcing_projects.len();
4188
4189 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4191 let mut qual_gen = QualificationGenerator::new(seed + 2);
4192 let qualifications = qual_gen.generate(
4193 company_code,
4194 &qual_vendor_ids,
4195 sourcing_projects.first().map(|p| p.project_id.as_str()),
4196 owner_id,
4197 start_date,
4198 );
4199
4200 let mut rfx_gen = RfxGenerator::new(seed + 3);
4202 let rfx_events: Vec<RfxEvent> = sourcing_projects
4203 .iter()
4204 .map(|proj| {
4205 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4206 rfx_gen.generate(
4207 company_code,
4208 &proj.project_id,
4209 &proj.category_id,
4210 &qualified_vids,
4211 owner_id,
4212 start_date,
4213 50000.0,
4214 )
4215 })
4216 .collect();
4217 stats.rfx_event_count = rfx_events.len();
4218
4219 let mut bid_gen = BidGenerator::new(seed + 4);
4221 let mut all_bids = Vec::new();
4222 for rfx in &rfx_events {
4223 let bidder_count = vendor_ids.len().clamp(2, 5);
4224 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4225 let bids = bid_gen.generate(rfx, &responding, start_date);
4226 all_bids.extend(bids);
4227 }
4228 stats.bid_count = all_bids.len();
4229
4230 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4232 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4233 .iter()
4234 .map(|rfx| {
4235 let rfx_bids: Vec<SupplierBid> = all_bids
4236 .iter()
4237 .filter(|b| b.rfx_id == rfx.rfx_id)
4238 .cloned()
4239 .collect();
4240 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4241 })
4242 .collect();
4243
4244 let mut contract_gen = ContractGenerator::new(seed + 6);
4246 let contracts: Vec<ProcurementContract> = bid_evaluations
4247 .iter()
4248 .zip(rfx_events.iter())
4249 .filter_map(|(eval, rfx)| {
4250 eval.ranked_bids.first().and_then(|winner| {
4251 all_bids
4252 .iter()
4253 .find(|b| b.bid_id == winner.bid_id)
4254 .map(|winning_bid| {
4255 contract_gen.generate_from_bid(
4256 winning_bid,
4257 Some(&rfx.sourcing_project_id),
4258 &rfx.category_id,
4259 owner_id,
4260 start_date,
4261 )
4262 })
4263 })
4264 })
4265 .collect();
4266 stats.contract_count = contracts.len();
4267
4268 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4270 let catalog_items = catalog_gen.generate(&contracts);
4271 stats.catalog_item_count = catalog_items.len();
4272
4273 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4275 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4276 .iter()
4277 .fold(
4278 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4279 |mut acc, c| {
4280 acc.entry(c.vendor_id.clone()).or_default().push(c);
4281 acc
4282 },
4283 )
4284 .into_iter()
4285 .collect();
4286 let scorecards = scorecard_gen.generate(
4287 company_code,
4288 &vendor_contracts,
4289 start_date,
4290 end_date,
4291 owner_id,
4292 );
4293 stats.scorecard_count = scorecards.len();
4294
4295 let mut sourcing_projects = sourcing_projects;
4298 for project in &mut sourcing_projects {
4299 project.rfx_ids = rfx_events
4301 .iter()
4302 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4303 .map(|rfx| rfx.rfx_id.clone())
4304 .collect();
4305
4306 project.contract_id = contracts
4308 .iter()
4309 .find(|c| {
4310 c.sourcing_project_id
4311 .as_deref()
4312 .is_some_and(|sp| sp == project.project_id)
4313 })
4314 .map(|c| c.contract_id.clone());
4315
4316 project.spend_analysis_id = spend_analyses
4318 .iter()
4319 .find(|sa| sa.category_id == project.category_id)
4320 .map(|sa| sa.category_id.clone());
4321 }
4322
4323 info!(
4324 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4325 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4326 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4327 );
4328 self.check_resources_with_log("post-sourcing")?;
4329
4330 Ok(SourcingSnapshot {
4331 spend_analyses,
4332 sourcing_projects,
4333 qualifications,
4334 rfx_events,
4335 bids: all_bids,
4336 bid_evaluations,
4337 contracts,
4338 catalog_items,
4339 scorecards,
4340 })
4341 }
4342
4343 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4349 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4350
4351 let parent_code = self
4352 .config
4353 .companies
4354 .first()
4355 .map(|c| c.code.clone())
4356 .unwrap_or_else(|| "PARENT".to_string());
4357
4358 let mut group = GroupStructure::new(parent_code);
4359
4360 for company in self.config.companies.iter().skip(1) {
4361 let sub =
4362 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4363 group.add_subsidiary(sub);
4364 }
4365
4366 group
4367 }
4368
4369 fn phase_intercompany(
4371 &mut self,
4372 journal_entries: &[JournalEntry],
4373 stats: &mut EnhancedGenerationStatistics,
4374 ) -> SynthResult<IntercompanySnapshot> {
4375 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4377 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4378 return Ok(IntercompanySnapshot::default());
4379 }
4380
4381 if self.config.companies.len() < 2 {
4383 debug!(
4384 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4385 self.config.companies.len()
4386 );
4387 return Ok(IntercompanySnapshot::default());
4388 }
4389
4390 info!("Phase 14b: Generating Intercompany Transactions");
4391
4392 let group_structure = self.build_group_structure();
4395 debug!(
4396 "Group structure built: parent={}, subsidiaries={}",
4397 group_structure.parent_entity,
4398 group_structure.subsidiaries.len()
4399 );
4400
4401 let seed = self.seed;
4402 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4403 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4404 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4405
4406 let parent_code = self.config.companies[0].code.clone();
4409 let mut ownership_structure =
4410 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4411
4412 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4413 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4414 format!("REL{:03}", i + 1),
4415 parent_code.clone(),
4416 company.code.clone(),
4417 rust_decimal::Decimal::from(100), start_date,
4419 );
4420 ownership_structure.add_relationship(relationship);
4421 }
4422
4423 let tp_method = match self.config.intercompany.transfer_pricing_method {
4425 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4426 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4427 }
4428 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4429 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4430 }
4431 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4432 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4433 }
4434 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4435 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4436 }
4437 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4438 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4439 }
4440 };
4441
4442 let ic_currency = self
4444 .config
4445 .companies
4446 .first()
4447 .map(|c| c.currency.clone())
4448 .unwrap_or_else(|| "USD".to_string());
4449 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4450 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4451 transfer_pricing_method: tp_method,
4452 markup_percent: rust_decimal::Decimal::from_f64_retain(
4453 self.config.intercompany.markup_percent,
4454 )
4455 .unwrap_or(rust_decimal::Decimal::from(5)),
4456 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4457 default_currency: ic_currency,
4458 ..Default::default()
4459 };
4460
4461 let mut ic_generator = datasynth_generators::ICGenerator::new(
4463 ic_gen_config,
4464 ownership_structure.clone(),
4465 seed + 50,
4466 );
4467
4468 let transactions_per_day = 3;
4471 let matched_pairs = ic_generator.generate_transactions_for_period(
4472 start_date,
4473 end_date,
4474 transactions_per_day,
4475 );
4476
4477 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4479 debug!(
4480 "Generated {} IC seller invoices, {} IC buyer POs",
4481 ic_doc_chains.seller_invoices.len(),
4482 ic_doc_chains.buyer_orders.len()
4483 );
4484
4485 let mut seller_entries = Vec::new();
4487 let mut buyer_entries = Vec::new();
4488 let fiscal_year = start_date.year();
4489
4490 for pair in &matched_pairs {
4491 let fiscal_period = pair.posting_date.month();
4492 let (seller_je, buyer_je) =
4493 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4494 seller_entries.push(seller_je);
4495 buyer_entries.push(buyer_je);
4496 }
4497
4498 let matching_config = datasynth_generators::ICMatchingConfig {
4500 base_currency: self
4501 .config
4502 .companies
4503 .first()
4504 .map(|c| c.currency.clone())
4505 .unwrap_or_else(|| "USD".to_string()),
4506 ..Default::default()
4507 };
4508 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4509 matching_engine.load_matched_pairs(&matched_pairs);
4510 let matching_result = matching_engine.run_matching(end_date);
4511
4512 let mut elimination_entries = Vec::new();
4514 if self.config.intercompany.generate_eliminations {
4515 let elim_config = datasynth_generators::EliminationConfig {
4516 consolidation_entity: "GROUP".to_string(),
4517 base_currency: self
4518 .config
4519 .companies
4520 .first()
4521 .map(|c| c.currency.clone())
4522 .unwrap_or_else(|| "USD".to_string()),
4523 ..Default::default()
4524 };
4525
4526 let mut elim_generator =
4527 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4528
4529 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4530 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4531 matching_result
4532 .matched_balances
4533 .iter()
4534 .chain(matching_result.unmatched_balances.iter())
4535 .cloned()
4536 .collect();
4537
4538 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4550 std::collections::HashMap::new();
4551 let mut equity_amounts: std::collections::HashMap<
4552 String,
4553 std::collections::HashMap<String, rust_decimal::Decimal>,
4554 > = std::collections::HashMap::new();
4555 {
4556 use rust_decimal::Decimal;
4557 let hundred = Decimal::from(100u32);
4558 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4562 for sub in &group_structure.subsidiaries {
4563 let net_assets = {
4564 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4565 if na > Decimal::ZERO {
4566 na
4567 } else {
4568 Decimal::from(1_000_000u64)
4569 }
4570 };
4571 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4573 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4574
4575 let mut eq_map = std::collections::HashMap::new();
4578 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4579 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4580 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4581 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4582 }
4583 }
4584
4585 let journal = elim_generator.generate_eliminations(
4586 &fiscal_period,
4587 end_date,
4588 &all_balances,
4589 &matched_pairs,
4590 &investment_amounts,
4591 &equity_amounts,
4592 );
4593
4594 elimination_entries = journal.entries.clone();
4595 }
4596
4597 let matched_pair_count = matched_pairs.len();
4598 let elimination_entry_count = elimination_entries.len();
4599 let match_rate = matching_result.match_rate;
4600
4601 stats.ic_matched_pair_count = matched_pair_count;
4602 stats.ic_elimination_count = elimination_entry_count;
4603 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4604
4605 info!(
4606 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4607 matched_pair_count,
4608 stats.ic_transaction_count,
4609 seller_entries.len(),
4610 buyer_entries.len(),
4611 elimination_entry_count,
4612 match_rate * 100.0
4613 );
4614 self.check_resources_with_log("post-intercompany")?;
4615
4616 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4620 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4621 use rust_decimal::Decimal;
4622
4623 let eight_pct = Decimal::new(8, 2); group_structure
4626 .subsidiaries
4627 .iter()
4628 .filter(|sub| {
4629 sub.nci_percentage > Decimal::ZERO
4630 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4631 })
4632 .map(|sub| {
4633 let net_assets_from_jes =
4637 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4638
4639 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4640 net_assets_from_jes.round_dp(2)
4641 } else {
4642 Decimal::from(1_000_000u64)
4644 };
4645
4646 let net_income = (net_assets * eight_pct).round_dp(2);
4648
4649 NciMeasurement::compute(
4650 sub.entity_code.clone(),
4651 sub.nci_percentage,
4652 net_assets,
4653 net_income,
4654 )
4655 })
4656 .collect()
4657 };
4658
4659 if !nci_measurements.is_empty() {
4660 info!(
4661 "NCI measurements: {} subsidiaries with non-controlling interests",
4662 nci_measurements.len()
4663 );
4664 }
4665
4666 Ok(IntercompanySnapshot {
4667 group_structure: Some(group_structure),
4668 matched_pairs,
4669 seller_journal_entries: seller_entries,
4670 buyer_journal_entries: buyer_entries,
4671 elimination_entries,
4672 nci_measurements,
4673 ic_document_chains: Some(ic_doc_chains),
4674 matched_pair_count,
4675 elimination_entry_count,
4676 match_rate,
4677 })
4678 }
4679
4680 fn phase_financial_reporting(
4682 &mut self,
4683 document_flows: &DocumentFlowSnapshot,
4684 journal_entries: &[JournalEntry],
4685 coa: &Arc<ChartOfAccounts>,
4686 _hr: &HrSnapshot,
4687 _audit: &AuditSnapshot,
4688 stats: &mut EnhancedGenerationStatistics,
4689 ) -> SynthResult<FinancialReportingSnapshot> {
4690 let fs_enabled = self.phase_config.generate_financial_statements
4691 || self.config.financial_reporting.enabled;
4692 let br_enabled = self.phase_config.generate_bank_reconciliation;
4693
4694 if !fs_enabled && !br_enabled {
4695 debug!("Phase 15: Skipped (financial reporting disabled)");
4696 return Ok(FinancialReportingSnapshot::default());
4697 }
4698
4699 info!("Phase 15: Generating Financial Reporting Data");
4700
4701 let seed = self.seed;
4702 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4703 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4704
4705 let mut financial_statements = Vec::new();
4706 let mut bank_reconciliations = Vec::new();
4707 let mut trial_balances = Vec::new();
4708 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4709 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4710 Vec::new();
4711 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4713 std::collections::HashMap::new();
4714 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4716 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4718
4719 if fs_enabled {
4727 let has_journal_entries = !journal_entries.is_empty();
4728
4729 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4732 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4734
4735 let elimination_entries: Vec<&JournalEntry> = journal_entries
4737 .iter()
4738 .filter(|je| je.header.is_elimination)
4739 .collect();
4740
4741 for period in 0..self.config.global.period_months {
4743 let period_start = start_date + chrono::Months::new(period);
4744 let period_end =
4745 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4746 let fiscal_year = period_end.year() as u16;
4747 let fiscal_period = period_end.month() as u8;
4748 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4749
4750 let mut entity_tb_map: std::collections::HashMap<
4753 String,
4754 std::collections::HashMap<String, rust_decimal::Decimal>,
4755 > = std::collections::HashMap::new();
4756
4757 for (company_idx, company) in self.config.companies.iter().enumerate() {
4759 let company_code = company.code.as_str();
4760 let currency = company.currency.as_str();
4761 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4764 let mut company_fs_gen =
4765 FinancialStatementGenerator::new(seed + company_seed_offset);
4766
4767 if has_journal_entries {
4768 let tb_entries = Self::build_cumulative_trial_balance(
4769 journal_entries,
4770 coa,
4771 company_code,
4772 start_date,
4773 period_end,
4774 fiscal_year,
4775 fiscal_period,
4776 );
4777
4778 let entity_cat_map =
4780 entity_tb_map.entry(company_code.to_string()).or_default();
4781 for tb_entry in &tb_entries {
4782 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4783 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4784 }
4785
4786 let stmts = company_fs_gen.generate(
4787 company_code,
4788 currency,
4789 &tb_entries,
4790 period_start,
4791 period_end,
4792 fiscal_year,
4793 fiscal_period,
4794 None,
4795 "SYS-AUTOCLOSE",
4796 );
4797
4798 let mut entity_stmts = Vec::new();
4799 for stmt in stmts {
4800 if stmt.statement_type == StatementType::CashFlowStatement {
4801 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4802 let cf_items = Self::build_cash_flow_from_trial_balances(
4803 &tb_entries,
4804 None,
4805 net_income,
4806 );
4807 entity_stmts.push(FinancialStatement {
4808 cash_flow_items: cf_items,
4809 ..stmt
4810 });
4811 } else {
4812 entity_stmts.push(stmt);
4813 }
4814 }
4815
4816 financial_statements.extend(entity_stmts.clone());
4818
4819 standalone_statements
4821 .entry(company_code.to_string())
4822 .or_default()
4823 .extend(entity_stmts);
4824
4825 if company_idx == 0 {
4828 trial_balances.push(PeriodTrialBalance {
4829 fiscal_year,
4830 fiscal_period,
4831 period_start,
4832 period_end,
4833 entries: tb_entries,
4834 });
4835 }
4836 } else {
4837 let tb_entries = Self::build_trial_balance_from_entries(
4839 journal_entries,
4840 coa,
4841 company_code,
4842 fiscal_year,
4843 fiscal_period,
4844 );
4845
4846 let stmts = company_fs_gen.generate(
4847 company_code,
4848 currency,
4849 &tb_entries,
4850 period_start,
4851 period_end,
4852 fiscal_year,
4853 fiscal_period,
4854 None,
4855 "SYS-AUTOCLOSE",
4856 );
4857 financial_statements.extend(stmts.clone());
4858 standalone_statements
4859 .entry(company_code.to_string())
4860 .or_default()
4861 .extend(stmts);
4862
4863 if company_idx == 0 && !tb_entries.is_empty() {
4864 trial_balances.push(PeriodTrialBalance {
4865 fiscal_year,
4866 fiscal_period,
4867 period_start,
4868 period_end,
4869 entries: tb_entries,
4870 });
4871 }
4872 }
4873 }
4874
4875 let group_currency = self
4878 .config
4879 .companies
4880 .first()
4881 .map(|c| c.currency.as_str())
4882 .unwrap_or("USD");
4883
4884 let period_eliminations: Vec<JournalEntry> = elimination_entries
4886 .iter()
4887 .filter(|je| {
4888 je.header.fiscal_year == fiscal_year
4889 && je.header.fiscal_period == fiscal_period
4890 })
4891 .map(|je| (*je).clone())
4892 .collect();
4893
4894 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4895 &entity_tb_map,
4896 &period_eliminations,
4897 &period_label,
4898 );
4899
4900 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4903 .line_items
4904 .iter()
4905 .map(|li| {
4906 let net = li.post_elimination_total;
4907 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4908 (net, rust_decimal::Decimal::ZERO)
4909 } else {
4910 (rust_decimal::Decimal::ZERO, -net)
4911 };
4912 datasynth_generators::TrialBalanceEntry {
4913 account_code: li.account_category.clone(),
4914 account_name: li.account_category.clone(),
4915 category: li.account_category.clone(),
4916 debit_balance: debit,
4917 credit_balance: credit,
4918 }
4919 })
4920 .collect();
4921
4922 let mut cons_stmts = cons_gen.generate(
4923 "GROUP",
4924 group_currency,
4925 &cons_tb,
4926 period_start,
4927 period_end,
4928 fiscal_year,
4929 fiscal_period,
4930 None,
4931 "SYS-AUTOCLOSE",
4932 );
4933
4934 let bs_categories: &[&str] = &[
4938 "CASH",
4939 "RECEIVABLES",
4940 "INVENTORY",
4941 "FIXEDASSETS",
4942 "PAYABLES",
4943 "ACCRUEDLIABILITIES",
4944 "LONGTERMDEBT",
4945 "EQUITY",
4946 ];
4947 let (bs_items, is_items): (Vec<_>, Vec<_>) =
4948 cons_line_items.into_iter().partition(|li| {
4949 let upper = li.label.to_uppercase();
4950 bs_categories.iter().any(|c| upper == *c)
4951 });
4952
4953 for stmt in &mut cons_stmts {
4954 stmt.is_consolidated = true;
4955 match stmt.statement_type {
4956 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4957 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4958 _ => {} }
4960 }
4961
4962 consolidated_statements.extend(cons_stmts);
4963 consolidation_schedules.push(schedule);
4964 }
4965
4966 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
4972 info!(
4973 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4974 stats.financial_statement_count,
4975 consolidated_statements.len(),
4976 has_journal_entries
4977 );
4978
4979 let entity_seeds: Vec<SegmentSeed> = self
4984 .config
4985 .companies
4986 .iter()
4987 .map(|c| SegmentSeed {
4988 code: c.code.clone(),
4989 name: c.name.clone(),
4990 currency: c.currency.clone(),
4991 })
4992 .collect();
4993
4994 let mut seg_gen = SegmentGenerator::new(seed + 30);
4995
4996 for period in 0..self.config.global.period_months {
5001 let period_end =
5002 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5003 let fiscal_year = period_end.year() as u16;
5004 let fiscal_period = period_end.month() as u8;
5005 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5006
5007 use datasynth_core::models::StatementType;
5008
5009 let cons_is = consolidated_statements.iter().find(|s| {
5011 s.fiscal_year == fiscal_year
5012 && s.fiscal_period == fiscal_period
5013 && s.statement_type == StatementType::IncomeStatement
5014 });
5015 let cons_bs = consolidated_statements.iter().find(|s| {
5016 s.fiscal_year == fiscal_year
5017 && s.fiscal_period == fiscal_period
5018 && s.statement_type == StatementType::BalanceSheet
5019 });
5020
5021 let is_stmt = cons_is.or_else(|| {
5023 financial_statements.iter().find(|s| {
5024 s.fiscal_year == fiscal_year
5025 && s.fiscal_period == fiscal_period
5026 && s.statement_type == StatementType::IncomeStatement
5027 })
5028 });
5029 let bs_stmt = cons_bs.or_else(|| {
5030 financial_statements.iter().find(|s| {
5031 s.fiscal_year == fiscal_year
5032 && s.fiscal_period == fiscal_period
5033 && s.statement_type == StatementType::BalanceSheet
5034 })
5035 });
5036
5037 let consolidated_revenue = is_stmt
5038 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5039 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5041
5042 let consolidated_profit = is_stmt
5043 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5044 .map(|li| li.amount)
5045 .unwrap_or(rust_decimal::Decimal::ZERO);
5046
5047 let consolidated_assets = bs_stmt
5048 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5049 .map(|li| li.amount)
5050 .unwrap_or(rust_decimal::Decimal::ZERO);
5051
5052 if consolidated_revenue == rust_decimal::Decimal::ZERO
5054 && consolidated_assets == rust_decimal::Decimal::ZERO
5055 {
5056 continue;
5057 }
5058
5059 let group_code = self
5060 .config
5061 .companies
5062 .first()
5063 .map(|c| c.code.as_str())
5064 .unwrap_or("GROUP");
5065
5066 let total_depr: rust_decimal::Decimal = journal_entries
5069 .iter()
5070 .filter(|je| je.header.document_type == "CL")
5071 .flat_map(|je| je.lines.iter())
5072 .filter(|l| l.gl_account.starts_with("6000"))
5073 .map(|l| l.debit_amount)
5074 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5075 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5076 Some(total_depr)
5077 } else {
5078 None
5079 };
5080
5081 let (segs, recon) = seg_gen.generate(
5082 group_code,
5083 &period_label,
5084 consolidated_revenue,
5085 consolidated_profit,
5086 consolidated_assets,
5087 &entity_seeds,
5088 depr_param,
5089 );
5090 segment_reports.extend(segs);
5091 segment_reconciliations.push(recon);
5092 }
5093
5094 info!(
5095 "Segment reports generated: {} segments, {} reconciliations",
5096 segment_reports.len(),
5097 segment_reconciliations.len()
5098 );
5099 }
5100
5101 if br_enabled && !document_flows.payments.is_empty() {
5103 let employee_ids: Vec<String> = self
5104 .master_data
5105 .employees
5106 .iter()
5107 .map(|e| e.employee_id.clone())
5108 .collect();
5109 let mut br_gen =
5110 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5111
5112 for company in &self.config.companies {
5114 let company_payments: Vec<PaymentReference> = document_flows
5115 .payments
5116 .iter()
5117 .filter(|p| p.header.company_code == company.code)
5118 .map(|p| PaymentReference {
5119 id: p.header.document_id.clone(),
5120 amount: if p.is_vendor { p.amount } else { -p.amount },
5121 date: p.header.document_date,
5122 reference: p
5123 .check_number
5124 .clone()
5125 .or_else(|| p.wire_reference.clone())
5126 .unwrap_or_else(|| p.header.document_id.clone()),
5127 })
5128 .collect();
5129
5130 if company_payments.is_empty() {
5131 continue;
5132 }
5133
5134 let bank_account_id = format!("{}-MAIN", company.code);
5135
5136 for period in 0..self.config.global.period_months {
5138 let period_start = start_date + chrono::Months::new(period);
5139 let period_end =
5140 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5141
5142 let period_payments: Vec<PaymentReference> = company_payments
5143 .iter()
5144 .filter(|p| p.date >= period_start && p.date <= period_end)
5145 .cloned()
5146 .collect();
5147
5148 let recon = br_gen.generate(
5149 &company.code,
5150 &bank_account_id,
5151 period_start,
5152 period_end,
5153 &company.currency,
5154 &period_payments,
5155 );
5156 bank_reconciliations.push(recon);
5157 }
5158 }
5159 info!(
5160 "Bank reconciliations generated: {} reconciliations",
5161 bank_reconciliations.len()
5162 );
5163 }
5164
5165 stats.bank_reconciliation_count = bank_reconciliations.len();
5166 self.check_resources_with_log("post-financial-reporting")?;
5167
5168 if !trial_balances.is_empty() {
5169 info!(
5170 "Period-close trial balances captured: {} periods",
5171 trial_balances.len()
5172 );
5173 }
5174
5175 let notes_to_financial_statements = Vec::new();
5179
5180 Ok(FinancialReportingSnapshot {
5181 financial_statements,
5182 standalone_statements,
5183 consolidated_statements,
5184 consolidation_schedules,
5185 bank_reconciliations,
5186 trial_balances,
5187 segment_reports,
5188 segment_reconciliations,
5189 notes_to_financial_statements,
5190 })
5191 }
5192
5193 fn generate_notes_to_financial_statements(
5200 &self,
5201 financial_reporting: &mut FinancialReportingSnapshot,
5202 accounting_standards: &AccountingStandardsSnapshot,
5203 tax: &TaxSnapshot,
5204 hr: &HrSnapshot,
5205 audit: &AuditSnapshot,
5206 treasury: &TreasurySnapshot,
5207 ) {
5208 use datasynth_config::schema::AccountingFrameworkConfig;
5209 use datasynth_core::models::StatementType;
5210 use datasynth_generators::period_close::notes_generator::{
5211 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5212 };
5213
5214 let seed = self.seed;
5215 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5216 {
5217 Ok(d) => d,
5218 Err(_) => return,
5219 };
5220
5221 let mut notes_gen = NotesGenerator::new(seed + 4235);
5222
5223 for company in &self.config.companies {
5224 let last_period_end = start_date
5225 + chrono::Months::new(self.config.global.period_months)
5226 - chrono::Days::new(1);
5227 let fiscal_year = last_period_end.year() as u16;
5228
5229 let entity_is = financial_reporting
5231 .standalone_statements
5232 .get(&company.code)
5233 .and_then(|stmts| {
5234 stmts.iter().find(|s| {
5235 s.fiscal_year == fiscal_year
5236 && s.statement_type == StatementType::IncomeStatement
5237 })
5238 });
5239 let entity_bs = financial_reporting
5240 .standalone_statements
5241 .get(&company.code)
5242 .and_then(|stmts| {
5243 stmts.iter().find(|s| {
5244 s.fiscal_year == fiscal_year
5245 && s.statement_type == StatementType::BalanceSheet
5246 })
5247 });
5248
5249 let revenue_amount = entity_is
5251 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5252 .map(|li| li.amount);
5253 let ppe_gross = entity_bs
5254 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5255 .map(|li| li.amount);
5256
5257 let framework = match self
5258 .config
5259 .accounting_standards
5260 .framework
5261 .unwrap_or_default()
5262 {
5263 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5264 "IFRS".to_string()
5265 }
5266 _ => "US GAAP".to_string(),
5267 };
5268
5269 let (entity_dta, entity_dtl) = {
5272 let mut dta = rust_decimal::Decimal::ZERO;
5273 let mut dtl = rust_decimal::Decimal::ZERO;
5274 for rf in &tax.deferred_tax.rollforwards {
5275 if rf.entity_code == company.code {
5276 dta += rf.closing_dta;
5277 dtl += rf.closing_dtl;
5278 }
5279 }
5280 (
5281 if dta > rust_decimal::Decimal::ZERO {
5282 Some(dta)
5283 } else {
5284 None
5285 },
5286 if dtl > rust_decimal::Decimal::ZERO {
5287 Some(dtl)
5288 } else {
5289 None
5290 },
5291 )
5292 };
5293
5294 let entity_provisions: Vec<_> = accounting_standards
5297 .provisions
5298 .iter()
5299 .filter(|p| p.entity_code == company.code)
5300 .collect();
5301 let provision_count = entity_provisions.len();
5302 let total_provisions = if provision_count > 0 {
5303 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5304 } else {
5305 None
5306 };
5307
5308 let entity_pension_plan_count = hr
5310 .pension_plans
5311 .iter()
5312 .filter(|p| p.entity_code == company.code)
5313 .count();
5314 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5315 let sum: rust_decimal::Decimal = hr
5316 .pension_disclosures
5317 .iter()
5318 .filter(|d| {
5319 hr.pension_plans
5320 .iter()
5321 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5322 })
5323 .map(|d| d.net_pension_liability)
5324 .sum();
5325 let plan_assets_sum: rust_decimal::Decimal = hr
5326 .pension_plan_assets
5327 .iter()
5328 .filter(|a| {
5329 hr.pension_plans
5330 .iter()
5331 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5332 })
5333 .map(|a| a.fair_value_closing)
5334 .sum();
5335 if entity_pension_plan_count > 0 {
5336 Some(sum + plan_assets_sum)
5337 } else {
5338 None
5339 }
5340 };
5341 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5342 let sum: rust_decimal::Decimal = hr
5343 .pension_plan_assets
5344 .iter()
5345 .filter(|a| {
5346 hr.pension_plans
5347 .iter()
5348 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5349 })
5350 .map(|a| a.fair_value_closing)
5351 .sum();
5352 if entity_pension_plan_count > 0 {
5353 Some(sum)
5354 } else {
5355 None
5356 }
5357 };
5358
5359 let rp_count = audit.related_party_transactions.len();
5362 let se_count = audit.subsequent_events.len();
5363 let adjusting_count = audit
5364 .subsequent_events
5365 .iter()
5366 .filter(|e| {
5367 matches!(
5368 e.classification,
5369 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5370 )
5371 })
5372 .count();
5373
5374 let ctx = NotesGeneratorContext {
5375 entity_code: company.code.clone(),
5376 framework,
5377 period: format!("FY{}", fiscal_year),
5378 period_end: last_period_end,
5379 currency: company.currency.clone(),
5380 revenue_amount,
5381 total_ppe_gross: ppe_gross,
5382 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5383 deferred_tax_asset: entity_dta,
5385 deferred_tax_liability: entity_dtl,
5386 provision_count,
5388 total_provisions,
5389 pension_plan_count: entity_pension_plan_count,
5391 total_dbo: entity_total_dbo,
5392 total_plan_assets: entity_total_plan_assets,
5393 related_party_transaction_count: rp_count,
5395 subsequent_event_count: se_count,
5396 adjusting_event_count: adjusting_count,
5397 ..NotesGeneratorContext::default()
5398 };
5399
5400 let entity_notes = notes_gen.generate(&ctx);
5401 let standard_note_count = entity_notes.len() as u32;
5402 info!(
5403 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5404 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5405 );
5406 financial_reporting
5407 .notes_to_financial_statements
5408 .extend(entity_notes);
5409
5410 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5412 .debt_instruments
5413 .iter()
5414 .filter(|d| d.entity_id == company.code)
5415 .map(|d| {
5416 (
5417 format!("{:?}", d.instrument_type),
5418 d.principal,
5419 d.maturity_date.to_string(),
5420 )
5421 })
5422 .collect();
5423
5424 let hedge_count = treasury.hedge_relationships.len();
5425 let effective_hedges = treasury
5426 .hedge_relationships
5427 .iter()
5428 .filter(|h| h.is_effective)
5429 .count();
5430 let total_notional: rust_decimal::Decimal = treasury
5431 .hedging_instruments
5432 .iter()
5433 .map(|h| h.notional_amount)
5434 .sum();
5435 let total_fair_value: rust_decimal::Decimal = treasury
5436 .hedging_instruments
5437 .iter()
5438 .map(|h| h.fair_value)
5439 .sum();
5440
5441 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5443 .provisions
5444 .iter()
5445 .filter(|p| p.entity_code == company.code)
5446 .map(|p| p.id.as_str())
5447 .collect();
5448 let provision_movements: Vec<(
5449 String,
5450 rust_decimal::Decimal,
5451 rust_decimal::Decimal,
5452 rust_decimal::Decimal,
5453 )> = accounting_standards
5454 .provision_movements
5455 .iter()
5456 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5457 .map(|m| {
5458 let prov_type = accounting_standards
5459 .provisions
5460 .iter()
5461 .find(|p| p.id == m.provision_id)
5462 .map(|p| format!("{:?}", p.provision_type))
5463 .unwrap_or_else(|| "Unknown".to_string());
5464 (prov_type, m.opening, m.additions, m.closing)
5465 })
5466 .collect();
5467
5468 let enhanced_ctx = EnhancedNotesContext {
5469 entity_code: company.code.clone(),
5470 period: format!("FY{}", fiscal_year),
5471 currency: company.currency.clone(),
5472 finished_goods_value: rust_decimal::Decimal::ZERO,
5474 wip_value: rust_decimal::Decimal::ZERO,
5475 raw_materials_value: rust_decimal::Decimal::ZERO,
5476 debt_instruments,
5477 hedge_count,
5478 effective_hedges,
5479 total_notional,
5480 total_fair_value,
5481 provision_movements,
5482 };
5483
5484 let enhanced_notes =
5485 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5486 if !enhanced_notes.is_empty() {
5487 info!(
5488 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5489 company.code,
5490 enhanced_notes.len(),
5491 enhanced_ctx.debt_instruments.len(),
5492 hedge_count,
5493 enhanced_ctx.provision_movements.len(),
5494 );
5495 financial_reporting
5496 .notes_to_financial_statements
5497 .extend(enhanced_notes);
5498 }
5499 }
5500 }
5501
5502 fn build_trial_balance_from_entries(
5508 journal_entries: &[JournalEntry],
5509 coa: &ChartOfAccounts,
5510 company_code: &str,
5511 fiscal_year: u16,
5512 fiscal_period: u8,
5513 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5514 use rust_decimal::Decimal;
5515
5516 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5518 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5519
5520 for je in journal_entries {
5521 if je.header.company_code != company_code
5523 || je.header.fiscal_year != fiscal_year
5524 || je.header.fiscal_period != fiscal_period
5525 {
5526 continue;
5527 }
5528
5529 for line in &je.lines {
5530 let acct = &line.gl_account;
5531 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5532 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5533 }
5534 }
5535
5536 let mut all_accounts: Vec<&String> = account_debits
5538 .keys()
5539 .chain(account_credits.keys())
5540 .collect::<std::collections::HashSet<_>>()
5541 .into_iter()
5542 .collect();
5543 all_accounts.sort();
5544
5545 let mut entries = Vec::new();
5546
5547 for acct_number in all_accounts {
5548 let debit = account_debits
5549 .get(acct_number)
5550 .copied()
5551 .unwrap_or(Decimal::ZERO);
5552 let credit = account_credits
5553 .get(acct_number)
5554 .copied()
5555 .unwrap_or(Decimal::ZERO);
5556
5557 if debit.is_zero() && credit.is_zero() {
5558 continue;
5559 }
5560
5561 let account_name = coa
5563 .get_account(acct_number)
5564 .map(|gl| gl.short_description.clone())
5565 .unwrap_or_else(|| format!("Account {acct_number}"));
5566
5567 let category = Self::category_from_account_code(acct_number);
5572
5573 entries.push(datasynth_generators::TrialBalanceEntry {
5574 account_code: acct_number.clone(),
5575 account_name,
5576 category,
5577 debit_balance: debit,
5578 credit_balance: credit,
5579 });
5580 }
5581
5582 entries
5583 }
5584
5585 fn build_cumulative_trial_balance(
5592 journal_entries: &[JournalEntry],
5593 coa: &ChartOfAccounts,
5594 company_code: &str,
5595 start_date: NaiveDate,
5596 period_end: NaiveDate,
5597 fiscal_year: u16,
5598 fiscal_period: u8,
5599 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5600 use rust_decimal::Decimal;
5601
5602 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5604 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5605
5606 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5608 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5609
5610 for je in journal_entries {
5611 if je.header.company_code != company_code {
5612 continue;
5613 }
5614
5615 for line in &je.lines {
5616 let acct = &line.gl_account;
5617 let category = Self::category_from_account_code(acct);
5618 let is_bs_account = matches!(
5619 category.as_str(),
5620 "Cash"
5621 | "Receivables"
5622 | "Inventory"
5623 | "FixedAssets"
5624 | "Payables"
5625 | "AccruedLiabilities"
5626 | "LongTermDebt"
5627 | "Equity"
5628 );
5629
5630 if is_bs_account {
5631 if je.header.document_date <= period_end
5633 && je.header.document_date >= start_date
5634 {
5635 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5636 line.debit_amount;
5637 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5638 line.credit_amount;
5639 }
5640 } else {
5641 if je.header.fiscal_year == fiscal_year
5643 && je.header.fiscal_period == fiscal_period
5644 {
5645 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5646 line.debit_amount;
5647 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5648 line.credit_amount;
5649 }
5650 }
5651 }
5652 }
5653
5654 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5656 all_accounts.extend(bs_debits.keys().cloned());
5657 all_accounts.extend(bs_credits.keys().cloned());
5658 all_accounts.extend(is_debits.keys().cloned());
5659 all_accounts.extend(is_credits.keys().cloned());
5660
5661 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5662 sorted_accounts.sort();
5663
5664 let mut entries = Vec::new();
5665
5666 for acct_number in &sorted_accounts {
5667 let category = Self::category_from_account_code(acct_number);
5668 let is_bs_account = matches!(
5669 category.as_str(),
5670 "Cash"
5671 | "Receivables"
5672 | "Inventory"
5673 | "FixedAssets"
5674 | "Payables"
5675 | "AccruedLiabilities"
5676 | "LongTermDebt"
5677 | "Equity"
5678 );
5679
5680 let (debit, credit) = if is_bs_account {
5681 (
5682 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5683 bs_credits
5684 .get(acct_number)
5685 .copied()
5686 .unwrap_or(Decimal::ZERO),
5687 )
5688 } else {
5689 (
5690 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5691 is_credits
5692 .get(acct_number)
5693 .copied()
5694 .unwrap_or(Decimal::ZERO),
5695 )
5696 };
5697
5698 if debit.is_zero() && credit.is_zero() {
5699 continue;
5700 }
5701
5702 let account_name = coa
5703 .get_account(acct_number)
5704 .map(|gl| gl.short_description.clone())
5705 .unwrap_or_else(|| format!("Account {acct_number}"));
5706
5707 entries.push(datasynth_generators::TrialBalanceEntry {
5708 account_code: acct_number.clone(),
5709 account_name,
5710 category,
5711 debit_balance: debit,
5712 credit_balance: credit,
5713 });
5714 }
5715
5716 entries
5717 }
5718
5719 fn build_cash_flow_from_trial_balances(
5724 current_tb: &[datasynth_generators::TrialBalanceEntry],
5725 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5726 net_income: rust_decimal::Decimal,
5727 ) -> Vec<CashFlowItem> {
5728 use rust_decimal::Decimal;
5729
5730 let aggregate =
5732 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5733 let mut map: HashMap<String, Decimal> = HashMap::new();
5734 for entry in tb {
5735 let net = entry.debit_balance - entry.credit_balance;
5736 *map.entry(entry.category.clone()).or_default() += net;
5737 }
5738 map
5739 };
5740
5741 let current = aggregate(current_tb);
5742 let prior = prior_tb.map(aggregate);
5743
5744 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5746 *map.get(key).unwrap_or(&Decimal::ZERO)
5747 };
5748
5749 let change = |key: &str| -> Decimal {
5751 let curr = get(¤t, key);
5752 match &prior {
5753 Some(p) => curr - get(p, key),
5754 None => curr,
5755 }
5756 };
5757
5758 let fixed_asset_change = change("FixedAssets");
5761 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5762 -fixed_asset_change
5763 } else {
5764 Decimal::ZERO
5765 };
5766
5767 let ar_change = change("Receivables");
5769 let inventory_change = change("Inventory");
5770 let ap_change = change("Payables");
5772 let accrued_change = change("AccruedLiabilities");
5773
5774 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5775 + (-ap_change)
5776 + (-accrued_change);
5777
5778 let capex = if fixed_asset_change > Decimal::ZERO {
5780 -fixed_asset_change
5781 } else {
5782 Decimal::ZERO
5783 };
5784 let investing_cf = capex;
5785
5786 let debt_change = -change("LongTermDebt");
5788 let equity_change = -change("Equity");
5789 let financing_cf = debt_change + equity_change;
5790
5791 let net_change = operating_cf + investing_cf + financing_cf;
5792
5793 vec![
5794 CashFlowItem {
5795 item_code: "CF-NI".to_string(),
5796 label: "Net Income".to_string(),
5797 category: CashFlowCategory::Operating,
5798 amount: net_income,
5799 amount_prior: None,
5800 sort_order: 1,
5801 is_total: false,
5802 },
5803 CashFlowItem {
5804 item_code: "CF-DEP".to_string(),
5805 label: "Depreciation & Amortization".to_string(),
5806 category: CashFlowCategory::Operating,
5807 amount: depreciation_addback,
5808 amount_prior: None,
5809 sort_order: 2,
5810 is_total: false,
5811 },
5812 CashFlowItem {
5813 item_code: "CF-AR".to_string(),
5814 label: "Change in Accounts Receivable".to_string(),
5815 category: CashFlowCategory::Operating,
5816 amount: -ar_change,
5817 amount_prior: None,
5818 sort_order: 3,
5819 is_total: false,
5820 },
5821 CashFlowItem {
5822 item_code: "CF-AP".to_string(),
5823 label: "Change in Accounts Payable".to_string(),
5824 category: CashFlowCategory::Operating,
5825 amount: -ap_change,
5826 amount_prior: None,
5827 sort_order: 4,
5828 is_total: false,
5829 },
5830 CashFlowItem {
5831 item_code: "CF-INV".to_string(),
5832 label: "Change in Inventory".to_string(),
5833 category: CashFlowCategory::Operating,
5834 amount: -inventory_change,
5835 amount_prior: None,
5836 sort_order: 5,
5837 is_total: false,
5838 },
5839 CashFlowItem {
5840 item_code: "CF-OP".to_string(),
5841 label: "Net Cash from Operating Activities".to_string(),
5842 category: CashFlowCategory::Operating,
5843 amount: operating_cf,
5844 amount_prior: None,
5845 sort_order: 6,
5846 is_total: true,
5847 },
5848 CashFlowItem {
5849 item_code: "CF-CAPEX".to_string(),
5850 label: "Capital Expenditures".to_string(),
5851 category: CashFlowCategory::Investing,
5852 amount: capex,
5853 amount_prior: None,
5854 sort_order: 7,
5855 is_total: false,
5856 },
5857 CashFlowItem {
5858 item_code: "CF-INV-T".to_string(),
5859 label: "Net Cash from Investing Activities".to_string(),
5860 category: CashFlowCategory::Investing,
5861 amount: investing_cf,
5862 amount_prior: None,
5863 sort_order: 8,
5864 is_total: true,
5865 },
5866 CashFlowItem {
5867 item_code: "CF-DEBT".to_string(),
5868 label: "Net Borrowings / (Repayments)".to_string(),
5869 category: CashFlowCategory::Financing,
5870 amount: debt_change,
5871 amount_prior: None,
5872 sort_order: 9,
5873 is_total: false,
5874 },
5875 CashFlowItem {
5876 item_code: "CF-EQ".to_string(),
5877 label: "Equity Changes".to_string(),
5878 category: CashFlowCategory::Financing,
5879 amount: equity_change,
5880 amount_prior: None,
5881 sort_order: 10,
5882 is_total: false,
5883 },
5884 CashFlowItem {
5885 item_code: "CF-FIN-T".to_string(),
5886 label: "Net Cash from Financing Activities".to_string(),
5887 category: CashFlowCategory::Financing,
5888 amount: financing_cf,
5889 amount_prior: None,
5890 sort_order: 11,
5891 is_total: true,
5892 },
5893 CashFlowItem {
5894 item_code: "CF-NET".to_string(),
5895 label: "Net Change in Cash".to_string(),
5896 category: CashFlowCategory::Operating,
5897 amount: net_change,
5898 amount_prior: None,
5899 sort_order: 12,
5900 is_total: true,
5901 },
5902 ]
5903 }
5904
5905 fn calculate_net_income_from_tb(
5909 tb: &[datasynth_generators::TrialBalanceEntry],
5910 ) -> rust_decimal::Decimal {
5911 use rust_decimal::Decimal;
5912
5913 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5914 for entry in tb {
5915 let net = entry.debit_balance - entry.credit_balance;
5916 *aggregated.entry(entry.category.clone()).or_default() += net;
5917 }
5918
5919 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5920 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5921 let opex = *aggregated
5922 .get("OperatingExpenses")
5923 .unwrap_or(&Decimal::ZERO);
5924 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5925 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5926
5927 let operating_income = revenue - cogs - opex - other_expenses - other_income;
5930 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
5932 operating_income - tax
5933 }
5934
/// Maps an account code to a reporting category using its first two
/// characters.
///
/// Codes shorter than two characters, and any prefix that is not listed
/// explicitly, fall back to `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unlisted prefixes (e.g. 45-49, 53-59, 75-79, 9x) and short codes.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
5963
5964 fn phase_hr_data(
5966 &mut self,
5967 stats: &mut EnhancedGenerationStatistics,
5968 ) -> SynthResult<HrSnapshot> {
5969 if !self.phase_config.generate_hr {
5970 debug!("Phase 16: Skipped (HR generation disabled)");
5971 return Ok(HrSnapshot::default());
5972 }
5973
5974 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5975
5976 let seed = self.seed;
5977 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5978 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5980 let company_code = self
5981 .config
5982 .companies
5983 .first()
5984 .map(|c| c.code.as_str())
5985 .unwrap_or("1000");
5986 let currency = self
5987 .config
5988 .companies
5989 .first()
5990 .map(|c| c.currency.as_str())
5991 .unwrap_or("USD");
5992
5993 let employee_ids: Vec<String> = self
5994 .master_data
5995 .employees
5996 .iter()
5997 .map(|e| e.employee_id.clone())
5998 .collect();
5999
6000 if employee_ids.is_empty() {
6001 debug!("Phase 16: Skipped (no employees available)");
6002 return Ok(HrSnapshot::default());
6003 }
6004
6005 let cost_center_ids: Vec<String> = self
6008 .master_data
6009 .employees
6010 .iter()
6011 .filter_map(|e| e.cost_center.clone())
6012 .collect::<std::collections::HashSet<_>>()
6013 .into_iter()
6014 .collect();
6015
6016 let mut snapshot = HrSnapshot::default();
6017
6018 if self.config.hr.payroll.enabled {
6020 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6021 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6022
6023 let payroll_pack = self.primary_pack();
6025
6026 payroll_gen.set_country_pack(payroll_pack.clone());
6029
6030 let employees_with_salary: Vec<(
6031 String,
6032 rust_decimal::Decimal,
6033 Option<String>,
6034 Option<String>,
6035 )> = self
6036 .master_data
6037 .employees
6038 .iter()
6039 .map(|e| {
6040 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6043 e.base_salary
6044 } else {
6045 rust_decimal::Decimal::from(60_000)
6046 };
6047 (
6048 e.employee_id.clone(),
6049 annual, e.cost_center.clone(),
6051 e.department_id.clone(),
6052 )
6053 })
6054 .collect();
6055
6056 let change_history = &self.master_data.employee_change_history;
6059 let has_changes = !change_history.is_empty();
6060 if has_changes {
6061 debug!(
6062 "Payroll will incorporate {} employee change events",
6063 change_history.len()
6064 );
6065 }
6066
6067 for month in 0..self.config.global.period_months {
6068 let period_start = start_date + chrono::Months::new(month);
6069 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6070 let (run, items) = if has_changes {
6071 payroll_gen.generate_with_changes(
6072 company_code,
6073 &employees_with_salary,
6074 period_start,
6075 period_end,
6076 currency,
6077 change_history,
6078 )
6079 } else {
6080 payroll_gen.generate(
6081 company_code,
6082 &employees_with_salary,
6083 period_start,
6084 period_end,
6085 currency,
6086 )
6087 };
6088 snapshot.payroll_runs.push(run);
6089 snapshot.payroll_run_count += 1;
6090 snapshot.payroll_line_item_count += items.len();
6091 snapshot.payroll_line_items.extend(items);
6092 }
6093 }
6094
6095 if self.config.hr.time_attendance.enabled {
6097 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6098 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6099 let entries = time_gen.generate(
6100 &employee_ids,
6101 start_date,
6102 end_date,
6103 &self.config.hr.time_attendance,
6104 );
6105 snapshot.time_entry_count = entries.len();
6106 snapshot.time_entries = entries;
6107 }
6108
6109 if self.config.hr.expenses.enabled {
6111 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6112 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6113 expense_gen.set_country_pack(self.primary_pack().clone());
6114 let company_currency = self
6115 .config
6116 .companies
6117 .first()
6118 .map(|c| c.currency.as_str())
6119 .unwrap_or("USD");
6120 let reports = expense_gen.generate_with_currency(
6121 &employee_ids,
6122 start_date,
6123 end_date,
6124 &self.config.hr.expenses,
6125 company_currency,
6126 );
6127 snapshot.expense_report_count = reports.len();
6128 snapshot.expense_reports = reports;
6129 }
6130
6131 if self.config.hr.payroll.enabled {
6133 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6134 let employee_pairs: Vec<(String, String)> = self
6135 .master_data
6136 .employees
6137 .iter()
6138 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6139 .collect();
6140 let enrollments =
6141 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6142 snapshot.benefit_enrollment_count = enrollments.len();
6143 snapshot.benefit_enrollments = enrollments;
6144 }
6145
6146 if self.phase_config.generate_hr {
6148 let entity_name = self
6149 .config
6150 .companies
6151 .first()
6152 .map(|c| c.name.as_str())
6153 .unwrap_or("Entity");
6154 let period_months = self.config.global.period_months;
6155 let period_label = {
6156 let y = start_date.year();
6157 let m = start_date.month();
6158 if period_months >= 12 {
6159 format!("FY{y}")
6160 } else {
6161 format!("{y}-{m:02}")
6162 }
6163 };
6164 let reporting_date =
6165 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6166
6167 let avg_salary: Option<rust_decimal::Decimal> = {
6172 let employee_count = employee_ids.len();
6173 if self.config.hr.payroll.enabled
6174 && employee_count > 0
6175 && !snapshot.payroll_runs.is_empty()
6176 {
6177 let total_gross: rust_decimal::Decimal = snapshot
6179 .payroll_runs
6180 .iter()
6181 .filter(|r| r.company_code == company_code)
6182 .map(|r| r.total_gross)
6183 .sum();
6184 if total_gross > rust_decimal::Decimal::ZERO {
6185 let annual_total = if period_months > 0 && period_months < 12 {
6187 total_gross * rust_decimal::Decimal::from(12u32)
6188 / rust_decimal::Decimal::from(period_months)
6189 } else {
6190 total_gross
6191 };
6192 Some(
6193 (annual_total / rust_decimal::Decimal::from(employee_count))
6194 .round_dp(2),
6195 )
6196 } else {
6197 None
6198 }
6199 } else {
6200 None
6201 }
6202 };
6203
6204 let mut pension_gen =
6205 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6206 let pension_snap = pension_gen.generate(
6207 company_code,
6208 entity_name,
6209 &period_label,
6210 reporting_date,
6211 employee_ids.len(),
6212 currency,
6213 avg_salary,
6214 period_months,
6215 );
6216 snapshot.pension_plan_count = pension_snap.plans.len();
6217 snapshot.pension_plans = pension_snap.plans;
6218 snapshot.pension_obligations = pension_snap.obligations;
6219 snapshot.pension_plan_assets = pension_snap.plan_assets;
6220 snapshot.pension_disclosures = pension_snap.disclosures;
6221 snapshot.pension_journal_entries = pension_snap.journal_entries;
6226 }
6227
6228 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6230 let period_months = self.config.global.period_months;
6231 let period_label = {
6232 let y = start_date.year();
6233 let m = start_date.month();
6234 if period_months >= 12 {
6235 format!("FY{y}")
6236 } else {
6237 format!("{y}-{m:02}")
6238 }
6239 };
6240 let reporting_date =
6241 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6242
6243 let mut stock_comp_gen =
6244 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6245 let stock_snap = stock_comp_gen.generate(
6246 company_code,
6247 &employee_ids,
6248 start_date,
6249 &period_label,
6250 reporting_date,
6251 currency,
6252 );
6253 snapshot.stock_grant_count = stock_snap.grants.len();
6254 snapshot.stock_grants = stock_snap.grants;
6255 snapshot.stock_comp_expenses = stock_snap.expenses;
6256 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6257 }
6258
6259 stats.payroll_run_count = snapshot.payroll_run_count;
6260 stats.time_entry_count = snapshot.time_entry_count;
6261 stats.expense_report_count = snapshot.expense_report_count;
6262 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6263 stats.pension_plan_count = snapshot.pension_plan_count;
6264 stats.stock_grant_count = snapshot.stock_grant_count;
6265
6266 info!(
6267 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6268 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6269 snapshot.time_entry_count, snapshot.expense_report_count,
6270 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6271 snapshot.stock_grant_count
6272 );
6273 self.check_resources_with_log("post-hr")?;
6274
6275 Ok(snapshot)
6276 }
6277
6278 fn phase_accounting_standards(
6280 &mut self,
6281 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6282 journal_entries: &[JournalEntry],
6283 stats: &mut EnhancedGenerationStatistics,
6284 ) -> SynthResult<AccountingStandardsSnapshot> {
6285 if !self.phase_config.generate_accounting_standards {
6286 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6287 return Ok(AccountingStandardsSnapshot::default());
6288 }
6289 info!("Phase 17: Generating Accounting Standards Data");
6290
6291 let seed = self.seed;
6292 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6293 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6294 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6295 let company_code = self
6296 .config
6297 .companies
6298 .first()
6299 .map(|c| c.code.as_str())
6300 .unwrap_or("1000");
6301 let currency = self
6302 .config
6303 .companies
6304 .first()
6305 .map(|c| c.currency.as_str())
6306 .unwrap_or("USD");
6307
6308 let framework = match self.config.accounting_standards.framework {
6313 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6314 datasynth_standards::framework::AccountingFramework::UsGaap
6315 }
6316 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6317 datasynth_standards::framework::AccountingFramework::Ifrs
6318 }
6319 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6320 datasynth_standards::framework::AccountingFramework::DualReporting
6321 }
6322 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6323 datasynth_standards::framework::AccountingFramework::FrenchGaap
6324 }
6325 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6326 datasynth_standards::framework::AccountingFramework::GermanGaap
6327 }
6328 None => {
6329 let pack = self.primary_pack();
6331 let pack_fw = pack.accounting.framework.as_str();
6332 match pack_fw {
6333 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6334 "dual_reporting" => {
6335 datasynth_standards::framework::AccountingFramework::DualReporting
6336 }
6337 "french_gaap" => {
6338 datasynth_standards::framework::AccountingFramework::FrenchGaap
6339 }
6340 "german_gaap" | "hgb" => {
6341 datasynth_standards::framework::AccountingFramework::GermanGaap
6342 }
6343 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6345 }
6346 }
6347 };
6348
6349 let mut snapshot = AccountingStandardsSnapshot::default();
6350
6351 if self.config.accounting_standards.revenue_recognition.enabled {
6353 let customer_ids: Vec<String> = self
6354 .master_data
6355 .customers
6356 .iter()
6357 .map(|c| c.customer_id.clone())
6358 .collect();
6359
6360 if !customer_ids.is_empty() {
6361 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6362 let contracts = rev_gen.generate(
6363 company_code,
6364 &customer_ids,
6365 start_date,
6366 end_date,
6367 currency,
6368 &self.config.accounting_standards.revenue_recognition,
6369 framework,
6370 );
6371 snapshot.revenue_contract_count = contracts.len();
6372 snapshot.contracts = contracts;
6373 }
6374 }
6375
6376 if self.config.accounting_standards.impairment.enabled {
6378 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6379 .master_data
6380 .assets
6381 .iter()
6382 .map(|a| {
6383 (
6384 a.asset_id.clone(),
6385 a.description.clone(),
6386 a.acquisition_cost,
6387 )
6388 })
6389 .collect();
6390
6391 if !asset_data.is_empty() {
6392 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6393 let tests = imp_gen.generate(
6394 company_code,
6395 &asset_data,
6396 end_date,
6397 &self.config.accounting_standards.impairment,
6398 framework,
6399 );
6400 snapshot.impairment_test_count = tests.len();
6401 snapshot.impairment_tests = tests;
6402 }
6403 }
6404
6405 if self
6407 .config
6408 .accounting_standards
6409 .business_combinations
6410 .enabled
6411 {
6412 let bc_config = &self.config.accounting_standards.business_combinations;
6413 let framework_str = match framework {
6414 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6415 _ => "US_GAAP",
6416 };
6417 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6418 let bc_snap = bc_gen.generate(
6419 company_code,
6420 currency,
6421 start_date,
6422 end_date,
6423 bc_config.acquisition_count,
6424 framework_str,
6425 );
6426 snapshot.business_combination_count = bc_snap.combinations.len();
6427 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6428 snapshot.business_combinations = bc_snap.combinations;
6429 }
6430
6431 if self
6433 .config
6434 .accounting_standards
6435 .expected_credit_loss
6436 .enabled
6437 {
6438 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6439 let framework_str = match framework {
6440 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6441 _ => "ASC_326",
6442 };
6443
6444 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6447
6448 let mut ecl_gen = EclGenerator::new(seed + 43);
6449
6450 let bucket_exposures: Vec<(
6452 datasynth_core::models::subledger::ar::AgingBucket,
6453 rust_decimal::Decimal,
6454 )> = if ar_aging_reports.is_empty() {
6455 use datasynth_core::models::subledger::ar::AgingBucket;
6457 vec![
6458 (
6459 AgingBucket::Current,
6460 rust_decimal::Decimal::from(500_000_u32),
6461 ),
6462 (
6463 AgingBucket::Days1To30,
6464 rust_decimal::Decimal::from(120_000_u32),
6465 ),
6466 (
6467 AgingBucket::Days31To60,
6468 rust_decimal::Decimal::from(45_000_u32),
6469 ),
6470 (
6471 AgingBucket::Days61To90,
6472 rust_decimal::Decimal::from(15_000_u32),
6473 ),
6474 (
6475 AgingBucket::Over90Days,
6476 rust_decimal::Decimal::from(8_000_u32),
6477 ),
6478 ]
6479 } else {
6480 use datasynth_core::models::subledger::ar::AgingBucket;
6481 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6483 std::collections::HashMap::new();
6484 for report in ar_aging_reports {
6485 for (bucket, amount) in &report.bucket_totals {
6486 *totals.entry(*bucket).or_default() += amount;
6487 }
6488 }
6489 AgingBucket::all()
6490 .into_iter()
6491 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6492 .collect()
6493 };
6494
6495 let ecl_snap = ecl_gen.generate(
6496 company_code,
6497 end_date,
6498 &bucket_exposures,
6499 ecl_config,
6500 &period_label,
6501 framework_str,
6502 );
6503
6504 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6505 snapshot.ecl_models = ecl_snap.ecl_models;
6506 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6507 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6508 }
6509
6510 {
6512 let framework_str = match framework {
6513 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6514 _ => "US_GAAP",
6515 };
6516
6517 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6522 .max(rust_decimal::Decimal::from(100_000_u32));
6523
6524 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6525
6526 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6527 let prov_snap = prov_gen.generate(
6528 company_code,
6529 currency,
6530 revenue_proxy,
6531 end_date,
6532 &period_label,
6533 framework_str,
6534 None, );
6536
6537 snapshot.provision_count = prov_snap.provisions.len();
6538 snapshot.provisions = prov_snap.provisions;
6539 snapshot.provision_movements = prov_snap.movements;
6540 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6541 snapshot.provision_journal_entries = prov_snap.journal_entries;
6542 }
6543
6544 {
6548 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6549
6550 let presentation_currency = self
6551 .config
6552 .global
6553 .presentation_currency
6554 .clone()
6555 .unwrap_or_else(|| self.config.global.group_currency.clone());
6556
6557 let mut rate_table = FxRateTable::new(&presentation_currency);
6560
6561 let base_rates = base_rates_usd();
6565 for (ccy, rate) in &base_rates {
6566 rate_table.add_rate(FxRate::new(
6567 ccy,
6568 "USD",
6569 RateType::Closing,
6570 end_date,
6571 *rate,
6572 "SYNTHETIC",
6573 ));
6574 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6577 rate_table.add_rate(FxRate::new(
6578 ccy,
6579 "USD",
6580 RateType::Average,
6581 end_date,
6582 avg,
6583 "SYNTHETIC",
6584 ));
6585 }
6586
6587 let mut translation_results = Vec::new();
6588 for company in &self.config.companies {
6589 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6592 .max(rust_decimal::Decimal::from(100_000_u32));
6593
6594 let func_ccy = company
6595 .functional_currency
6596 .clone()
6597 .unwrap_or_else(|| company.currency.clone());
6598
6599 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6600 &company.code,
6601 &func_ccy,
6602 &presentation_currency,
6603 &ias21_period_label,
6604 end_date,
6605 company_revenue,
6606 &rate_table,
6607 );
6608 translation_results.push(result);
6609 }
6610
6611 snapshot.currency_translation_count = translation_results.len();
6612 snapshot.currency_translation_results = translation_results;
6613 }
6614
6615 stats.revenue_contract_count = snapshot.revenue_contract_count;
6616 stats.impairment_test_count = snapshot.impairment_test_count;
6617 stats.business_combination_count = snapshot.business_combination_count;
6618 stats.ecl_model_count = snapshot.ecl_model_count;
6619 stats.provision_count = snapshot.provision_count;
6620
6621 info!(
6622 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6623 snapshot.revenue_contract_count,
6624 snapshot.impairment_test_count,
6625 snapshot.business_combination_count,
6626 snapshot.ecl_model_count,
6627 snapshot.provision_count,
6628 snapshot.currency_translation_count
6629 );
6630 self.check_resources_with_log("post-accounting-standards")?;
6631
6632 Ok(snapshot)
6633 }
6634
6635 fn phase_manufacturing(
6637 &mut self,
6638 stats: &mut EnhancedGenerationStatistics,
6639 ) -> SynthResult<ManufacturingSnapshot> {
6640 if !self.phase_config.generate_manufacturing {
6641 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6642 return Ok(ManufacturingSnapshot::default());
6643 }
6644 info!("Phase 18: Generating Manufacturing Data");
6645
6646 let seed = self.seed;
6647 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6648 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6649 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6650 let company_code = self
6651 .config
6652 .companies
6653 .first()
6654 .map(|c| c.code.as_str())
6655 .unwrap_or("1000");
6656
6657 let material_data: Vec<(String, String)> = self
6658 .master_data
6659 .materials
6660 .iter()
6661 .map(|m| (m.material_id.clone(), m.description.clone()))
6662 .collect();
6663
6664 if material_data.is_empty() {
6665 debug!("Phase 18: Skipped (no materials available)");
6666 return Ok(ManufacturingSnapshot::default());
6667 }
6668
6669 let mut snapshot = ManufacturingSnapshot::default();
6670
6671 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6673 let production_orders = prod_gen.generate(
6674 company_code,
6675 &material_data,
6676 start_date,
6677 end_date,
6678 &self.config.manufacturing.production_orders,
6679 &self.config.manufacturing.costing,
6680 &self.config.manufacturing.routing,
6681 );
6682 snapshot.production_order_count = production_orders.len();
6683
6684 let inspection_data: Vec<(String, String, String)> = production_orders
6686 .iter()
6687 .map(|po| {
6688 (
6689 po.order_id.clone(),
6690 po.material_id.clone(),
6691 po.material_description.clone(),
6692 )
6693 })
6694 .collect();
6695
6696 snapshot.production_orders = production_orders;
6697
6698 if !inspection_data.is_empty() {
6699 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6700 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6701 snapshot.quality_inspection_count = inspections.len();
6702 snapshot.quality_inspections = inspections;
6703 }
6704
6705 let storage_locations: Vec<(String, String)> = material_data
6707 .iter()
6708 .enumerate()
6709 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6710 .collect();
6711
6712 let employee_ids: Vec<String> = self
6713 .master_data
6714 .employees
6715 .iter()
6716 .map(|e| e.employee_id.clone())
6717 .collect();
6718 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6719 .with_employee_pool(employee_ids);
6720 let mut cycle_count_total = 0usize;
6721 for month in 0..self.config.global.period_months {
6722 let count_date = start_date + chrono::Months::new(month);
6723 let items_per_count = storage_locations.len().clamp(10, 50);
6724 let cc = cc_gen.generate(
6725 company_code,
6726 &storage_locations,
6727 count_date,
6728 items_per_count,
6729 );
6730 snapshot.cycle_counts.push(cc);
6731 cycle_count_total += 1;
6732 }
6733 snapshot.cycle_count_count = cycle_count_total;
6734
6735 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6737 let bom_components = bom_gen.generate(company_code, &material_data);
6738 snapshot.bom_component_count = bom_components.len();
6739 snapshot.bom_components = bom_components;
6740
6741 let currency = self
6743 .config
6744 .companies
6745 .first()
6746 .map(|c| c.currency.as_str())
6747 .unwrap_or("USD");
6748 let production_order_ids: Vec<String> = snapshot
6749 .production_orders
6750 .iter()
6751 .map(|po| po.order_id.clone())
6752 .collect();
6753 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6754 let inventory_movements = inv_mov_gen.generate_with_production_orders(
6755 company_code,
6756 &material_data,
6757 start_date,
6758 end_date,
6759 2,
6760 currency,
6761 &production_order_ids,
6762 );
6763 snapshot.inventory_movement_count = inventory_movements.len();
6764 snapshot.inventory_movements = inventory_movements;
6765
6766 stats.production_order_count = snapshot.production_order_count;
6767 stats.quality_inspection_count = snapshot.quality_inspection_count;
6768 stats.cycle_count_count = snapshot.cycle_count_count;
6769 stats.bom_component_count = snapshot.bom_component_count;
6770 stats.inventory_movement_count = snapshot.inventory_movement_count;
6771
6772 info!(
6773 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6774 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6775 snapshot.bom_component_count, snapshot.inventory_movement_count
6776 );
6777 self.check_resources_with_log("post-manufacturing")?;
6778
6779 Ok(snapshot)
6780 }
6781
6782 fn phase_sales_kpi_budgets(
6784 &mut self,
6785 coa: &Arc<ChartOfAccounts>,
6786 financial_reporting: &FinancialReportingSnapshot,
6787 stats: &mut EnhancedGenerationStatistics,
6788 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6789 if !self.phase_config.generate_sales_kpi_budgets {
6790 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6791 return Ok(SalesKpiBudgetsSnapshot::default());
6792 }
6793 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6794
6795 let seed = self.seed;
6796 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6797 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6798 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6799 let company_code = self
6800 .config
6801 .companies
6802 .first()
6803 .map(|c| c.code.as_str())
6804 .unwrap_or("1000");
6805
6806 let mut snapshot = SalesKpiBudgetsSnapshot::default();
6807
6808 if self.config.sales_quotes.enabled {
6810 let customer_data: Vec<(String, String)> = self
6811 .master_data
6812 .customers
6813 .iter()
6814 .map(|c| (c.customer_id.clone(), c.name.clone()))
6815 .collect();
6816 let material_data: Vec<(String, String)> = self
6817 .master_data
6818 .materials
6819 .iter()
6820 .map(|m| (m.material_id.clone(), m.description.clone()))
6821 .collect();
6822
6823 if !customer_data.is_empty() && !material_data.is_empty() {
6824 let employee_ids: Vec<String> = self
6825 .master_data
6826 .employees
6827 .iter()
6828 .map(|e| e.employee_id.clone())
6829 .collect();
6830 let customer_ids: Vec<String> = self
6831 .master_data
6832 .customers
6833 .iter()
6834 .map(|c| c.customer_id.clone())
6835 .collect();
6836 let company_currency = self
6837 .config
6838 .companies
6839 .first()
6840 .map(|c| c.currency.as_str())
6841 .unwrap_or("USD");
6842
6843 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6844 .with_pools(employee_ids, customer_ids);
6845 let quotes = quote_gen.generate_with_currency(
6846 company_code,
6847 &customer_data,
6848 &material_data,
6849 start_date,
6850 end_date,
6851 &self.config.sales_quotes,
6852 company_currency,
6853 );
6854 snapshot.sales_quote_count = quotes.len();
6855 snapshot.sales_quotes = quotes;
6856 }
6857 }
6858
6859 if self.config.financial_reporting.management_kpis.enabled {
6861 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6862 let mut kpis = kpi_gen.generate(
6863 company_code,
6864 start_date,
6865 end_date,
6866 &self.config.financial_reporting.management_kpis,
6867 );
6868
6869 {
6871 use rust_decimal::Decimal;
6872
6873 if let Some(income_stmt) =
6874 financial_reporting.financial_statements.iter().find(|fs| {
6875 fs.statement_type == StatementType::IncomeStatement
6876 && fs.company_code == company_code
6877 })
6878 {
6879 let total_revenue: Decimal = income_stmt
6881 .line_items
6882 .iter()
6883 .filter(|li| li.section.contains("Revenue") && !li.is_total)
6884 .map(|li| li.amount)
6885 .sum();
6886 let total_cogs: Decimal = income_stmt
6887 .line_items
6888 .iter()
6889 .filter(|li| {
6890 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6891 && !li.is_total
6892 })
6893 .map(|li| li.amount.abs())
6894 .sum();
6895 let total_opex: Decimal = income_stmt
6896 .line_items
6897 .iter()
6898 .filter(|li| {
6899 li.section.contains("Expense")
6900 && !li.is_total
6901 && !li.section.contains("Cost")
6902 })
6903 .map(|li| li.amount.abs())
6904 .sum();
6905
6906 if total_revenue > Decimal::ZERO {
6907 let hundred = Decimal::from(100);
6908 let gross_margin_pct =
6909 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6910 let operating_income = total_revenue - total_cogs - total_opex;
6911 let op_margin_pct =
6912 (operating_income * hundred / total_revenue).round_dp(2);
6913
6914 for kpi in &mut kpis {
6916 if kpi.name == "Gross Margin" {
6917 kpi.value = gross_margin_pct;
6918 } else if kpi.name == "Operating Margin" {
6919 kpi.value = op_margin_pct;
6920 }
6921 }
6922 }
6923 }
6924
6925 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6927 fs.statement_type == StatementType::BalanceSheet
6928 && fs.company_code == company_code
6929 }) {
6930 let current_assets: Decimal = bs
6931 .line_items
6932 .iter()
6933 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6934 .map(|li| li.amount)
6935 .sum();
6936 let current_liabilities: Decimal = bs
6937 .line_items
6938 .iter()
6939 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6940 .map(|li| li.amount.abs())
6941 .sum();
6942
6943 if current_liabilities > Decimal::ZERO {
6944 let current_ratio = (current_assets / current_liabilities).round_dp(2);
6945 for kpi in &mut kpis {
6946 if kpi.name == "Current Ratio" {
6947 kpi.value = current_ratio;
6948 }
6949 }
6950 }
6951 }
6952 }
6953
6954 snapshot.kpi_count = kpis.len();
6955 snapshot.kpis = kpis;
6956 }
6957
6958 if self.config.financial_reporting.budgets.enabled {
6960 let account_data: Vec<(String, String)> = coa
6961 .accounts
6962 .iter()
6963 .map(|a| (a.account_number.clone(), a.short_description.clone()))
6964 .collect();
6965
6966 if !account_data.is_empty() {
6967 let fiscal_year = start_date.year() as u32;
6968 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6969 let budget = budget_gen.generate(
6970 company_code,
6971 fiscal_year,
6972 &account_data,
6973 &self.config.financial_reporting.budgets,
6974 );
6975 snapshot.budget_line_count = budget.line_items.len();
6976 snapshot.budgets.push(budget);
6977 }
6978 }
6979
6980 stats.sales_quote_count = snapshot.sales_quote_count;
6981 stats.kpi_count = snapshot.kpi_count;
6982 stats.budget_line_count = snapshot.budget_line_count;
6983
6984 info!(
6985 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6986 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6987 );
6988 self.check_resources_with_log("post-sales-kpi-budgets")?;
6989
6990 Ok(snapshot)
6991 }
6992
6993 fn compute_pre_tax_income(
7000 company_code: &str,
7001 journal_entries: &[JournalEntry],
7002 ) -> rust_decimal::Decimal {
7003 use datasynth_core::accounts::AccountCategory;
7004 use rust_decimal::Decimal;
7005
7006 let mut total_revenue = Decimal::ZERO;
7007 let mut total_expenses = Decimal::ZERO;
7008
7009 for je in journal_entries {
7010 if je.header.company_code != company_code {
7011 continue;
7012 }
7013 for line in &je.lines {
7014 let cat = AccountCategory::from_account(&line.gl_account);
7015 match cat {
7016 AccountCategory::Revenue => {
7017 total_revenue += line.credit_amount - line.debit_amount;
7018 }
7019 AccountCategory::Cogs
7020 | AccountCategory::OperatingExpense
7021 | AccountCategory::OtherIncomeExpense => {
7022 total_expenses += line.debit_amount - line.credit_amount;
7023 }
7024 _ => {}
7025 }
7026 }
7027 }
7028
7029 let pti = (total_revenue - total_expenses).round_dp(2);
7030 if pti == rust_decimal::Decimal::ZERO {
7031 rust_decimal::Decimal::from(1_000_000u32)
7034 } else {
7035 pti
7036 }
7037 }
7038
7039 fn phase_tax_generation(
7041 &mut self,
7042 document_flows: &DocumentFlowSnapshot,
7043 journal_entries: &[JournalEntry],
7044 stats: &mut EnhancedGenerationStatistics,
7045 ) -> SynthResult<TaxSnapshot> {
7046 if !self.phase_config.generate_tax {
7047 debug!("Phase 20: Skipped (tax generation disabled)");
7048 return Ok(TaxSnapshot::default());
7049 }
7050 info!("Phase 20: Generating Tax Data");
7051
7052 let seed = self.seed;
7053 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7054 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7055 let fiscal_year = start_date.year();
7056 let company_code = self
7057 .config
7058 .companies
7059 .first()
7060 .map(|c| c.code.as_str())
7061 .unwrap_or("1000");
7062
7063 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7064 seed + 370,
7065 self.config.tax.clone(),
7066 );
7067
7068 let pack = self.primary_pack().clone();
7069 let (jurisdictions, codes) =
7070 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7071
7072 let mut provisions = Vec::new();
7074 if self.config.tax.provisions.enabled {
7075 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7076 for company in &self.config.companies {
7077 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7078 let statutory_rate = rust_decimal::Decimal::new(
7079 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7080 2,
7081 );
7082 let provision = provision_gen.generate(
7083 &company.code,
7084 start_date,
7085 pre_tax_income,
7086 statutory_rate,
7087 );
7088 provisions.push(provision);
7089 }
7090 }
7091
7092 let mut tax_lines = Vec::new();
7094 if !codes.is_empty() {
7095 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7096 datasynth_generators::TaxLineGeneratorConfig::default(),
7097 codes.clone(),
7098 seed + 372,
7099 );
7100
7101 let buyer_country = self
7104 .config
7105 .companies
7106 .first()
7107 .map(|c| c.country.as_str())
7108 .unwrap_or("US");
7109 for vi in &document_flows.vendor_invoices {
7110 let lines = tax_line_gen.generate_for_document(
7111 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7112 &vi.header.document_id,
7113 buyer_country, buyer_country,
7115 vi.payable_amount,
7116 vi.header.document_date,
7117 None,
7118 );
7119 tax_lines.extend(lines);
7120 }
7121
7122 for ci in &document_flows.customer_invoices {
7124 let lines = tax_line_gen.generate_for_document(
7125 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7126 &ci.header.document_id,
7127 buyer_country, buyer_country,
7129 ci.total_gross_amount,
7130 ci.header.document_date,
7131 None,
7132 );
7133 tax_lines.extend(lines);
7134 }
7135 }
7136
7137 let deferred_tax = {
7139 let companies: Vec<(&str, &str)> = self
7140 .config
7141 .companies
7142 .iter()
7143 .map(|c| (c.code.as_str(), c.country.as_str()))
7144 .collect();
7145 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7146 deferred_gen.generate(&companies, start_date, journal_entries)
7147 };
7148
7149 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7152 std::collections::HashMap::new();
7153 for vi in &document_flows.vendor_invoices {
7154 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7155 }
7156 for ci in &document_flows.customer_invoices {
7157 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7158 }
7159
7160 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7162 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7163 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7164 &tax_lines,
7165 company_code,
7166 &doc_dates,
7167 end_date,
7168 );
7169 debug!("Generated {} tax posting JEs", jes.len());
7170 jes
7171 } else {
7172 Vec::new()
7173 };
7174
7175 let snapshot = TaxSnapshot {
7176 jurisdiction_count: jurisdictions.len(),
7177 code_count: codes.len(),
7178 jurisdictions,
7179 codes,
7180 tax_provisions: provisions,
7181 tax_lines,
7182 tax_returns: Vec::new(),
7183 withholding_records: Vec::new(),
7184 tax_anomaly_labels: Vec::new(),
7185 deferred_tax,
7186 tax_posting_journal_entries,
7187 };
7188
7189 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7190 stats.tax_code_count = snapshot.code_count;
7191 stats.tax_provision_count = snapshot.tax_provisions.len();
7192 stats.tax_line_count = snapshot.tax_lines.len();
7193
7194 info!(
7195 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7196 snapshot.jurisdiction_count,
7197 snapshot.code_count,
7198 snapshot.tax_provisions.len(),
7199 snapshot.deferred_tax.temporary_differences.len(),
7200 snapshot.deferred_tax.journal_entries.len(),
7201 snapshot.tax_posting_journal_entries.len(),
7202 );
7203 self.check_resources_with_log("post-tax")?;
7204
7205 Ok(snapshot)
7206 }
7207
7208 fn phase_esg_generation(
7210 &mut self,
7211 document_flows: &DocumentFlowSnapshot,
7212 manufacturing: &ManufacturingSnapshot,
7213 stats: &mut EnhancedGenerationStatistics,
7214 ) -> SynthResult<EsgSnapshot> {
7215 if !self.phase_config.generate_esg {
7216 debug!("Phase 21: Skipped (ESG generation disabled)");
7217 return Ok(EsgSnapshot::default());
7218 }
7219 let degradation = self.check_resources()?;
7220 if degradation >= DegradationLevel::Reduced {
7221 debug!(
7222 "Phase skipped due to resource pressure (degradation: {:?})",
7223 degradation
7224 );
7225 return Ok(EsgSnapshot::default());
7226 }
7227 info!("Phase 21: Generating ESG Data");
7228
7229 let seed = self.seed;
7230 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7231 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7232 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7233 let entity_id = self
7234 .config
7235 .companies
7236 .first()
7237 .map(|c| c.code.as_str())
7238 .unwrap_or("1000");
7239
7240 let esg_cfg = &self.config.esg;
7241 let mut snapshot = EsgSnapshot::default();
7242
7243 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7245 esg_cfg.environmental.energy.clone(),
7246 seed + 80,
7247 );
7248 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7249
7250 let facility_count = esg_cfg.environmental.energy.facility_count;
7252 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7253 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7254
7255 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7257 seed + 82,
7258 esg_cfg.environmental.waste.diversion_target,
7259 facility_count,
7260 );
7261 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7262
7263 let mut emission_gen =
7265 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7266
7267 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7269 .iter()
7270 .map(|e| datasynth_generators::EnergyInput {
7271 facility_id: e.facility_id.clone(),
7272 energy_type: match e.energy_source {
7273 EnergySourceType::NaturalGas => {
7274 datasynth_generators::EnergyInputType::NaturalGas
7275 }
7276 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7277 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7278 _ => datasynth_generators::EnergyInputType::Electricity,
7279 },
7280 consumption_kwh: e.consumption_kwh,
7281 period: e.period,
7282 })
7283 .collect();
7284
7285 if !manufacturing.production_orders.is_empty() {
7287 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7288 &manufacturing.production_orders,
7289 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7292 if !mfg_energy.is_empty() {
7293 info!(
7294 "ESG: {} energy inputs derived from {} production orders",
7295 mfg_energy.len(),
7296 manufacturing.production_orders.len(),
7297 );
7298 energy_inputs.extend(mfg_energy);
7299 }
7300 }
7301
7302 let mut emissions = Vec::new();
7303 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7304 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7305
7306 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7308 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7309 for payment in &document_flows.payments {
7310 if payment.is_vendor {
7311 *totals
7312 .entry(payment.business_partner_id.clone())
7313 .or_default() += payment.amount;
7314 }
7315 }
7316 totals
7317 };
7318 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7319 .master_data
7320 .vendors
7321 .iter()
7322 .map(|v| {
7323 let spend = vendor_payment_totals
7324 .get(&v.vendor_id)
7325 .copied()
7326 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7327 datasynth_generators::VendorSpendInput {
7328 vendor_id: v.vendor_id.clone(),
7329 category: format!("{:?}", v.vendor_type).to_lowercase(),
7330 spend,
7331 country: v.country.clone(),
7332 }
7333 })
7334 .collect();
7335 if !vendor_spend.is_empty() {
7336 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7337 entity_id,
7338 &vendor_spend,
7339 start_date,
7340 end_date,
7341 ));
7342 }
7343
7344 let headcount = self.master_data.employees.len() as u32;
7346 if headcount > 0 {
7347 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7348 emissions.extend(emission_gen.generate_scope3_business_travel(
7349 entity_id,
7350 travel_spend,
7351 start_date,
7352 ));
7353 emissions
7354 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7355 }
7356
7357 snapshot.emission_count = emissions.len();
7358 snapshot.emissions = emissions;
7359 snapshot.energy = energy_records;
7360
7361 let mut workforce_gen =
7363 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7364 let total_headcount = headcount.max(100);
7365 snapshot.diversity =
7366 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7367 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7368
7369 if !self.master_data.employees.is_empty() {
7371 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7372 entity_id,
7373 &self.master_data.employees,
7374 end_date,
7375 );
7376 if !hr_diversity.is_empty() {
7377 info!(
7378 "ESG: {} diversity metrics derived from {} actual employees",
7379 hr_diversity.len(),
7380 self.master_data.employees.len(),
7381 );
7382 snapshot.diversity.extend(hr_diversity);
7383 }
7384 }
7385
7386 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7387 entity_id,
7388 facility_count,
7389 start_date,
7390 end_date,
7391 );
7392
7393 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7396 entity_id,
7397 &snapshot.safety_incidents,
7398 total_hours,
7399 start_date,
7400 );
7401 snapshot.safety_metrics = vec![safety_metric];
7402
7403 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7405 seed + 85,
7406 esg_cfg.governance.board_size,
7407 esg_cfg.governance.independence_target,
7408 );
7409 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7410
7411 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7413 esg_cfg.supply_chain_esg.clone(),
7414 seed + 86,
7415 );
7416 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7417 .master_data
7418 .vendors
7419 .iter()
7420 .map(|v| datasynth_generators::VendorInput {
7421 vendor_id: v.vendor_id.clone(),
7422 country: v.country.clone(),
7423 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7424 quality_score: None,
7425 })
7426 .collect();
7427 snapshot.supplier_assessments =
7428 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7429
7430 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7432 seed + 87,
7433 esg_cfg.reporting.clone(),
7434 esg_cfg.climate_scenarios.clone(),
7435 );
7436 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7437 snapshot.disclosures = disclosure_gen.generate_disclosures(
7438 entity_id,
7439 &snapshot.materiality,
7440 start_date,
7441 end_date,
7442 );
7443 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7444 snapshot.disclosure_count = snapshot.disclosures.len();
7445
7446 if esg_cfg.anomaly_rate > 0.0 {
7448 let mut anomaly_injector =
7449 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7450 let mut labels = Vec::new();
7451 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7452 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7453 labels.extend(
7454 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7455 );
7456 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7457 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7458 snapshot.anomaly_labels = labels;
7459 }
7460
7461 stats.esg_emission_count = snapshot.emission_count;
7462 stats.esg_disclosure_count = snapshot.disclosure_count;
7463
7464 info!(
7465 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7466 snapshot.emission_count,
7467 snapshot.disclosure_count,
7468 snapshot.supplier_assessments.len()
7469 );
7470 self.check_resources_with_log("post-esg")?;
7471
7472 Ok(snapshot)
7473 }
7474
7475 fn phase_treasury_data(
7477 &mut self,
7478 document_flows: &DocumentFlowSnapshot,
7479 subledger: &SubledgerSnapshot,
7480 intercompany: &IntercompanySnapshot,
7481 stats: &mut EnhancedGenerationStatistics,
7482 ) -> SynthResult<TreasurySnapshot> {
7483 if !self.phase_config.generate_treasury {
7484 debug!("Phase 22: Skipped (treasury generation disabled)");
7485 return Ok(TreasurySnapshot::default());
7486 }
7487 let degradation = self.check_resources()?;
7488 if degradation >= DegradationLevel::Reduced {
7489 debug!(
7490 "Phase skipped due to resource pressure (degradation: {:?})",
7491 degradation
7492 );
7493 return Ok(TreasurySnapshot::default());
7494 }
7495 info!("Phase 22: Generating Treasury Data");
7496
7497 let seed = self.seed;
7498 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7499 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7500 let currency = self
7501 .config
7502 .companies
7503 .first()
7504 .map(|c| c.currency.as_str())
7505 .unwrap_or("USD");
7506 let entity_id = self
7507 .config
7508 .companies
7509 .first()
7510 .map(|c| c.code.as_str())
7511 .unwrap_or("1000");
7512
7513 let mut snapshot = TreasurySnapshot::default();
7514
7515 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7517 self.config.treasury.debt.clone(),
7518 seed + 90,
7519 );
7520 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7521
7522 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7524 self.config.treasury.hedging.clone(),
7525 seed + 91,
7526 );
7527 for debt in &snapshot.debt_instruments {
7528 if debt.rate_type == InterestRateType::Variable {
7529 let swap = hedge_gen.generate_ir_swap(
7530 currency,
7531 debt.principal,
7532 debt.origination_date,
7533 debt.maturity_date,
7534 );
7535 snapshot.hedging_instruments.push(swap);
7536 }
7537 }
7538
7539 {
7542 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7543 for payment in &document_flows.payments {
7544 if payment.currency != currency {
7545 let entry = fx_map
7546 .entry(payment.currency.clone())
7547 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7548 entry.0 += payment.amount;
7549 if payment.header.document_date > entry.1 {
7551 entry.1 = payment.header.document_date;
7552 }
7553 }
7554 }
7555 if !fx_map.is_empty() {
7556 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7557 .into_iter()
7558 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7559 datasynth_generators::treasury::FxExposure {
7560 currency_pair: format!("{foreign_ccy}/{currency}"),
7561 foreign_currency: foreign_ccy,
7562 net_amount,
7563 settlement_date,
7564 description: "AP payment FX exposure".to_string(),
7565 }
7566 })
7567 .collect();
7568 let (fx_instruments, fx_relationships) =
7569 hedge_gen.generate(start_date, &fx_exposures);
7570 snapshot.hedging_instruments.extend(fx_instruments);
7571 snapshot.hedge_relationships.extend(fx_relationships);
7572 }
7573 }
7574
7575 if self.config.treasury.anomaly_rate > 0.0 {
7577 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7578 seed + 92,
7579 self.config.treasury.anomaly_rate,
7580 );
7581 let mut labels = Vec::new();
7582 labels.extend(
7583 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7584 );
7585 snapshot.treasury_anomaly_labels = labels;
7586 }
7587
7588 if self.config.treasury.cash_positioning.enabled {
7590 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7591
7592 for payment in &document_flows.payments {
7594 cash_flows.push(datasynth_generators::treasury::CashFlow {
7595 date: payment.header.document_date,
7596 account_id: format!("{entity_id}-MAIN"),
7597 amount: payment.amount,
7598 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7599 });
7600 }
7601
7602 for chain in &document_flows.o2c_chains {
7604 if let Some(ref receipt) = chain.customer_receipt {
7605 cash_flows.push(datasynth_generators::treasury::CashFlow {
7606 date: receipt.header.document_date,
7607 account_id: format!("{entity_id}-MAIN"),
7608 amount: receipt.amount,
7609 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7610 });
7611 }
7612 for receipt in &chain.remainder_receipts {
7614 cash_flows.push(datasynth_generators::treasury::CashFlow {
7615 date: receipt.header.document_date,
7616 account_id: format!("{entity_id}-MAIN"),
7617 amount: receipt.amount,
7618 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7619 });
7620 }
7621 }
7622
7623 if !cash_flows.is_empty() {
7624 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7625 self.config.treasury.cash_positioning.clone(),
7626 seed + 93,
7627 );
7628 let account_id = format!("{entity_id}-MAIN");
7629 snapshot.cash_positions = cash_gen.generate(
7630 entity_id,
7631 &account_id,
7632 currency,
7633 &cash_flows,
7634 start_date,
7635 start_date + chrono::Months::new(self.config.global.period_months),
7636 rust_decimal::Decimal::new(1_000_000, 0), );
7638 }
7639 }
7640
7641 if self.config.treasury.cash_forecasting.enabled {
7643 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7644
7645 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7647 .ar_invoices
7648 .iter()
7649 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7650 .map(|inv| {
7651 let days_past_due = if inv.due_date < end_date {
7652 (end_date - inv.due_date).num_days().max(0) as u32
7653 } else {
7654 0
7655 };
7656 datasynth_generators::treasury::ArAgingItem {
7657 expected_date: inv.due_date,
7658 amount: inv.amount_remaining,
7659 days_past_due,
7660 document_id: inv.invoice_number.clone(),
7661 }
7662 })
7663 .collect();
7664
7665 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7667 .ap_invoices
7668 .iter()
7669 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7670 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7671 payment_date: inv.due_date,
7672 amount: inv.amount_remaining,
7673 document_id: inv.invoice_number.clone(),
7674 })
7675 .collect();
7676
7677 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7678 self.config.treasury.cash_forecasting.clone(),
7679 seed + 94,
7680 );
7681 let forecast = forecast_gen.generate(
7682 entity_id,
7683 currency,
7684 end_date,
7685 &ar_items,
7686 &ap_items,
7687 &[], );
7689 snapshot.cash_forecasts.push(forecast);
7690 }
7691
7692 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7694 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7695 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7696 self.config.treasury.cash_pooling.clone(),
7697 seed + 95,
7698 );
7699
7700 let account_ids: Vec<String> = snapshot
7702 .cash_positions
7703 .iter()
7704 .map(|cp| cp.bank_account_id.clone())
7705 .collect::<std::collections::HashSet<_>>()
7706 .into_iter()
7707 .collect();
7708
7709 if let Some(pool) =
7710 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7711 {
7712 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7714 for cp in &snapshot.cash_positions {
7715 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7716 }
7717
7718 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7719 latest_balances
7720 .into_iter()
7721 .filter(|(id, _)| pool.participant_accounts.contains(id))
7722 .map(
7723 |(id, balance)| datasynth_generators::treasury::AccountBalance {
7724 account_id: id,
7725 balance,
7726 },
7727 )
7728 .collect();
7729
7730 let sweeps =
7731 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7732 snapshot.cash_pool_sweeps = sweeps;
7733 snapshot.cash_pools.push(pool);
7734 }
7735 }
7736
7737 if self.config.treasury.bank_guarantees.enabled {
7739 let vendor_names: Vec<String> = self
7740 .master_data
7741 .vendors
7742 .iter()
7743 .map(|v| v.name.clone())
7744 .collect();
7745 if !vendor_names.is_empty() {
7746 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7747 self.config.treasury.bank_guarantees.clone(),
7748 seed + 96,
7749 );
7750 snapshot.bank_guarantees =
7751 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7752 }
7753 }
7754
7755 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7757 let entity_ids: Vec<String> = self
7758 .config
7759 .companies
7760 .iter()
7761 .map(|c| c.code.clone())
7762 .collect();
7763 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7764 .matched_pairs
7765 .iter()
7766 .map(|mp| {
7767 (
7768 mp.seller_company.clone(),
7769 mp.buyer_company.clone(),
7770 mp.amount,
7771 )
7772 })
7773 .collect();
7774 if entity_ids.len() >= 2 {
7775 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7776 self.config.treasury.netting.clone(),
7777 seed + 97,
7778 );
7779 snapshot.netting_runs = netting_gen.generate(
7780 &entity_ids,
7781 currency,
7782 start_date,
7783 self.config.global.period_months,
7784 &ic_amounts,
7785 );
7786 }
7787 }
7788
7789 {
7791 use datasynth_generators::treasury::TreasuryAccounting;
7792
7793 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7794 let mut treasury_jes = Vec::new();
7795
7796 if !snapshot.debt_instruments.is_empty() {
7798 let debt_jes =
7799 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7800 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7801 treasury_jes.extend(debt_jes);
7802 }
7803
7804 if !snapshot.hedging_instruments.is_empty() {
7806 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
7807 &snapshot.hedging_instruments,
7808 &snapshot.hedge_relationships,
7809 end_date,
7810 entity_id,
7811 );
7812 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
7813 treasury_jes.extend(hedge_jes);
7814 }
7815
7816 if !snapshot.cash_pool_sweeps.is_empty() {
7818 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
7819 &snapshot.cash_pool_sweeps,
7820 entity_id,
7821 );
7822 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
7823 treasury_jes.extend(sweep_jes);
7824 }
7825
7826 if !treasury_jes.is_empty() {
7827 debug!("Total treasury journal entries: {}", treasury_jes.len());
7828 }
7829 snapshot.journal_entries = treasury_jes;
7830 }
7831
7832 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7833 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7834 stats.cash_position_count = snapshot.cash_positions.len();
7835 stats.cash_forecast_count = snapshot.cash_forecasts.len();
7836 stats.cash_pool_count = snapshot.cash_pools.len();
7837
7838 info!(
7839 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
7840 snapshot.debt_instruments.len(),
7841 snapshot.hedging_instruments.len(),
7842 snapshot.cash_positions.len(),
7843 snapshot.cash_forecasts.len(),
7844 snapshot.cash_pools.len(),
7845 snapshot.bank_guarantees.len(),
7846 snapshot.netting_runs.len(),
7847 snapshot.journal_entries.len(),
7848 );
7849 self.check_resources_with_log("post-treasury")?;
7850
7851 Ok(snapshot)
7852 }
7853
7854 fn phase_project_accounting(
7856 &mut self,
7857 document_flows: &DocumentFlowSnapshot,
7858 hr: &HrSnapshot,
7859 stats: &mut EnhancedGenerationStatistics,
7860 ) -> SynthResult<ProjectAccountingSnapshot> {
7861 if !self.phase_config.generate_project_accounting {
7862 debug!("Phase 23: Skipped (project accounting disabled)");
7863 return Ok(ProjectAccountingSnapshot::default());
7864 }
7865 let degradation = self.check_resources()?;
7866 if degradation >= DegradationLevel::Reduced {
7867 debug!(
7868 "Phase skipped due to resource pressure (degradation: {:?})",
7869 degradation
7870 );
7871 return Ok(ProjectAccountingSnapshot::default());
7872 }
7873 info!("Phase 23: Generating Project Accounting Data");
7874
7875 let seed = self.seed;
7876 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7877 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7878 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7879 let company_code = self
7880 .config
7881 .companies
7882 .first()
7883 .map(|c| c.code.as_str())
7884 .unwrap_or("1000");
7885
7886 let mut snapshot = ProjectAccountingSnapshot::default();
7887
7888 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
7890 self.config.project_accounting.clone(),
7891 seed + 95,
7892 );
7893 let pool = project_gen.generate(company_code, start_date, end_date);
7894 snapshot.projects = pool.projects.clone();
7895
7896 {
7898 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
7899 Vec::new();
7900
7901 for te in &hr.time_entries {
7903 let total_hours = te.hours_regular + te.hours_overtime;
7904 if total_hours > 0.0 {
7905 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7906 id: te.entry_id.clone(),
7907 entity_id: company_code.to_string(),
7908 date: te.date,
7909 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
7910 .unwrap_or(rust_decimal::Decimal::ZERO),
7911 source_type: CostSourceType::TimeEntry,
7912 hours: Some(
7913 rust_decimal::Decimal::from_f64_retain(total_hours)
7914 .unwrap_or(rust_decimal::Decimal::ZERO),
7915 ),
7916 });
7917 }
7918 }
7919
7920 for er in &hr.expense_reports {
7922 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7923 id: er.report_id.clone(),
7924 entity_id: company_code.to_string(),
7925 date: er.submission_date,
7926 amount: er.total_amount,
7927 source_type: CostSourceType::ExpenseReport,
7928 hours: None,
7929 });
7930 }
7931
7932 for po in &document_flows.purchase_orders {
7934 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7935 id: po.header.document_id.clone(),
7936 entity_id: company_code.to_string(),
7937 date: po.header.document_date,
7938 amount: po.total_net_amount,
7939 source_type: CostSourceType::PurchaseOrder,
7940 hours: None,
7941 });
7942 }
7943
7944 for vi in &document_flows.vendor_invoices {
7946 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7947 id: vi.header.document_id.clone(),
7948 entity_id: company_code.to_string(),
7949 date: vi.header.document_date,
7950 amount: vi.payable_amount,
7951 source_type: CostSourceType::VendorInvoice,
7952 hours: None,
7953 });
7954 }
7955
7956 if !source_docs.is_empty() && !pool.projects.is_empty() {
7957 let mut cost_gen =
7958 datasynth_generators::project_accounting::ProjectCostGenerator::new(
7959 self.config.project_accounting.cost_allocation.clone(),
7960 seed + 99,
7961 );
7962 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
7963 }
7964 }
7965
7966 if self.config.project_accounting.change_orders.enabled {
7968 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
7969 self.config.project_accounting.change_orders.clone(),
7970 seed + 96,
7971 );
7972 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
7973 }
7974
7975 if self.config.project_accounting.milestones.enabled {
7977 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
7978 self.config.project_accounting.milestones.clone(),
7979 seed + 97,
7980 );
7981 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
7982 }
7983
7984 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
7986 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
7987 self.config.project_accounting.earned_value.clone(),
7988 seed + 98,
7989 );
7990 snapshot.earned_value_metrics =
7991 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
7992 }
7993
7994 if self.config.project_accounting.revenue_recognition.enabled
7996 && !snapshot.projects.is_empty()
7997 && !snapshot.cost_lines.is_empty()
7998 {
7999 use datasynth_generators::project_accounting::RevenueGenerator;
8000 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8001 let avg_contract_value =
8002 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8003 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8004
8005 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8008 snapshot
8009 .projects
8010 .iter()
8011 .filter(|p| {
8012 matches!(
8013 p.project_type,
8014 datasynth_core::models::ProjectType::Customer
8015 )
8016 })
8017 .map(|p| {
8018 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8019 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8020 } else {
8022 avg_contract_value
8023 };
8024 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8026 })
8027 .collect();
8028
8029 if !contract_values.is_empty() {
8030 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8031 snapshot.revenue_records = rev_gen.generate(
8032 &snapshot.projects,
8033 &snapshot.cost_lines,
8034 &contract_values,
8035 start_date,
8036 end_date,
8037 );
8038 debug!(
8039 "Generated {} revenue recognition records for {} customer projects",
8040 snapshot.revenue_records.len(),
8041 contract_values.len()
8042 );
8043 }
8044 }
8045
8046 stats.project_count = snapshot.projects.len();
8047 stats.project_change_order_count = snapshot.change_orders.len();
8048 stats.project_cost_line_count = snapshot.cost_lines.len();
8049
8050 info!(
8051 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8052 snapshot.projects.len(),
8053 snapshot.change_orders.len(),
8054 snapshot.milestones.len(),
8055 snapshot.earned_value_metrics.len()
8056 );
8057 self.check_resources_with_log("post-project-accounting")?;
8058
8059 Ok(snapshot)
8060 }
8061
8062 fn phase_evolution_events(
8064 &mut self,
8065 stats: &mut EnhancedGenerationStatistics,
8066 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8067 if !self.phase_config.generate_evolution_events {
8068 debug!("Phase 24: Skipped (evolution events disabled)");
8069 return Ok((Vec::new(), Vec::new()));
8070 }
8071 info!("Phase 24: Generating Process Evolution + Organizational Events");
8072
8073 let seed = self.seed;
8074 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8075 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8076 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8077
8078 let mut proc_gen =
8080 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8081 seed + 100,
8082 );
8083 let process_events = proc_gen.generate_events(start_date, end_date);
8084
8085 let company_codes: Vec<String> = self
8087 .config
8088 .companies
8089 .iter()
8090 .map(|c| c.code.clone())
8091 .collect();
8092 let mut org_gen =
8093 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8094 seed + 101,
8095 );
8096 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8097
8098 stats.process_evolution_event_count = process_events.len();
8099 stats.organizational_event_count = org_events.len();
8100
8101 info!(
8102 "Evolution events generated: {} process evolution, {} organizational",
8103 process_events.len(),
8104 org_events.len()
8105 );
8106 self.check_resources_with_log("post-evolution-events")?;
8107
8108 Ok((process_events, org_events))
8109 }
8110
8111 fn phase_disruption_events(
8114 &self,
8115 stats: &mut EnhancedGenerationStatistics,
8116 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8117 if !self.config.organizational_events.enabled {
8118 debug!("Phase 24b: Skipped (organizational events disabled)");
8119 return Ok(Vec::new());
8120 }
8121 info!("Phase 24b: Generating Disruption Events");
8122
8123 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8124 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8125 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8126
8127 let company_codes: Vec<String> = self
8128 .config
8129 .companies
8130 .iter()
8131 .map(|c| c.code.clone())
8132 .collect();
8133
8134 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8135 let events = gen.generate(start_date, end_date, &company_codes);
8136
8137 stats.disruption_event_count = events.len();
8138 info!("Disruption events generated: {} events", events.len());
8139 self.check_resources_with_log("post-disruption-events")?;
8140
8141 Ok(events)
8142 }
8143
8144 fn phase_counterfactuals(
8151 &self,
8152 journal_entries: &[JournalEntry],
8153 stats: &mut EnhancedGenerationStatistics,
8154 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8155 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8156 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8157 return Ok(Vec::new());
8158 }
8159 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8160
8161 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8162
8163 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8164
8165 let specs = [
8167 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8168 CounterfactualSpec::ShiftDate { days: -14 },
8169 CounterfactualSpec::SelfApprove,
8170 CounterfactualSpec::SplitTransaction { split_count: 3 },
8171 ];
8172
8173 let pairs: Vec<_> = journal_entries
8174 .iter()
8175 .enumerate()
8176 .map(|(i, je)| {
8177 let spec = &specs[i % specs.len()];
8178 gen.generate(je, spec)
8179 })
8180 .collect();
8181
8182 stats.counterfactual_pair_count = pairs.len();
8183 info!(
8184 "Counterfactual pairs generated: {} pairs from {} journal entries",
8185 pairs.len(),
8186 journal_entries.len()
8187 );
8188 self.check_resources_with_log("post-counterfactuals")?;
8189
8190 Ok(pairs)
8191 }
8192
8193 fn phase_red_flags(
8200 &self,
8201 anomaly_labels: &AnomalyLabels,
8202 document_flows: &DocumentFlowSnapshot,
8203 stats: &mut EnhancedGenerationStatistics,
8204 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8205 if !self.config.fraud.enabled {
8206 debug!("Phase 26: Skipped (fraud generation disabled)");
8207 return Ok(Vec::new());
8208 }
8209 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8210
8211 use datasynth_generators::fraud::RedFlagGenerator;
8212
8213 let generator = RedFlagGenerator::new();
8214 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8215
8216 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8218 .labels
8219 .iter()
8220 .filter(|label| label.anomaly_type.is_intentional())
8221 .map(|label| label.document_id.as_str())
8222 .collect();
8223
8224 let mut flags = Vec::new();
8225
8226 for chain in &document_flows.p2p_chains {
8228 let doc_id = &chain.purchase_order.header.document_id;
8229 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8230 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8231 }
8232
8233 for chain in &document_flows.o2c_chains {
8235 let doc_id = &chain.sales_order.header.document_id;
8236 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8237 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8238 }
8239
8240 stats.red_flag_count = flags.len();
8241 info!(
8242 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8243 flags.len(),
8244 document_flows.p2p_chains.len(),
8245 document_flows.o2c_chains.len(),
8246 fraud_doc_ids.len()
8247 );
8248 self.check_resources_with_log("post-red-flags")?;
8249
8250 Ok(flags)
8251 }
8252
8253 fn phase_collusion_rings(
8259 &mut self,
8260 stats: &mut EnhancedGenerationStatistics,
8261 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8262 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8263 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8264 return Ok(Vec::new());
8265 }
8266 info!("Phase 26b: Generating Collusion Rings");
8267
8268 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8269 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8270 let months = self.config.global.period_months;
8271
8272 let employee_ids: Vec<String> = self
8273 .master_data
8274 .employees
8275 .iter()
8276 .map(|e| e.employee_id.clone())
8277 .collect();
8278 let vendor_ids: Vec<String> = self
8279 .master_data
8280 .vendors
8281 .iter()
8282 .map(|v| v.vendor_id.clone())
8283 .collect();
8284
8285 let mut generator =
8286 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8287 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8288
8289 stats.collusion_ring_count = rings.len();
8290 info!(
8291 "Collusion rings generated: {} rings, total members: {}",
8292 rings.len(),
8293 rings
8294 .iter()
8295 .map(datasynth_generators::fraud::CollusionRing::size)
8296 .sum::<usize>()
8297 );
8298 self.check_resources_with_log("post-collusion-rings")?;
8299
8300 Ok(rings)
8301 }
8302
8303 fn phase_temporal_attributes(
8308 &mut self,
8309 stats: &mut EnhancedGenerationStatistics,
8310 ) -> SynthResult<
8311 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8312 > {
8313 if !self.config.temporal_attributes.enabled {
8314 debug!("Phase 27: Skipped (temporal attributes disabled)");
8315 return Ok(Vec::new());
8316 }
8317 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8318
8319 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8320 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8321
8322 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8326 || self.config.temporal_attributes.enabled;
8327 let temporal_config = {
8328 let ta = &self.config.temporal_attributes;
8329 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8330 .enabled(ta.enabled)
8331 .closed_probability(ta.valid_time.closed_probability)
8332 .avg_validity_days(ta.valid_time.avg_validity_days)
8333 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8334 .with_version_chains(if generate_version_chains {
8335 ta.avg_versions_per_entity
8336 } else {
8337 1.0
8338 })
8339 .build()
8340 };
8341 let temporal_config = if self
8343 .config
8344 .temporal_attributes
8345 .transaction_time
8346 .allow_backdating
8347 {
8348 let mut c = temporal_config;
8349 c.transaction_time.allow_backdating = true;
8350 c.transaction_time.backdating_probability = self
8351 .config
8352 .temporal_attributes
8353 .transaction_time
8354 .backdating_probability;
8355 c.transaction_time.max_backdate_days = self
8356 .config
8357 .temporal_attributes
8358 .transaction_time
8359 .max_backdate_days;
8360 c
8361 } else {
8362 temporal_config
8363 };
8364 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8365 temporal_config,
8366 self.seed + 130,
8367 start_date,
8368 );
8369
8370 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8371 self.seed + 130,
8372 datasynth_core::GeneratorType::Vendor,
8373 );
8374
8375 let chains: Vec<_> = self
8376 .master_data
8377 .vendors
8378 .iter()
8379 .map(|vendor| {
8380 let id = uuid_factory.next();
8381 gen.generate_version_chain(vendor.clone(), id)
8382 })
8383 .collect();
8384
8385 stats.temporal_version_chain_count = chains.len();
8386 info!("Temporal version chains generated: {} chains", chains.len());
8387 self.check_resources_with_log("post-temporal-attributes")?;
8388
8389 Ok(chains)
8390 }
8391
8392 fn phase_entity_relationships(
8402 &self,
8403 journal_entries: &[JournalEntry],
8404 document_flows: &DocumentFlowSnapshot,
8405 stats: &mut EnhancedGenerationStatistics,
8406 ) -> SynthResult<(
8407 Option<datasynth_core::models::EntityGraph>,
8408 Vec<datasynth_core::models::CrossProcessLink>,
8409 )> {
8410 use datasynth_generators::relationships::{
8411 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8412 TransactionSummary,
8413 };
8414
8415 let rs_enabled = self.config.relationship_strength.enabled;
8416 let cpl_enabled = self.config.cross_process_links.enabled
8417 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8418
8419 if !rs_enabled && !cpl_enabled {
8420 debug!(
8421 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8422 );
8423 return Ok((None, Vec::new()));
8424 }
8425
8426 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8427
8428 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8429 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8430
8431 let company_code = self
8432 .config
8433 .companies
8434 .first()
8435 .map(|c| c.code.as_str())
8436 .unwrap_or("1000");
8437
8438 let gen_config = EntityGraphConfig {
8440 enabled: rs_enabled,
8441 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8442 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8443 enable_return_flows: false,
8444 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8445 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8446 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8448 1.0
8449 } else {
8450 0.30
8451 },
8452 ..Default::default()
8453 },
8454 strength_config: datasynth_generators::relationships::StrengthConfig {
8455 transaction_volume_weight: self
8456 .config
8457 .relationship_strength
8458 .calculation
8459 .transaction_volume_weight,
8460 transaction_count_weight: self
8461 .config
8462 .relationship_strength
8463 .calculation
8464 .transaction_count_weight,
8465 duration_weight: self
8466 .config
8467 .relationship_strength
8468 .calculation
8469 .relationship_duration_weight,
8470 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8471 mutual_connections_weight: self
8472 .config
8473 .relationship_strength
8474 .calculation
8475 .mutual_connections_weight,
8476 recency_half_life_days: self
8477 .config
8478 .relationship_strength
8479 .calculation
8480 .recency_half_life_days,
8481 },
8482 ..Default::default()
8483 };
8484
8485 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8486
8487 let entity_graph = if rs_enabled {
8489 let vendor_summaries: Vec<EntitySummary> = self
8491 .master_data
8492 .vendors
8493 .iter()
8494 .map(|v| {
8495 EntitySummary::new(
8496 &v.vendor_id,
8497 &v.name,
8498 datasynth_core::models::GraphEntityType::Vendor,
8499 start_date,
8500 )
8501 })
8502 .collect();
8503
8504 let customer_summaries: Vec<EntitySummary> = self
8505 .master_data
8506 .customers
8507 .iter()
8508 .map(|c| {
8509 EntitySummary::new(
8510 &c.customer_id,
8511 &c.name,
8512 datasynth_core::models::GraphEntityType::Customer,
8513 start_date,
8514 )
8515 })
8516 .collect();
8517
8518 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8523 std::collections::HashMap::new();
8524
8525 for je in journal_entries {
8526 let cc = je.header.company_code.clone();
8527 let posting_date = je.header.posting_date;
8528 for line in &je.lines {
8529 if let Some(ref tp) = line.trading_partner {
8530 let amount = if line.debit_amount > line.credit_amount {
8531 line.debit_amount
8532 } else {
8533 line.credit_amount
8534 };
8535 let entry = txn_summaries
8536 .entry((cc.clone(), tp.clone()))
8537 .or_insert_with(|| TransactionSummary {
8538 total_volume: rust_decimal::Decimal::ZERO,
8539 transaction_count: 0,
8540 first_transaction_date: posting_date,
8541 last_transaction_date: posting_date,
8542 related_entities: std::collections::HashSet::new(),
8543 });
8544 entry.total_volume += amount;
8545 entry.transaction_count += 1;
8546 if posting_date < entry.first_transaction_date {
8547 entry.first_transaction_date = posting_date;
8548 }
8549 if posting_date > entry.last_transaction_date {
8550 entry.last_transaction_date = posting_date;
8551 }
8552 entry.related_entities.insert(cc.clone());
8553 }
8554 }
8555 }
8556
8557 for chain in &document_flows.p2p_chains {
8560 let cc = chain.purchase_order.header.company_code.clone();
8561 let vendor_id = chain.purchase_order.vendor_id.clone();
8562 let po_date = chain.purchase_order.header.document_date;
8563 let amount = chain.purchase_order.total_net_amount;
8564
8565 let entry = txn_summaries
8566 .entry((cc.clone(), vendor_id))
8567 .or_insert_with(|| TransactionSummary {
8568 total_volume: rust_decimal::Decimal::ZERO,
8569 transaction_count: 0,
8570 first_transaction_date: po_date,
8571 last_transaction_date: po_date,
8572 related_entities: std::collections::HashSet::new(),
8573 });
8574 entry.total_volume += amount;
8575 entry.transaction_count += 1;
8576 if po_date < entry.first_transaction_date {
8577 entry.first_transaction_date = po_date;
8578 }
8579 if po_date > entry.last_transaction_date {
8580 entry.last_transaction_date = po_date;
8581 }
8582 entry.related_entities.insert(cc);
8583 }
8584
8585 for chain in &document_flows.o2c_chains {
8587 let cc = chain.sales_order.header.company_code.clone();
8588 let customer_id = chain.sales_order.customer_id.clone();
8589 let so_date = chain.sales_order.header.document_date;
8590 let amount = chain.sales_order.total_net_amount;
8591
8592 let entry = txn_summaries
8593 .entry((cc.clone(), customer_id))
8594 .or_insert_with(|| TransactionSummary {
8595 total_volume: rust_decimal::Decimal::ZERO,
8596 transaction_count: 0,
8597 first_transaction_date: so_date,
8598 last_transaction_date: so_date,
8599 related_entities: std::collections::HashSet::new(),
8600 });
8601 entry.total_volume += amount;
8602 entry.transaction_count += 1;
8603 if so_date < entry.first_transaction_date {
8604 entry.first_transaction_date = so_date;
8605 }
8606 if so_date > entry.last_transaction_date {
8607 entry.last_transaction_date = so_date;
8608 }
8609 entry.related_entities.insert(cc);
8610 }
8611
8612 let as_of_date = journal_entries
8613 .last()
8614 .map(|je| je.header.posting_date)
8615 .unwrap_or(start_date);
8616
8617 let graph = gen.generate_entity_graph(
8618 company_code,
8619 as_of_date,
8620 &vendor_summaries,
8621 &customer_summaries,
8622 &txn_summaries,
8623 );
8624
8625 info!(
8626 "Entity relationship graph: {} nodes, {} edges",
8627 graph.nodes.len(),
8628 graph.edges.len()
8629 );
8630 stats.entity_relationship_node_count = graph.nodes.len();
8631 stats.entity_relationship_edge_count = graph.edges.len();
8632 Some(graph)
8633 } else {
8634 None
8635 };
8636
8637 let cross_process_links = if cpl_enabled {
8639 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8641 .p2p_chains
8642 .iter()
8643 .flat_map(|chain| {
8644 let vendor_id = chain.purchase_order.vendor_id.clone();
8645 let cc = chain.purchase_order.header.company_code.clone();
8646 chain.goods_receipts.iter().flat_map(move |gr| {
8647 gr.items.iter().filter_map({
8648 let doc_id = gr.header.document_id.clone();
8649 let v_id = vendor_id.clone();
8650 let company = cc.clone();
8651 let receipt_date = gr.header.document_date;
8652 move |item| {
8653 item.base
8654 .material_id
8655 .as_ref()
8656 .map(|mat_id| GoodsReceiptRef {
8657 document_id: doc_id.clone(),
8658 material_id: mat_id.clone(),
8659 quantity: item.base.quantity,
8660 receipt_date,
8661 vendor_id: v_id.clone(),
8662 company_code: company.clone(),
8663 })
8664 }
8665 })
8666 })
8667 })
8668 .collect();
8669
8670 let del_refs: Vec<DeliveryRef> = document_flows
8672 .o2c_chains
8673 .iter()
8674 .flat_map(|chain| {
8675 let customer_id = chain.sales_order.customer_id.clone();
8676 let cc = chain.sales_order.header.company_code.clone();
8677 chain.deliveries.iter().flat_map(move |del| {
8678 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8679 del.items.iter().filter_map({
8680 let doc_id = del.header.document_id.clone();
8681 let c_id = customer_id.clone();
8682 let company = cc.clone();
8683 move |item| {
8684 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8685 document_id: doc_id.clone(),
8686 material_id: mat_id.clone(),
8687 quantity: item.base.quantity,
8688 delivery_date,
8689 customer_id: c_id.clone(),
8690 company_code: company.clone(),
8691 })
8692 }
8693 })
8694 })
8695 })
8696 .collect();
8697
8698 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8699 info!("Cross-process links generated: {} links", links.len());
8700 stats.cross_process_link_count = links.len();
8701 links
8702 } else {
8703 Vec::new()
8704 };
8705
8706 self.check_resources_with_log("post-entity-relationships")?;
8707 Ok((entity_graph, cross_process_links))
8708 }
8709
8710 fn phase_industry_data(
8712 &self,
8713 stats: &mut EnhancedGenerationStatistics,
8714 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8715 if !self.config.industry_specific.enabled {
8716 return None;
8717 }
8718 info!("Phase 29: Generating industry-specific data");
8719 let output = datasynth_generators::industry::factory::generate_industry_output(
8720 self.config.global.industry,
8721 );
8722 stats.industry_gl_account_count = output.gl_accounts.len();
8723 info!(
8724 "Industry data generated: {} GL accounts for {:?}",
8725 output.gl_accounts.len(),
8726 self.config.global.industry
8727 );
8728 Some(output)
8729 }
8730
8731 fn phase_opening_balances(
8733 &mut self,
8734 coa: &Arc<ChartOfAccounts>,
8735 stats: &mut EnhancedGenerationStatistics,
8736 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8737 if !self.config.balance.generate_opening_balances {
8738 debug!("Phase 3b: Skipped (opening balance generation disabled)");
8739 return Ok(Vec::new());
8740 }
8741 info!("Phase 3b: Generating Opening Balances");
8742
8743 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8744 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8745 let fiscal_year = start_date.year();
8746
8747 let industry = match self.config.global.industry {
8748 IndustrySector::Manufacturing => IndustryType::Manufacturing,
8749 IndustrySector::Retail => IndustryType::Retail,
8750 IndustrySector::FinancialServices => IndustryType::Financial,
8751 IndustrySector::Healthcare => IndustryType::Healthcare,
8752 IndustrySector::Technology => IndustryType::Technology,
8753 _ => IndustryType::Manufacturing,
8754 };
8755
8756 let config = datasynth_generators::OpeningBalanceConfig {
8757 industry,
8758 ..Default::default()
8759 };
8760 let mut gen =
8761 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8762
8763 let mut results = Vec::new();
8764 for company in &self.config.companies {
8765 let spec = OpeningBalanceSpec::new(
8766 company.code.clone(),
8767 start_date,
8768 fiscal_year,
8769 company.currency.clone(),
8770 rust_decimal::Decimal::new(10_000_000, 0),
8771 industry,
8772 );
8773 let ob = gen.generate(&spec, coa, start_date, &company.code);
8774 results.push(ob);
8775 }
8776
8777 stats.opening_balance_count = results.len();
8778 info!("Opening balances generated: {} companies", results.len());
8779 self.check_resources_with_log("post-opening-balances")?;
8780
8781 Ok(results)
8782 }
8783
8784 fn phase_subledger_reconciliation(
8786 &mut self,
8787 subledger: &SubledgerSnapshot,
8788 entries: &[JournalEntry],
8789 stats: &mut EnhancedGenerationStatistics,
8790 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8791 if !self.config.balance.reconcile_subledgers {
8792 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8793 return Ok(Vec::new());
8794 }
8795 info!("Phase 9b: Reconciling GL to subledger balances");
8796
8797 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8798 .map(|d| d + chrono::Months::new(self.config.global.period_months))
8799 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8800
8801 let tracker_config = BalanceTrackerConfig {
8803 validate_on_each_entry: false,
8804 track_history: false,
8805 fail_on_validation_error: false,
8806 ..Default::default()
8807 };
8808 let recon_currency = self
8809 .config
8810 .companies
8811 .first()
8812 .map(|c| c.currency.clone())
8813 .unwrap_or_else(|| "USD".to_string());
8814 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8815 let validation_errors = tracker.apply_entries(entries);
8816 if !validation_errors.is_empty() {
8817 warn!(
8818 error_count = validation_errors.len(),
8819 "Balance tracker encountered validation errors during subledger reconciliation"
8820 );
8821 for err in &validation_errors {
8822 debug!("Balance validation error: {:?}", err);
8823 }
8824 }
8825
8826 let mut engine = datasynth_generators::ReconciliationEngine::new(
8827 datasynth_generators::ReconciliationConfig::default(),
8828 );
8829
8830 let mut results = Vec::new();
8831 let company_code = self
8832 .config
8833 .companies
8834 .first()
8835 .map(|c| c.code.as_str())
8836 .unwrap_or("1000");
8837
8838 if !subledger.ar_invoices.is_empty() {
8840 let gl_balance = tracker
8841 .get_account_balance(
8842 company_code,
8843 datasynth_core::accounts::control_accounts::AR_CONTROL,
8844 )
8845 .map(|b| b.closing_balance)
8846 .unwrap_or_default();
8847 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8848 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8849 }
8850
8851 if !subledger.ap_invoices.is_empty() {
8853 let gl_balance = tracker
8854 .get_account_balance(
8855 company_code,
8856 datasynth_core::accounts::control_accounts::AP_CONTROL,
8857 )
8858 .map(|b| b.closing_balance)
8859 .unwrap_or_default();
8860 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8861 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8862 }
8863
8864 if !subledger.fa_records.is_empty() {
8866 let gl_asset_balance = tracker
8867 .get_account_balance(
8868 company_code,
8869 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8870 )
8871 .map(|b| b.closing_balance)
8872 .unwrap_or_default();
8873 let gl_accum_depr_balance = tracker
8874 .get_account_balance(
8875 company_code,
8876 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8877 )
8878 .map(|b| b.closing_balance)
8879 .unwrap_or_default();
8880 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8881 subledger.fa_records.iter().collect();
8882 let (asset_recon, depr_recon) = engine.reconcile_fa(
8883 company_code,
8884 end_date,
8885 gl_asset_balance,
8886 gl_accum_depr_balance,
8887 &fa_refs,
8888 );
8889 results.push(asset_recon);
8890 results.push(depr_recon);
8891 }
8892
8893 if !subledger.inventory_positions.is_empty() {
8895 let gl_balance = tracker
8896 .get_account_balance(
8897 company_code,
8898 datasynth_core::accounts::control_accounts::INVENTORY,
8899 )
8900 .map(|b| b.closing_balance)
8901 .unwrap_or_default();
8902 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
8903 subledger.inventory_positions.iter().collect();
8904 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
8905 }
8906
8907 stats.subledger_reconciliation_count = results.len();
8908 let passed = results.iter().filter(|r| r.is_balanced()).count();
8909 let failed = results.len() - passed;
8910 info!(
8911 "Subledger reconciliation: {} checks, {} passed, {} failed",
8912 results.len(),
8913 passed,
8914 failed
8915 );
8916 self.check_resources_with_log("post-subledger-reconciliation")?;
8917
8918 Ok(results)
8919 }
8920
8921 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
8923 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
8924
8925 let coa_framework = self.resolve_coa_framework();
8926
8927 let mut gen = ChartOfAccountsGenerator::new(
8928 self.config.chart_of_accounts.complexity,
8929 self.config.global.industry,
8930 self.seed,
8931 )
8932 .with_coa_framework(coa_framework);
8933
8934 let coa = Arc::new(gen.generate());
8935 self.coa = Some(Arc::clone(&coa));
8936
8937 if let Some(pb) = pb {
8938 pb.finish_with_message("Chart of Accounts complete");
8939 }
8940
8941 Ok(coa)
8942 }
8943
8944 fn generate_master_data(&mut self) -> SynthResult<()> {
8946 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8949
8950 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
8952
8953 let pack = self.primary_pack().clone();
8955
8956 let vendors_per_company = self.phase_config.vendors_per_company;
8958 let customers_per_company = self.phase_config.customers_per_company;
8959 let materials_per_company = self.phase_config.materials_per_company;
8960 let assets_per_company = self.phase_config.assets_per_company;
8961 let coa_framework = self.resolve_coa_framework();
8962
8963 let per_company_results: Vec<_> = self
8966 .config
8967 .companies
8968 .par_iter()
8969 .enumerate()
8970 .map(|(i, company)| {
8971 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
8972 let pack = pack.clone();
8973
8974 let mut vendor_gen = VendorGenerator::new(company_seed);
8976 vendor_gen.set_country_pack(pack.clone());
8977 vendor_gen.set_coa_framework(coa_framework);
8978 vendor_gen.set_counter_offset(i * vendors_per_company);
8979 if self.config.vendor_network.enabled {
8981 let vn = &self.config.vendor_network;
8982 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
8983 enabled: true,
8984 depth: vn.depth,
8985 tier1_count: datasynth_generators::TierCountConfig::new(
8986 vn.tier1.min,
8987 vn.tier1.max,
8988 ),
8989 tier2_per_parent: datasynth_generators::TierCountConfig::new(
8990 vn.tier2_per_parent.min,
8991 vn.tier2_per_parent.max,
8992 ),
8993 tier3_per_parent: datasynth_generators::TierCountConfig::new(
8994 vn.tier3_per_parent.min,
8995 vn.tier3_per_parent.max,
8996 ),
8997 cluster_distribution: datasynth_generators::ClusterDistribution {
8998 reliable_strategic: vn.clusters.reliable_strategic,
8999 standard_operational: vn.clusters.standard_operational,
9000 transactional: vn.clusters.transactional,
9001 problematic: vn.clusters.problematic,
9002 },
9003 concentration_limits: datasynth_generators::ConcentrationLimits {
9004 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9005 max_top5: vn.dependencies.top_5_concentration,
9006 },
9007 ..datasynth_generators::VendorNetworkConfig::default()
9008 });
9009 }
9010 let vendor_pool =
9011 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9012
9013 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9015 customer_gen.set_country_pack(pack.clone());
9016 customer_gen.set_coa_framework(coa_framework);
9017 customer_gen.set_counter_offset(i * customers_per_company);
9018 if self.config.customer_segmentation.enabled {
9020 let cs = &self.config.customer_segmentation;
9021 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9022 enabled: true,
9023 segment_distribution: datasynth_generators::SegmentDistribution {
9024 enterprise: cs.value_segments.enterprise.customer_share,
9025 mid_market: cs.value_segments.mid_market.customer_share,
9026 smb: cs.value_segments.smb.customer_share,
9027 consumer: cs.value_segments.consumer.customer_share,
9028 },
9029 referral_config: datasynth_generators::ReferralConfig {
9030 enabled: cs.networks.referrals.enabled,
9031 referral_rate: cs.networks.referrals.referral_rate,
9032 ..Default::default()
9033 },
9034 hierarchy_config: datasynth_generators::HierarchyConfig {
9035 enabled: cs.networks.corporate_hierarchies.enabled,
9036 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9037 ..Default::default()
9038 },
9039 ..Default::default()
9040 };
9041 customer_gen.set_segmentation_config(seg_cfg);
9042 }
9043 let customer_pool = customer_gen.generate_customer_pool(
9044 customers_per_company,
9045 &company.code,
9046 start_date,
9047 );
9048
9049 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9051 material_gen.set_country_pack(pack.clone());
9052 material_gen.set_counter_offset(i * materials_per_company);
9053 let material_pool = material_gen.generate_material_pool(
9054 materials_per_company,
9055 &company.code,
9056 start_date,
9057 );
9058
9059 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9061 let asset_pool = asset_gen.generate_asset_pool(
9062 assets_per_company,
9063 &company.code,
9064 (start_date, end_date),
9065 );
9066
9067 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9069 employee_gen.set_country_pack(pack);
9070 let employee_pool =
9071 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9072
9073 let employee_change_history =
9075 employee_gen.generate_all_change_history(&employee_pool, end_date);
9076
9077 let employee_ids: Vec<String> = employee_pool
9079 .employees
9080 .iter()
9081 .map(|e| e.employee_id.clone())
9082 .collect();
9083 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9084 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9085
9086 (
9087 vendor_pool.vendors,
9088 customer_pool.customers,
9089 material_pool.materials,
9090 asset_pool.assets,
9091 employee_pool.employees,
9092 employee_change_history,
9093 cost_centers,
9094 )
9095 })
9096 .collect();
9097
9098 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9100 per_company_results
9101 {
9102 self.master_data.vendors.extend(vendors);
9103 self.master_data.customers.extend(customers);
9104 self.master_data.materials.extend(materials);
9105 self.master_data.assets.extend(assets);
9106 self.master_data.employees.extend(employees);
9107 self.master_data.cost_centers.extend(cost_centers);
9108 self.master_data
9109 .employee_change_history
9110 .extend(change_history);
9111 }
9112
9113 if let Some(pb) = &pb {
9114 pb.inc(total);
9115 }
9116 if let Some(pb) = pb {
9117 pb.finish_with_message("Master data generation complete");
9118 }
9119
9120 Ok(())
9121 }
9122
9123 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9125 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9126 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9127
9128 let months = (self.config.global.period_months as usize).max(1);
9131 let p2p_count = self
9132 .phase_config
9133 .p2p_chains
9134 .min(self.master_data.vendors.len() * 2 * months);
9135 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9136
9137 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9139 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9140 p2p_gen.set_country_pack(self.primary_pack().clone());
9141
9142 for i in 0..p2p_count {
9143 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9144 let materials: Vec<&Material> = self
9145 .master_data
9146 .materials
9147 .iter()
9148 .skip(i % self.master_data.materials.len().max(1))
9149 .take(2.min(self.master_data.materials.len()))
9150 .collect();
9151
9152 if materials.is_empty() {
9153 continue;
9154 }
9155
9156 let company = &self.config.companies[i % self.config.companies.len()];
9157 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9158 let fiscal_period = po_date.month() as u8;
9159 let created_by = if self.master_data.employees.is_empty() {
9160 "SYSTEM"
9161 } else {
9162 self.master_data.employees[i % self.master_data.employees.len()]
9163 .user_id
9164 .as_str()
9165 };
9166
9167 let chain = p2p_gen.generate_chain(
9168 &company.code,
9169 vendor,
9170 &materials,
9171 po_date,
9172 start_date.year() as u16,
9173 fiscal_period,
9174 created_by,
9175 );
9176
9177 flows.purchase_orders.push(chain.purchase_order.clone());
9179 flows.goods_receipts.extend(chain.goods_receipts.clone());
9180 if let Some(vi) = &chain.vendor_invoice {
9181 flows.vendor_invoices.push(vi.clone());
9182 }
9183 if let Some(payment) = &chain.payment {
9184 flows.payments.push(payment.clone());
9185 }
9186 for remainder in &chain.remainder_payments {
9187 flows.payments.push(remainder.clone());
9188 }
9189 flows.p2p_chains.push(chain);
9190
9191 if let Some(pb) = &pb {
9192 pb.inc(1);
9193 }
9194 }
9195
9196 if let Some(pb) = pb {
9197 pb.finish_with_message("P2P document flows complete");
9198 }
9199
9200 let o2c_count = self
9203 .phase_config
9204 .o2c_chains
9205 .min(self.master_data.customers.len() * 2 * months);
9206 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9207
9208 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9210 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9211 o2c_gen.set_country_pack(self.primary_pack().clone());
9212
9213 for i in 0..o2c_count {
9214 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9215 let materials: Vec<&Material> = self
9216 .master_data
9217 .materials
9218 .iter()
9219 .skip(i % self.master_data.materials.len().max(1))
9220 .take(2.min(self.master_data.materials.len()))
9221 .collect();
9222
9223 if materials.is_empty() {
9224 continue;
9225 }
9226
9227 let company = &self.config.companies[i % self.config.companies.len()];
9228 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9229 let fiscal_period = so_date.month() as u8;
9230 let created_by = if self.master_data.employees.is_empty() {
9231 "SYSTEM"
9232 } else {
9233 self.master_data.employees[i % self.master_data.employees.len()]
9234 .user_id
9235 .as_str()
9236 };
9237
9238 let chain = o2c_gen.generate_chain(
9239 &company.code,
9240 customer,
9241 &materials,
9242 so_date,
9243 start_date.year() as u16,
9244 fiscal_period,
9245 created_by,
9246 );
9247
9248 flows.sales_orders.push(chain.sales_order.clone());
9250 flows.deliveries.extend(chain.deliveries.clone());
9251 if let Some(ci) = &chain.customer_invoice {
9252 flows.customer_invoices.push(ci.clone());
9253 }
9254 if let Some(receipt) = &chain.customer_receipt {
9255 flows.payments.push(receipt.clone());
9256 }
9257 for receipt in &chain.remainder_receipts {
9259 flows.payments.push(receipt.clone());
9260 }
9261 flows.o2c_chains.push(chain);
9262
9263 if let Some(pb) = &pb {
9264 pb.inc(1);
9265 }
9266 }
9267
9268 if let Some(pb) = pb {
9269 pb.finish_with_message("O2C document flows complete");
9270 }
9271
9272 {
9276 let mut refs = Vec::new();
9277 for doc in &flows.purchase_orders {
9278 refs.extend(doc.header.document_references.iter().cloned());
9279 }
9280 for doc in &flows.goods_receipts {
9281 refs.extend(doc.header.document_references.iter().cloned());
9282 }
9283 for doc in &flows.vendor_invoices {
9284 refs.extend(doc.header.document_references.iter().cloned());
9285 }
9286 for doc in &flows.sales_orders {
9287 refs.extend(doc.header.document_references.iter().cloned());
9288 }
9289 for doc in &flows.deliveries {
9290 refs.extend(doc.header.document_references.iter().cloned());
9291 }
9292 for doc in &flows.customer_invoices {
9293 refs.extend(doc.header.document_references.iter().cloned());
9294 }
9295 for doc in &flows.payments {
9296 refs.extend(doc.header.document_references.iter().cloned());
9297 }
9298 debug!(
9299 "Collected {} document cross-references from document headers",
9300 refs.len()
9301 );
9302 flows.document_references = refs;
9303 }
9304
9305 Ok(())
9306 }
9307
9308 fn generate_journal_entries(
9310 &mut self,
9311 coa: &Arc<ChartOfAccounts>,
9312 ) -> SynthResult<Vec<JournalEntry>> {
9313 use datasynth_core::traits::ParallelGenerator;
9314
9315 let total = self.calculate_total_transactions();
9316 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9317
9318 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9319 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9320 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9321
9322 let company_codes: Vec<String> = self
9323 .config
9324 .companies
9325 .iter()
9326 .map(|c| c.code.clone())
9327 .collect();
9328
9329 let generator = JournalEntryGenerator::new_with_params(
9330 self.config.transactions.clone(),
9331 Arc::clone(coa),
9332 company_codes,
9333 start_date,
9334 end_date,
9335 self.seed,
9336 );
9337
9338 let je_pack = self.primary_pack();
9342
9343 let mut generator = generator
9344 .with_master_data(
9345 &self.master_data.vendors,
9346 &self.master_data.customers,
9347 &self.master_data.materials,
9348 )
9349 .with_country_pack_names(je_pack)
9350 .with_country_pack_temporal(
9351 self.config.temporal_patterns.clone(),
9352 self.seed + 200,
9353 je_pack,
9354 )
9355 .with_persona_errors(true)
9356 .with_fraud_config(self.config.fraud.clone());
9357
9358 if self.config.temporal.enabled {
9360 let drift_config = self.config.temporal.to_core_config();
9361 generator = generator.with_drift_config(drift_config, self.seed + 100);
9362 }
9363
9364 self.check_memory_limit()?;
9366
9367 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9369
9370 let entries = if total >= 10_000 && num_threads > 1 {
9374 let sub_generators = generator.split(num_threads);
9377 let entries_per_thread = total as usize / num_threads;
9378 let remainder = total as usize % num_threads;
9379
9380 let batches: Vec<Vec<JournalEntry>> = sub_generators
9381 .into_par_iter()
9382 .enumerate()
9383 .map(|(i, mut gen)| {
9384 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9385 gen.generate_batch(count)
9386 })
9387 .collect();
9388
9389 let entries = JournalEntryGenerator::merge_results(batches);
9391
9392 if let Some(pb) = &pb {
9393 pb.inc(total);
9394 }
9395 entries
9396 } else {
9397 let mut entries = Vec::with_capacity(total as usize);
9399 for _ in 0..total {
9400 let entry = generator.generate();
9401 entries.push(entry);
9402 if let Some(pb) = &pb {
9403 pb.inc(1);
9404 }
9405 }
9406 entries
9407 };
9408
9409 if let Some(pb) = pb {
9410 pb.finish_with_message("Journal entries complete");
9411 }
9412
9413 Ok(entries)
9414 }
9415
9416 fn generate_jes_from_document_flows(
9421 &mut self,
9422 flows: &DocumentFlowSnapshot,
9423 ) -> SynthResult<Vec<JournalEntry>> {
9424 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9425 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9426
9427 let je_config = match self.resolve_coa_framework() {
9428 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9429 CoAFramework::GermanSkr04 => {
9430 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9431 DocumentFlowJeConfig::from(&fa)
9432 }
9433 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9434 };
9435
9436 let populate_fec = je_config.populate_fec_fields;
9437 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9438
9439 if populate_fec {
9443 let mut aux_lookup = std::collections::HashMap::new();
9444 for vendor in &self.master_data.vendors {
9445 if let Some(ref aux) = vendor.auxiliary_gl_account {
9446 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9447 }
9448 }
9449 for customer in &self.master_data.customers {
9450 if let Some(ref aux) = customer.auxiliary_gl_account {
9451 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9452 }
9453 }
9454 if !aux_lookup.is_empty() {
9455 generator.set_auxiliary_account_lookup(aux_lookup);
9456 }
9457 }
9458
9459 let mut entries = Vec::new();
9460
9461 for chain in &flows.p2p_chains {
9463 let chain_entries = generator.generate_from_p2p_chain(chain);
9464 entries.extend(chain_entries);
9465 if let Some(pb) = &pb {
9466 pb.inc(1);
9467 }
9468 }
9469
9470 for chain in &flows.o2c_chains {
9472 let chain_entries = generator.generate_from_o2c_chain(chain);
9473 entries.extend(chain_entries);
9474 if let Some(pb) = &pb {
9475 pb.inc(1);
9476 }
9477 }
9478
9479 if let Some(pb) = pb {
9480 pb.finish_with_message(format!(
9481 "Generated {} JEs from document flows",
9482 entries.len()
9483 ));
9484 }
9485
9486 Ok(entries)
9487 }
9488
9489 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9495 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9496
9497 let mut jes = Vec::with_capacity(payroll_runs.len());
9498
9499 for run in payroll_runs {
9500 let mut je = JournalEntry::new_simple(
9501 format!("JE-PAYROLL-{}", run.payroll_id),
9502 run.company_code.clone(),
9503 run.run_date,
9504 format!("Payroll {}", run.payroll_id),
9505 );
9506
9507 je.add_line(JournalEntryLine {
9509 line_number: 1,
9510 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9511 debit_amount: run.total_gross,
9512 reference: Some(run.payroll_id.clone()),
9513 text: Some(format!(
9514 "Payroll {} ({} employees)",
9515 run.payroll_id, run.employee_count
9516 )),
9517 ..Default::default()
9518 });
9519
9520 je.add_line(JournalEntryLine {
9522 line_number: 2,
9523 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9524 credit_amount: run.total_gross,
9525 reference: Some(run.payroll_id.clone()),
9526 ..Default::default()
9527 });
9528
9529 jes.push(je);
9530 }
9531
9532 jes
9533 }
9534
9535 fn link_document_flows_to_subledgers(
9540 &mut self,
9541 flows: &DocumentFlowSnapshot,
9542 ) -> SynthResult<SubledgerSnapshot> {
9543 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9544 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9545
9546 let vendor_names: std::collections::HashMap<String, String> = self
9548 .master_data
9549 .vendors
9550 .iter()
9551 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9552 .collect();
9553 let customer_names: std::collections::HashMap<String, String> = self
9554 .master_data
9555 .customers
9556 .iter()
9557 .map(|c| (c.customer_id.clone(), c.name.clone()))
9558 .collect();
9559
9560 let mut linker = DocumentFlowLinker::new()
9561 .with_vendor_names(vendor_names)
9562 .with_customer_names(customer_names);
9563
9564 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9566 if let Some(pb) = &pb {
9567 pb.inc(flows.vendor_invoices.len() as u64);
9568 }
9569
9570 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9572 if let Some(pb) = &pb {
9573 pb.inc(flows.customer_invoices.len() as u64);
9574 }
9575
9576 if let Some(pb) = pb {
9577 pb.finish_with_message(format!(
9578 "Linked {} AP and {} AR invoices",
9579 ap_invoices.len(),
9580 ar_invoices.len()
9581 ));
9582 }
9583
9584 Ok(SubledgerSnapshot {
9585 ap_invoices,
9586 ar_invoices,
9587 fa_records: Vec::new(),
9588 inventory_positions: Vec::new(),
9589 inventory_movements: Vec::new(),
9590 ar_aging_reports: Vec::new(),
9592 ap_aging_reports: Vec::new(),
9593 depreciation_runs: Vec::new(),
9595 inventory_valuations: Vec::new(),
9596 dunning_runs: Vec::new(),
9598 dunning_letters: Vec::new(),
9599 })
9600 }
9601
    /// Builds an object-centric process mining (OCPM) event log from the
    /// supplied snapshots.
    ///
    /// One case is generated per P2P chain, O2C chain, sourcing project, payroll
    /// run, production order, banking customer, audit engagement and bank
    /// reconciliation, in that order. Every case's events, objects,
    /// relationships, correlation events and case trace are appended to a single
    /// event log, variants are computed, and the log is returned together with
    /// its summary counts.
    ///
    /// NOTE(review): cases are produced by one seeded generator and one shared
    /// UUID factory, so the order of the loops and of the builder calls below
    /// presumably determines the generated ids — confirm before reordering.
    #[allow(clippy::too_many_arguments)]
    fn generate_ocpm_events(
        &mut self,
        flows: &DocumentFlowSnapshot,
        sourcing: &SourcingSnapshot,
        hr: &HrSnapshot,
        manufacturing: &ManufacturingSnapshot,
        banking: &BankingSnapshot,
        audit: &AuditSnapshot,
        financial_reporting: &FinancialReportingSnapshot,
    ) -> SynthResult<OcpmSnapshot> {
        // One progress tick per case across all process families.
        let total_chains = flows.p2p_chains.len()
            + flows.o2c_chains.len()
            + sourcing.sourcing_projects.len()
            + hr.payroll_runs.len()
            + manufacturing.production_orders.len()
            + banking.customers.len()
            + audit.engagements.len()
            + financial_reporting.bank_reconciliations.len();
        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");

        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();

        // P2P and O2C are always switched on; every other process family is
        // enabled only when its source snapshot contains data.
        let ocpm_config = OcpmGeneratorConfig {
            generate_p2p: true,
            generate_o2c: true,
            generate_s2c: !sourcing.sourcing_projects.is_empty(),
            generate_h2r: !hr.payroll_runs.is_empty(),
            generate_mfg: !manufacturing.production_orders.is_empty(),
            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
            generate_bank: !banking.customers.is_empty(),
            generate_audit: !audit.engagements.is_empty(),
            happy_path_rate: 0.75,
            exception_path_rate: 0.20,
            error_path_rate: 0.05,
            add_duration_variability: true,
            duration_std_dev_factor: 0.3,
        };
        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);

        // User ids of (at most) the first 20 employees, offered to every case.
        let available_users: Vec<String> = self
            .master_data
            .employees
            .iter()
            .take(20)
            .map(|e| e.user_id.clone())
            .collect();

        // Reference timestamp for cases that carry no document timestamp of their
        // own. An unparsable `start_date` silently falls back to 2024-01-01
        // rather than returning an error.
        let fallback_date =
            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .unwrap_or(fallback_date);
        let base_midnight = base_date
            .and_hms_opt(0, 0, 0)
            .expect("midnight is always valid");
        let base_datetime =
            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);

        // Appends everything contained in one generated case to the event log.
        let add_result = |event_log: &mut OcpmEventLog,
                          result: datasynth_ocpm::CaseGenerationResult| {
            for event in result.events {
                event_log.add_event(event);
            }
            for object in result.objects {
                event_log.add_object(object);
            }
            for relationship in result.relationships {
                event_log.add_relationship(relationship);
            }
            for corr in result.correlation_events {
                event_log.add_correlation_event(corr);
            }
            event_log.add_case(result.case_trace);
        };

        // --- P2P: one case per chain, starting at the PO's entry timestamp ---
        for chain in &flows.p2p_chains {
            let po = &chain.purchase_order;
            // Only the FIRST goods receipt and the primary payment are linked. A
            // missing document is passed as an empty id.
            // NOTE(review): how the builder treats "" is not visible here — confirm.
            let documents = P2pDocuments::new(
                &po.header.document_id,
                &po.vendor_id,
                &po.header.company_code,
                po.total_net_amount,
                &po.header.currency,
                &ocpm_uuid_factory,
            )
            .with_goods_receipt(
                chain
                    .goods_receipts
                    .first()
                    .map(|gr| gr.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_invoice(
                chain
                    .vendor_invoice
                    .as_ref()
                    .map(|vi| vi.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_payment(
                chain
                    .payment
                    .as_ref()
                    .map(|p| p.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            );

            let start_time =
                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- O2C: one case per chain, starting at the SO's entry timestamp ---
        for chain in &flows.o2c_chains {
            let so = &chain.sales_order;
            // Same convention as P2P: first delivery only, "" for missing documents.
            let documents = O2cDocuments::new(
                &so.header.document_id,
                &so.customer_id,
                &so.header.company_code,
                so.total_net_amount,
                &so.header.currency,
                &ocpm_uuid_factory,
            )
            .with_delivery(
                chain
                    .deliveries
                    .first()
                    .map(|d| d.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_invoice(
                chain
                    .customer_invoice
                    .as_ref()
                    .map(|ci| ci.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_receipt(
                chain
                    .customer_receipt
                    .as_ref()
                    .map(|r| r.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            );

            let start_time =
                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- S2C (sourcing): one case per project, starting 90 days before base ---
        for project in &sourcing.sourcing_projects {
            // Vendor resolution order: the contract linked to this project, then
            // the first qualification in the snapshot, then the first vendor in
            // master data, and finally the placeholder "V000".
            let vendor_id = sourcing
                .contracts
                .iter()
                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
                .map(|c| c.vendor_id.clone())
                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
                .or_else(|| {
                    self.master_data
                        .vendors
                        .first()
                        .map(|v| v.vendor_id.clone())
                })
                .unwrap_or_else(|| "V000".to_string());
            let mut docs = S2cDocuments::new(
                &project.project_id,
                &vendor_id,
                &project.company_code,
                project.estimated_annual_spend,
                &ocpm_uuid_factory,
            );
            // Attach the first RFx event of the project and, if present, the first
            // accepted bid on that RFx.
            if let Some(rfx) = sourcing
                .rfx_events
                .iter()
                .find(|r| r.sourcing_project_id == project.project_id)
            {
                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
                if let Some(bid) = sourcing.bids.iter().find(|b| {
                    b.rfx_id == rfx.rfx_id
                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
                }) {
                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
                }
            }
            // Attach the first contract that references this project.
            if let Some(contract) = sourcing
                .contracts
                .iter()
                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
            {
                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
            }
            let start_time = base_datetime - chrono::Duration::days(90);
            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- H2R (payroll): one case per run, starting 30 days before base ---
        for run in &hr.payroll_runs {
            // The case uses the employee of the run's first line item, or the
            // placeholder "EMP000" when the run has no line items.
            let employee_id = hr
                .payroll_line_items
                .iter()
                .find(|li| li.payroll_id == run.payroll_id)
                .map(|li| li.employee_id.as_str())
                .unwrap_or("EMP000");
            let docs = H2rDocuments::new(
                &run.payroll_id,
                employee_id,
                &run.company_code,
                run.total_gross,
                &ocpm_uuid_factory,
            )
            // Up to 5 time entries dated inside the pay period (bounds inclusive);
            // the filter does not restrict entries to `employee_id`.
            .with_time_entries(
                hr.time_entries
                    .iter()
                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
                    .take(5)
                    .map(|t| t.entry_id.as_str())
                    .collect(),
            );
            let start_time = base_datetime - chrono::Duration::days(30);
            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- MFG: one case per production order, starting 60 days before base ---
        for order in &manufacturing.production_orders {
            let mut docs = MfgDocuments::new(
                &order.order_id,
                &order.material_id,
                &order.company_code,
                order.planned_quantity,
                &ocpm_uuid_factory,
            )
            // Operation ids are synthesised as "OP-" plus the zero-padded
            // four-digit operation number.
            .with_operations(
                order
                    .operations
                    .iter()
                    .map(|o| format!("OP-{:04}", o.operation_number))
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            );
            // First quality inspection whose reference id equals the order id.
            if let Some(insp) = manufacturing
                .quality_inspections
                .iter()
                .find(|i| i.reference_id == order.order_id)
            {
                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
            }
            // First cycle count that contains the order's material.
            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
                cc.items
                    .iter()
                    .any(|item| item.material_id == order.material_id)
            }) {
                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
            }
            let start_time = base_datetime - chrono::Duration::days(60);
            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Banking: one case per customer, starting 180 days before base ---
        for customer in &banking.customers {
            let customer_id_str = customer.customer_id.to_string();
            // NOTE(review): "1000" is a hard-coded second argument, presumably a
            // company code — confirm against `BankDocuments::new`.
            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
            // Only the first account whose primary owner is this customer is used.
            if let Some(account) = banking
                .accounts
                .iter()
                .find(|a| a.primary_owner_id == customer.customer_id)
            {
                let account_id_str = account.account_id.to_string();
                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
                // Ids and amounts of the account's first 10 transactions. Both
                // passes apply the same filter and limit, so the two vectors line
                // up index by index.
                let txn_strs: Vec<String> = banking
                    .transactions
                    .iter()
                    .filter(|t| t.account_id == account.account_id)
                    .take(10)
                    .map(|t| t.transaction_id.to_string())
                    .collect();
                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
                let txn_amounts: Vec<rust_decimal::Decimal> = banking
                    .transactions
                    .iter()
                    .filter(|t| t.account_id == account.account_id)
                    .take(10)
                    .map(|t| t.amount)
                    .collect();
                if !txn_ids.is_empty() {
                    docs = docs.with_transactions(txn_ids, txn_amounts);
                }
            }
            let start_time = base_datetime - chrono::Duration::days(180);
            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Audit: one case per engagement, starting 120 days before base ---
        for engagement in &audit.engagements {
            let engagement_id_str = engagement.engagement_id.to_string();
            // Related artefacts are matched by engagement id and capped: 10
            // workpapers, 10 evidence items, 5 risks, 5 findings, 5 judgments.
            // Each id list is first materialised as owned strings and then
            // borrowed as `&str` for the builder call.
            let docs = AuditDocuments::new(
                &engagement_id_str,
                &engagement.client_entity_id,
                &ocpm_uuid_factory,
            )
            .with_workpapers(
                audit
                    .workpapers
                    .iter()
                    .filter(|w| w.engagement_id == engagement.engagement_id)
                    .take(10)
                    .map(|w| w.workpaper_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_evidence(
                audit
                    .evidence
                    .iter()
                    .filter(|e| e.engagement_id == engagement.engagement_id)
                    .take(10)
                    .map(|e| e.evidence_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_risks(
                audit
                    .risk_assessments
                    .iter()
                    .filter(|r| r.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|r| r.risk_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_findings(
                audit
                    .findings
                    .iter()
                    .filter(|f| f.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|f| f.finding_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_judgments(
                audit
                    .judgments
                    .iter()
                    .filter(|j| j.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|j| j.judgment_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            );
            let start_time = base_datetime - chrono::Duration::days(120);
            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Bank reconciliation: one case each, starting 30 days before base ---
        for recon in &financial_reporting.bank_reconciliations {
            // At most 20 statement lines and 10 reconciling items are attached.
            let docs = BankReconDocuments::new(
                &recon.reconciliation_id,
                &recon.bank_account_id,
                &recon.company_code,
                recon.bank_ending_balance,
                &ocpm_uuid_factory,
            )
            .with_statement_lines(
                recon
                    .statement_lines
                    .iter()
                    .take(20)
                    .map(|l| l.line_id.as_str())
                    .collect(),
            )
            .with_reconciling_items(
                recon
                    .reconciling_items
                    .iter()
                    .take(10)
                    .map(|i| i.item_id.as_str())
                    .collect(),
            );
            let start_time = base_datetime - chrono::Duration::days(30);
            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Variants are computed once, after every case has been added.
        event_log.compute_variants();

        let summary = event_log.summary();

        if let Some(pb) = pb {
            pb.finish_with_message(format!(
                "Generated {} OCPM events, {} objects",
                summary.event_count, summary.object_count
            ));
        }

        Ok(OcpmSnapshot {
            event_count: summary.event_count,
            object_count: summary.object_count,
            case_count: summary.case_count,
            event_log: Some(event_log),
        })
    }
10083
10084 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10086 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10087
10088 let total_rate = if self.config.anomaly_injection.enabled {
10091 self.config.anomaly_injection.rates.total_rate
10092 } else if self.config.fraud.enabled {
10093 self.config.fraud.fraud_rate
10094 } else {
10095 0.02
10096 };
10097
10098 let fraud_rate = if self.config.anomaly_injection.enabled {
10099 self.config.anomaly_injection.rates.fraud_rate
10100 } else {
10101 AnomalyRateConfig::default().fraud_rate
10102 };
10103
10104 let error_rate = if self.config.anomaly_injection.enabled {
10105 self.config.anomaly_injection.rates.error_rate
10106 } else {
10107 AnomalyRateConfig::default().error_rate
10108 };
10109
10110 let process_issue_rate = if self.config.anomaly_injection.enabled {
10111 self.config.anomaly_injection.rates.process_rate
10112 } else {
10113 AnomalyRateConfig::default().process_issue_rate
10114 };
10115
10116 let anomaly_config = AnomalyInjectorConfig {
10117 rates: AnomalyRateConfig {
10118 total_rate,
10119 fraud_rate,
10120 error_rate,
10121 process_issue_rate,
10122 ..Default::default()
10123 },
10124 seed: self.seed + 5000,
10125 ..Default::default()
10126 };
10127
10128 let mut injector = AnomalyInjector::new(anomaly_config);
10129 let result = injector.process_entries(entries);
10130
10131 if let Some(pb) = &pb {
10132 pb.inc(entries.len() as u64);
10133 pb.finish_with_message("Anomaly injection complete");
10134 }
10135
10136 let mut by_type = HashMap::new();
10137 for label in &result.labels {
10138 *by_type
10139 .entry(format!("{:?}", label.anomaly_type))
10140 .or_insert(0) += 1;
10141 }
10142
10143 Ok(AnomalyLabels {
10144 labels: result.labels,
10145 summary: Some(result.summary),
10146 by_type,
10147 })
10148 }
10149
10150 fn validate_journal_entries(
10159 &mut self,
10160 entries: &[JournalEntry],
10161 ) -> SynthResult<BalanceValidationResult> {
10162 let clean_entries: Vec<&JournalEntry> = entries
10164 .iter()
10165 .filter(|e| {
10166 e.header
10167 .header_text
10168 .as_ref()
10169 .map(|t| !t.contains("[HUMAN_ERROR:"))
10170 .unwrap_or(true)
10171 })
10172 .collect();
10173
10174 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10175
10176 let config = BalanceTrackerConfig {
10178 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10182 };
10183 let validation_currency = self
10184 .config
10185 .companies
10186 .first()
10187 .map(|c| c.currency.clone())
10188 .unwrap_or_else(|| "USD".to_string());
10189
10190 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10191
10192 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10194 let errors = tracker.apply_entries(&clean_refs);
10195
10196 if let Some(pb) = &pb {
10197 pb.inc(entries.len() as u64);
10198 }
10199
10200 let has_unbalanced = tracker
10203 .get_validation_errors()
10204 .iter()
10205 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10206
10207 let mut all_errors = errors;
10210 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10211 let company_codes: Vec<String> = self
10212 .config
10213 .companies
10214 .iter()
10215 .map(|c| c.code.clone())
10216 .collect();
10217
10218 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10219 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10220 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10221
10222 for company_code in &company_codes {
10223 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10224 all_errors.push(e);
10225 }
10226 }
10227
10228 let stats = tracker.get_statistics();
10230
10231 let is_balanced = all_errors.is_empty();
10233
10234 if let Some(pb) = pb {
10235 let msg = if is_balanced {
10236 "Balance validation passed"
10237 } else {
10238 "Balance validation completed with errors"
10239 };
10240 pb.finish_with_message(msg);
10241 }
10242
10243 Ok(BalanceValidationResult {
10244 validated: true,
10245 is_balanced,
10246 entries_processed: stats.entries_processed,
10247 total_debits: stats.total_debits,
10248 total_credits: stats.total_credits,
10249 accounts_tracked: stats.accounts_tracked,
10250 companies_tracked: stats.companies_tracked,
10251 validation_errors: all_errors,
10252 has_unbalanced_entries: has_unbalanced,
10253 })
10254 }
10255
10256 fn inject_data_quality(
10261 &mut self,
10262 entries: &mut [JournalEntry],
10263 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10264 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10265
10266 let config = if self.config.data_quality.enabled {
10269 let dq = &self.config.data_quality;
10270 DataQualityConfig {
10271 enable_missing_values: dq.missing_values.enabled,
10272 missing_values: datasynth_generators::MissingValueConfig {
10273 global_rate: dq.effective_missing_rate(),
10274 ..Default::default()
10275 },
10276 enable_format_variations: dq.format_variations.enabled,
10277 format_variations: datasynth_generators::FormatVariationConfig {
10278 date_variation_rate: dq.format_variations.dates.rate,
10279 amount_variation_rate: dq.format_variations.amounts.rate,
10280 identifier_variation_rate: dq.format_variations.identifiers.rate,
10281 ..Default::default()
10282 },
10283 enable_duplicates: dq.duplicates.enabled,
10284 duplicates: datasynth_generators::DuplicateConfig {
10285 duplicate_rate: dq.effective_duplicate_rate(),
10286 ..Default::default()
10287 },
10288 enable_typos: dq.typos.enabled,
10289 typos: datasynth_generators::TypoConfig {
10290 char_error_rate: dq.effective_typo_rate(),
10291 ..Default::default()
10292 },
10293 enable_encoding_issues: dq.encoding_issues.enabled,
10294 encoding_issue_rate: dq.encoding_issues.rate,
10295 seed: self.seed.wrapping_add(77), track_statistics: true,
10297 }
10298 } else {
10299 DataQualityConfig::minimal()
10300 };
10301 let mut injector = DataQualityInjector::new(config);
10302
10303 injector.set_country_pack(self.primary_pack().clone());
10305
10306 let context = HashMap::new();
10308
10309 for entry in entries.iter_mut() {
10310 if let Some(text) = &entry.header.header_text {
10312 let processed = injector.process_text_field(
10313 "header_text",
10314 text,
10315 &entry.header.document_id.to_string(),
10316 &context,
10317 );
10318 match processed {
10319 Some(new_text) if new_text != *text => {
10320 entry.header.header_text = Some(new_text);
10321 }
10322 None => {
10323 entry.header.header_text = None; }
10325 _ => {}
10326 }
10327 }
10328
10329 if let Some(ref_text) = &entry.header.reference {
10331 let processed = injector.process_text_field(
10332 "reference",
10333 ref_text,
10334 &entry.header.document_id.to_string(),
10335 &context,
10336 );
10337 match processed {
10338 Some(new_text) if new_text != *ref_text => {
10339 entry.header.reference = Some(new_text);
10340 }
10341 None => {
10342 entry.header.reference = None;
10343 }
10344 _ => {}
10345 }
10346 }
10347
10348 let user_persona = entry.header.user_persona.clone();
10350 if let Some(processed) = injector.process_text_field(
10351 "user_persona",
10352 &user_persona,
10353 &entry.header.document_id.to_string(),
10354 &context,
10355 ) {
10356 if processed != user_persona {
10357 entry.header.user_persona = processed;
10358 }
10359 }
10360
10361 for line in &mut entry.lines {
10363 if let Some(ref text) = line.line_text {
10365 let processed = injector.process_text_field(
10366 "line_text",
10367 text,
10368 &entry.header.document_id.to_string(),
10369 &context,
10370 );
10371 match processed {
10372 Some(new_text) if new_text != *text => {
10373 line.line_text = Some(new_text);
10374 }
10375 None => {
10376 line.line_text = None;
10377 }
10378 _ => {}
10379 }
10380 }
10381
10382 if let Some(cc) = &line.cost_center {
10384 let processed = injector.process_text_field(
10385 "cost_center",
10386 cc,
10387 &entry.header.document_id.to_string(),
10388 &context,
10389 );
10390 match processed {
10391 Some(new_cc) if new_cc != *cc => {
10392 line.cost_center = Some(new_cc);
10393 }
10394 None => {
10395 line.cost_center = None;
10396 }
10397 _ => {}
10398 }
10399 }
10400 }
10401
10402 if let Some(pb) = &pb {
10403 pb.inc(1);
10404 }
10405 }
10406
10407 if let Some(pb) = pb {
10408 pb.finish_with_message("Data quality injection complete");
10409 }
10410
10411 let quality_issues = injector.issues().to_vec();
10412 Ok((injector.stats().clone(), quality_issues))
10413 }
10414
10415 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10426 let use_fsm = self
10428 .config
10429 .audit
10430 .fsm
10431 .as_ref()
10432 .map(|f| f.enabled)
10433 .unwrap_or(false);
10434
10435 if use_fsm {
10436 return self.generate_audit_data_with_fsm(entries);
10437 }
10438
10439 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10441 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10442 let fiscal_year = start_date.year() as u16;
10443 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10444
10445 let total_revenue: rust_decimal::Decimal = entries
10447 .iter()
10448 .flat_map(|e| e.lines.iter())
10449 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10450 .map(|l| l.credit_amount)
10451 .sum();
10452
10453 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10455
10456 let mut snapshot = AuditSnapshot::default();
10457
10458 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10460 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10461 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10462 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10463 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10464 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10465 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10466 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10467 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10468 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10469 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10470 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10471
10472 let accounts: Vec<String> = self
10474 .coa
10475 .as_ref()
10476 .map(|coa| {
10477 coa.get_postable_accounts()
10478 .iter()
10479 .map(|acc| acc.account_code().to_string())
10480 .collect()
10481 })
10482 .unwrap_or_default();
10483
10484 for (i, company) in self.config.companies.iter().enumerate() {
10486 let company_revenue = total_revenue
10488 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10489
10490 let engagements_for_company =
10492 self.phase_config.audit_engagements / self.config.companies.len().max(1);
10493 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10494 1
10495 } else {
10496 0
10497 };
10498
10499 for _eng_idx in 0..(engagements_for_company + extra) {
10500 let mut engagement = engagement_gen.generate_engagement(
10502 &company.code,
10503 &company.name,
10504 fiscal_year,
10505 period_end,
10506 company_revenue,
10507 None, );
10509
10510 if !self.master_data.employees.is_empty() {
10512 let emp_count = self.master_data.employees.len();
10513 let base = (i * 10 + _eng_idx) % emp_count;
10515 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10516 .employee_id
10517 .clone();
10518 engagement.engagement_manager_id = self.master_data.employees
10519 [(base + 1) % emp_count]
10520 .employee_id
10521 .clone();
10522 let real_team: Vec<String> = engagement
10523 .team_member_ids
10524 .iter()
10525 .enumerate()
10526 .map(|(j, _)| {
10527 self.master_data.employees[(base + 2 + j) % emp_count]
10528 .employee_id
10529 .clone()
10530 })
10531 .collect();
10532 engagement.team_member_ids = real_team;
10533 }
10534
10535 if let Some(pb) = &pb {
10536 pb.inc(1);
10537 }
10538
10539 let team_members: Vec<String> = engagement.team_member_ids.clone();
10541
10542 let workpapers =
10544 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10545
10546 for wp in &workpapers {
10547 if let Some(pb) = &pb {
10548 pb.inc(1);
10549 }
10550
10551 let evidence = evidence_gen.generate_evidence_for_workpaper(
10553 wp,
10554 &team_members,
10555 wp.preparer_date,
10556 );
10557
10558 for _ in &evidence {
10559 if let Some(pb) = &pb {
10560 pb.inc(1);
10561 }
10562 }
10563
10564 snapshot.evidence.extend(evidence);
10565 }
10566
10567 let risks =
10569 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10570
10571 for _ in &risks {
10572 if let Some(pb) = &pb {
10573 pb.inc(1);
10574 }
10575 }
10576 snapshot.risk_assessments.extend(risks);
10577
10578 let findings = finding_gen.generate_findings_for_engagement(
10580 &engagement,
10581 &workpapers,
10582 &team_members,
10583 );
10584
10585 for _ in &findings {
10586 if let Some(pb) = &pb {
10587 pb.inc(1);
10588 }
10589 }
10590 snapshot.findings.extend(findings);
10591
10592 let judgments =
10594 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10595
10596 for _ in &judgments {
10597 if let Some(pb) = &pb {
10598 pb.inc(1);
10599 }
10600 }
10601 snapshot.judgments.extend(judgments);
10602
10603 let (confs, resps) =
10605 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10606 snapshot.confirmations.extend(confs);
10607 snapshot.confirmation_responses.extend(resps);
10608
10609 let team_pairs: Vec<(String, String)> = team_members
10611 .iter()
10612 .map(|id| {
10613 let name = self
10614 .master_data
10615 .employees
10616 .iter()
10617 .find(|e| e.employee_id == *id)
10618 .map(|e| e.display_name.clone())
10619 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10620 (id.clone(), name)
10621 })
10622 .collect();
10623 for wp in &workpapers {
10624 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10625 snapshot.procedure_steps.extend(steps);
10626 }
10627
10628 for wp in &workpapers {
10630 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10631 snapshot.samples.push(sample);
10632 }
10633 }
10634
10635 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10637 snapshot.analytical_results.extend(analytical);
10638
10639 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10641 snapshot.ia_functions.push(ia_func);
10642 snapshot.ia_reports.extend(ia_reports);
10643
10644 let vendor_names: Vec<String> = self
10646 .master_data
10647 .vendors
10648 .iter()
10649 .map(|v| v.name.clone())
10650 .collect();
10651 let customer_names: Vec<String> = self
10652 .master_data
10653 .customers
10654 .iter()
10655 .map(|c| c.name.clone())
10656 .collect();
10657 let (parties, rp_txns) =
10658 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10659 snapshot.related_parties.extend(parties);
10660 snapshot.related_party_transactions.extend(rp_txns);
10661
10662 snapshot.workpapers.extend(workpapers);
10664
10665 {
10667 let scope_id = format!(
10668 "SCOPE-{}-{}",
10669 engagement.engagement_id.simple(),
10670 &engagement.client_entity_id
10671 );
10672 let scope = datasynth_core::models::audit::AuditScope::new(
10673 scope_id.clone(),
10674 engagement.engagement_id.to_string(),
10675 engagement.client_entity_id.clone(),
10676 engagement.materiality,
10677 );
10678 let mut eng = engagement;
10680 eng.scope_id = Some(scope_id);
10681 snapshot.audit_scopes.push(scope);
10682 snapshot.engagements.push(eng);
10683 }
10684 }
10685 }
10686
10687 if self.config.companies.len() > 1 {
10691 let group_materiality = snapshot
10694 .engagements
10695 .first()
10696 .map(|e| e.materiality)
10697 .unwrap_or_else(|| {
10698 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10699 total_revenue * pct
10700 });
10701
10702 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10703 let group_engagement_id = snapshot
10704 .engagements
10705 .first()
10706 .map(|e| e.engagement_id.to_string())
10707 .unwrap_or_else(|| "GROUP-ENG".to_string());
10708
10709 let component_snapshot = component_gen.generate(
10710 &self.config.companies,
10711 group_materiality,
10712 &group_engagement_id,
10713 period_end,
10714 );
10715
10716 snapshot.component_auditors = component_snapshot.component_auditors;
10717 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10718 snapshot.component_instructions = component_snapshot.component_instructions;
10719 snapshot.component_reports = component_snapshot.component_reports;
10720
10721 info!(
10722 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10723 snapshot.component_auditors.len(),
10724 snapshot.component_instructions.len(),
10725 snapshot.component_reports.len(),
10726 );
10727 }
10728
10729 {
10733 let applicable_framework = self
10734 .config
10735 .accounting_standards
10736 .framework
10737 .as_ref()
10738 .map(|f| format!("{f:?}"))
10739 .unwrap_or_else(|| "IFRS".to_string());
10740
10741 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10742 let entity_count = self.config.companies.len();
10743
10744 for engagement in &snapshot.engagements {
10745 let company = self
10746 .config
10747 .companies
10748 .iter()
10749 .find(|c| c.code == engagement.client_entity_id);
10750 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10751 let letter_date = engagement.planning_start;
10752 let letter = letter_gen.generate(
10753 &engagement.engagement_id.to_string(),
10754 &engagement.client_name,
10755 entity_count,
10756 engagement.period_end_date,
10757 currency,
10758 &applicable_framework,
10759 letter_date,
10760 );
10761 snapshot.engagement_letters.push(letter);
10762 }
10763
10764 info!(
10765 "ISA 210 engagement letters: {} generated",
10766 snapshot.engagement_letters.len()
10767 );
10768 }
10769
10770 {
10774 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10775 let entity_codes: Vec<String> = self
10776 .config
10777 .companies
10778 .iter()
10779 .map(|c| c.code.clone())
10780 .collect();
10781 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10782 info!(
10783 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10784 subsequent.len(),
10785 subsequent
10786 .iter()
10787 .filter(|e| matches!(
10788 e.classification,
10789 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10790 ))
10791 .count(),
10792 subsequent
10793 .iter()
10794 .filter(|e| matches!(
10795 e.classification,
10796 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10797 ))
10798 .count(),
10799 );
10800 snapshot.subsequent_events = subsequent;
10801 }
10802
10803 {
10807 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10808 let entity_codes: Vec<String> = self
10809 .config
10810 .companies
10811 .iter()
10812 .map(|c| c.code.clone())
10813 .collect();
10814 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10815 info!(
10816 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10817 soc_snapshot.service_organizations.len(),
10818 soc_snapshot.soc_reports.len(),
10819 soc_snapshot.user_entity_controls.len(),
10820 );
10821 snapshot.service_organizations = soc_snapshot.service_organizations;
10822 snapshot.soc_reports = soc_snapshot.soc_reports;
10823 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10824 }
10825
10826 {
10830 use datasynth_generators::audit::going_concern_generator::{
10831 GoingConcernGenerator, GoingConcernInput,
10832 };
10833 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10834 let entity_codes: Vec<String> = self
10835 .config
10836 .companies
10837 .iter()
10838 .map(|c| c.code.clone())
10839 .collect();
10840 let assessment_date = period_end + chrono::Duration::days(75);
10842 let period_label = format!("FY{}", period_end.year());
10843
10844 let gc_inputs: Vec<GoingConcernInput> = self
10855 .config
10856 .companies
10857 .iter()
10858 .map(|company| {
10859 let code = &company.code;
10860 let mut revenue = rust_decimal::Decimal::ZERO;
10861 let mut expenses = rust_decimal::Decimal::ZERO;
10862 let mut current_assets = rust_decimal::Decimal::ZERO;
10863 let mut current_liabs = rust_decimal::Decimal::ZERO;
10864 let mut total_debt = rust_decimal::Decimal::ZERO;
10865
10866 for je in entries.iter().filter(|je| &je.header.company_code == code) {
10867 for line in &je.lines {
10868 let acct = line.gl_account.as_str();
10869 let net = line.debit_amount - line.credit_amount;
10870 if acct.starts_with('4') {
10871 revenue -= net;
10873 } else if acct.starts_with('6') {
10874 expenses += net;
10876 }
10877 if acct.starts_with('1') {
10879 if let Ok(n) = acct.parse::<u32>() {
10881 if (1000..=1499).contains(&n) {
10882 current_assets += net;
10883 }
10884 }
10885 } else if acct.starts_with('2') {
10886 if let Ok(n) = acct.parse::<u32>() {
10887 if (2000..=2499).contains(&n) {
10888 current_liabs -= net; } else if (2500..=2999).contains(&n) {
10891 total_debt -= net;
10893 }
10894 }
10895 }
10896 }
10897 }
10898
10899 let net_income = revenue - expenses;
10900 let working_capital = current_assets - current_liabs;
10901 let operating_cash_flow = net_income;
10904
10905 GoingConcernInput {
10906 entity_code: code.clone(),
10907 net_income,
10908 working_capital,
10909 operating_cash_flow,
10910 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
10911 assessment_date,
10912 }
10913 })
10914 .collect();
10915
10916 let assessments = if gc_inputs.is_empty() {
10917 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
10918 } else {
10919 gc_gen.generate_for_entities_with_inputs(
10920 &entity_codes,
10921 &gc_inputs,
10922 assessment_date,
10923 &period_label,
10924 )
10925 };
10926 info!(
10927 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
10928 assessments.len(),
10929 assessments.iter().filter(|a| matches!(
10930 a.auditor_conclusion,
10931 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
10932 )).count(),
10933 assessments.iter().filter(|a| matches!(
10934 a.auditor_conclusion,
10935 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
10936 )).count(),
10937 assessments.iter().filter(|a| matches!(
10938 a.auditor_conclusion,
10939 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
10940 )).count(),
10941 );
10942 snapshot.going_concern_assessments = assessments;
10943 }
10944
10945 {
10949 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
10950 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
10951 let entity_codes: Vec<String> = self
10952 .config
10953 .companies
10954 .iter()
10955 .map(|c| c.code.clone())
10956 .collect();
10957 let estimates = est_gen.generate_for_entities(&entity_codes);
10958 info!(
10959 "ISA 540 accounting estimates: {} estimates across {} entities \
10960 ({} with retrospective reviews, {} with auditor point estimates)",
10961 estimates.len(),
10962 entity_codes.len(),
10963 estimates
10964 .iter()
10965 .filter(|e| e.retrospective_review.is_some())
10966 .count(),
10967 estimates
10968 .iter()
10969 .filter(|e| e.auditor_point_estimate.is_some())
10970 .count(),
10971 );
10972 snapshot.accounting_estimates = estimates;
10973 }
10974
10975 {
10979 use datasynth_generators::audit::audit_opinion_generator::{
10980 AuditOpinionGenerator, AuditOpinionInput,
10981 };
10982
10983 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
10984
10985 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
10987 .engagements
10988 .iter()
10989 .map(|eng| {
10990 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10992 .findings
10993 .iter()
10994 .filter(|f| f.engagement_id == eng.engagement_id)
10995 .cloned()
10996 .collect();
10997
10998 let gc = snapshot
11000 .going_concern_assessments
11001 .iter()
11002 .find(|g| g.entity_code == eng.client_entity_id)
11003 .cloned();
11004
11005 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11007 snapshot.component_reports.clone();
11008
11009 let auditor = self
11010 .master_data
11011 .employees
11012 .first()
11013 .map(|e| e.display_name.clone())
11014 .unwrap_or_else(|| "Global Audit LLP".into());
11015
11016 let partner = self
11017 .master_data
11018 .employees
11019 .get(1)
11020 .map(|e| e.display_name.clone())
11021 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11022
11023 AuditOpinionInput {
11024 entity_code: eng.client_entity_id.clone(),
11025 entity_name: eng.client_name.clone(),
11026 engagement_id: eng.engagement_id,
11027 period_end: eng.period_end_date,
11028 findings: eng_findings,
11029 going_concern: gc,
11030 component_reports: comp_reports,
11031 is_us_listed: {
11033 let fw = &self.config.audit_standards.isa_compliance.framework;
11034 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11035 },
11036 auditor_name: auditor,
11037 engagement_partner: partner,
11038 }
11039 })
11040 .collect();
11041
11042 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11043
11044 for go in &generated_opinions {
11045 snapshot
11046 .key_audit_matters
11047 .extend(go.key_audit_matters.clone());
11048 }
11049 snapshot.audit_opinions = generated_opinions
11050 .into_iter()
11051 .map(|go| go.opinion)
11052 .collect();
11053
11054 info!(
11055 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11056 snapshot.audit_opinions.len(),
11057 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11058 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11059 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11060 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11061 );
11062 }
11063
11064 {
11068 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11069
11070 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11071
11072 for (i, company) in self.config.companies.iter().enumerate() {
11073 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11075 .engagements
11076 .iter()
11077 .filter(|e| e.client_entity_id == company.code)
11078 .map(|e| e.engagement_id)
11079 .collect();
11080
11081 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11082 .findings
11083 .iter()
11084 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11085 .cloned()
11086 .collect();
11087
11088 let emp_count = self.master_data.employees.len();
11090 let ceo_name = if emp_count > 0 {
11091 self.master_data.employees[i % emp_count]
11092 .display_name
11093 .clone()
11094 } else {
11095 format!("CEO of {}", company.name)
11096 };
11097 let cfo_name = if emp_count > 1 {
11098 self.master_data.employees[(i + 1) % emp_count]
11099 .display_name
11100 .clone()
11101 } else {
11102 format!("CFO of {}", company.name)
11103 };
11104
11105 let materiality = snapshot
11107 .engagements
11108 .iter()
11109 .find(|e| e.client_entity_id == company.code)
11110 .map(|e| e.materiality)
11111 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11112
11113 let input = SoxGeneratorInput {
11114 company_code: company.code.clone(),
11115 company_name: company.name.clone(),
11116 fiscal_year,
11117 period_end,
11118 findings: company_findings,
11119 ceo_name,
11120 cfo_name,
11121 materiality_threshold: materiality,
11122 revenue_percent: rust_decimal::Decimal::from(100),
11123 assets_percent: rust_decimal::Decimal::from(100),
11124 significant_accounts: vec![
11125 "Revenue".into(),
11126 "Accounts Receivable".into(),
11127 "Inventory".into(),
11128 "Fixed Assets".into(),
11129 "Accounts Payable".into(),
11130 ],
11131 };
11132
11133 let (certs, assessment) = sox_gen.generate(&input);
11134 snapshot.sox_302_certifications.extend(certs);
11135 snapshot.sox_404_assessments.push(assessment);
11136 }
11137
11138 info!(
11139 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11140 snapshot.sox_302_certifications.len(),
11141 snapshot.sox_404_assessments.len(),
11142 snapshot
11143 .sox_404_assessments
11144 .iter()
11145 .filter(|a| a.icfr_effective)
11146 .count(),
11147 snapshot
11148 .sox_404_assessments
11149 .iter()
11150 .filter(|a| !a.icfr_effective)
11151 .count(),
11152 );
11153 }
11154
11155 {
11159 use datasynth_generators::audit::materiality_generator::{
11160 MaterialityGenerator, MaterialityInput,
11161 };
11162
11163 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11164
11165 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11169
11170 for company in &self.config.companies {
11171 let company_code = company.code.clone();
11172
11173 let company_revenue: rust_decimal::Decimal = entries
11175 .iter()
11176 .filter(|e| e.company_code() == company_code)
11177 .flat_map(|e| e.lines.iter())
11178 .filter(|l| l.account_code.starts_with('4'))
11179 .map(|l| l.credit_amount)
11180 .sum();
11181
11182 let total_assets: rust_decimal::Decimal = entries
11184 .iter()
11185 .filter(|e| e.company_code() == company_code)
11186 .flat_map(|e| e.lines.iter())
11187 .filter(|l| l.account_code.starts_with('1'))
11188 .map(|l| l.debit_amount)
11189 .sum();
11190
11191 let total_expenses: rust_decimal::Decimal = entries
11193 .iter()
11194 .filter(|e| e.company_code() == company_code)
11195 .flat_map(|e| e.lines.iter())
11196 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11197 .map(|l| l.debit_amount)
11198 .sum();
11199
11200 let equity: rust_decimal::Decimal = entries
11202 .iter()
11203 .filter(|e| e.company_code() == company_code)
11204 .flat_map(|e| e.lines.iter())
11205 .filter(|l| l.account_code.starts_with('3'))
11206 .map(|l| l.credit_amount)
11207 .sum();
11208
11209 let pretax_income = company_revenue - total_expenses;
11210
11211 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11213 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11214 .unwrap_or(rust_decimal::Decimal::ONE);
11215 (
11216 total_revenue * w,
11217 total_revenue * w * rust_decimal::Decimal::from(3),
11218 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11219 total_revenue * w * rust_decimal::Decimal::from(2),
11220 )
11221 } else {
11222 (company_revenue, total_assets, pretax_income, equity)
11223 };
11224
11225 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11228 entity_code: company_code,
11229 period: format!("FY{}", fiscal_year),
11230 revenue: rev,
11231 pretax_income: pti,
11232 total_assets: assets,
11233 equity: eq,
11234 gross_profit,
11235 });
11236 }
11237
11238 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11239
11240 info!(
11241 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11242 {} total assets, {} equity benchmarks)",
11243 snapshot.materiality_calculations.len(),
11244 snapshot
11245 .materiality_calculations
11246 .iter()
11247 .filter(|m| matches!(
11248 m.benchmark,
11249 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11250 ))
11251 .count(),
11252 snapshot
11253 .materiality_calculations
11254 .iter()
11255 .filter(|m| matches!(
11256 m.benchmark,
11257 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11258 ))
11259 .count(),
11260 snapshot
11261 .materiality_calculations
11262 .iter()
11263 .filter(|m| matches!(
11264 m.benchmark,
11265 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11266 ))
11267 .count(),
11268 snapshot
11269 .materiality_calculations
11270 .iter()
11271 .filter(|m| matches!(
11272 m.benchmark,
11273 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11274 ))
11275 .count(),
11276 );
11277 }
11278
11279 {
11283 use datasynth_generators::audit::cra_generator::CraGenerator;
11284
11285 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11286
11287 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11289 .audit_scopes
11290 .iter()
11291 .map(|s| (s.entity_code.clone(), s.id.clone()))
11292 .collect();
11293
11294 for company in &self.config.companies {
11295 let cras = cra_gen.generate_for_entity(&company.code, None);
11296 let scope_id = entity_scope_map.get(&company.code).cloned();
11297 let cras_with_scope: Vec<_> = cras
11298 .into_iter()
11299 .map(|mut cra| {
11300 cra.scope_id = scope_id.clone();
11301 cra
11302 })
11303 .collect();
11304 snapshot.combined_risk_assessments.extend(cras_with_scope);
11305 }
11306
11307 let significant_count = snapshot
11308 .combined_risk_assessments
11309 .iter()
11310 .filter(|c| c.significant_risk)
11311 .count();
11312 let high_cra_count = snapshot
11313 .combined_risk_assessments
11314 .iter()
11315 .filter(|c| {
11316 matches!(
11317 c.combined_risk,
11318 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11319 )
11320 })
11321 .count();
11322
11323 info!(
11324 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11325 snapshot.combined_risk_assessments.len(),
11326 significant_count,
11327 high_cra_count,
11328 );
11329 }
11330
11331 {
11335 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11336
11337 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11338
11339 for company in &self.config.companies {
11341 let entity_code = company.code.clone();
11342
11343 let tolerable_error = snapshot
11345 .materiality_calculations
11346 .iter()
11347 .find(|m| m.entity_code == entity_code)
11348 .map(|m| m.tolerable_error);
11349
11350 let entity_cras: Vec<_> = snapshot
11352 .combined_risk_assessments
11353 .iter()
11354 .filter(|c| c.entity_code == entity_code)
11355 .cloned()
11356 .collect();
11357
11358 if !entity_cras.is_empty() {
11359 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11360 snapshot.sampling_plans.extend(plans);
11361 snapshot.sampled_items.extend(items);
11362 }
11363 }
11364
11365 let misstatement_count = snapshot
11366 .sampled_items
11367 .iter()
11368 .filter(|i| i.misstatement_found)
11369 .count();
11370
11371 info!(
11372 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11373 snapshot.sampling_plans.len(),
11374 snapshot.sampled_items.len(),
11375 misstatement_count,
11376 );
11377 }
11378
11379 {
11383 use datasynth_generators::audit::scots_generator::{
11384 ScotsGenerator, ScotsGeneratorConfig,
11385 };
11386
11387 let ic_enabled = self.config.intercompany.enabled;
11388
11389 let config = ScotsGeneratorConfig {
11390 intercompany_enabled: ic_enabled,
11391 ..ScotsGeneratorConfig::default()
11392 };
11393 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11394
11395 for company in &self.config.companies {
11396 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11397 snapshot
11398 .significant_transaction_classes
11399 .extend(entity_scots);
11400 }
11401
11402 let estimation_count = snapshot
11403 .significant_transaction_classes
11404 .iter()
11405 .filter(|s| {
11406 matches!(
11407 s.transaction_type,
11408 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11409 )
11410 })
11411 .count();
11412
11413 info!(
11414 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11415 snapshot.significant_transaction_classes.len(),
11416 estimation_count,
11417 );
11418 }
11419
11420 {
11424 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11425
11426 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11427 let entity_codes: Vec<String> = self
11428 .config
11429 .companies
11430 .iter()
11431 .map(|c| c.code.clone())
11432 .collect();
11433 let unusual_flags =
11434 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11435 info!(
11436 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11437 unusual_flags.len(),
11438 unusual_flags
11439 .iter()
11440 .filter(|f| matches!(
11441 f.severity,
11442 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11443 ))
11444 .count(),
11445 unusual_flags
11446 .iter()
11447 .filter(|f| matches!(
11448 f.severity,
11449 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11450 ))
11451 .count(),
11452 unusual_flags
11453 .iter()
11454 .filter(|f| matches!(
11455 f.severity,
11456 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11457 ))
11458 .count(),
11459 );
11460 snapshot.unusual_items = unusual_flags;
11461 }
11462
11463 {
11467 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11468
11469 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11470 let entity_codes: Vec<String> = self
11471 .config
11472 .companies
11473 .iter()
11474 .map(|c| c.code.clone())
11475 .collect();
11476 let current_period_label = format!("FY{fiscal_year}");
11477 let prior_period_label = format!("FY{}", fiscal_year - 1);
11478 let analytical_rels = ar_gen.generate_for_entities(
11479 &entity_codes,
11480 entries,
11481 ¤t_period_label,
11482 &prior_period_label,
11483 );
11484 let out_of_range = analytical_rels
11485 .iter()
11486 .filter(|r| !r.within_expected_range)
11487 .count();
11488 info!(
11489 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11490 analytical_rels.len(),
11491 out_of_range,
11492 );
11493 snapshot.analytical_relationships = analytical_rels;
11494 }
11495
11496 if let Some(pb) = pb {
11497 pb.finish_with_message(format!(
11498 "Audit data: {} engagements, {} workpapers, {} evidence, \
11499 {} confirmations, {} procedure steps, {} samples, \
11500 {} analytical, {} IA funcs, {} related parties, \
11501 {} component auditors, {} letters, {} subsequent events, \
11502 {} service orgs, {} going concern, {} accounting estimates, \
11503 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11504 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11505 {} unusual items, {} analytical relationships",
11506 snapshot.engagements.len(),
11507 snapshot.workpapers.len(),
11508 snapshot.evidence.len(),
11509 snapshot.confirmations.len(),
11510 snapshot.procedure_steps.len(),
11511 snapshot.samples.len(),
11512 snapshot.analytical_results.len(),
11513 snapshot.ia_functions.len(),
11514 snapshot.related_parties.len(),
11515 snapshot.component_auditors.len(),
11516 snapshot.engagement_letters.len(),
11517 snapshot.subsequent_events.len(),
11518 snapshot.service_organizations.len(),
11519 snapshot.going_concern_assessments.len(),
11520 snapshot.accounting_estimates.len(),
11521 snapshot.audit_opinions.len(),
11522 snapshot.key_audit_matters.len(),
11523 snapshot.sox_302_certifications.len(),
11524 snapshot.sox_404_assessments.len(),
11525 snapshot.materiality_calculations.len(),
11526 snapshot.combined_risk_assessments.len(),
11527 snapshot.sampling_plans.len(),
11528 snapshot.significant_transaction_classes.len(),
11529 snapshot.unusual_items.len(),
11530 snapshot.analytical_relationships.len(),
11531 ));
11532 }
11533
11534 {
11541 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11542 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11543 debug!(
11544 "PCAOB-ISA mappings generated: {} mappings",
11545 snapshot.isa_pcaob_mappings.len()
11546 );
11547 }
11548
11549 {
11556 use datasynth_standards::audit::isa_reference::IsaStandard;
11557 snapshot.isa_mappings = IsaStandard::standard_entries();
11558 debug!(
11559 "ISA standard entries generated: {} standards",
11560 snapshot.isa_mappings.len()
11561 );
11562 }
11563
11564 {
11567 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11568 .engagements
11569 .iter()
11570 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11571 .collect();
11572
11573 for rpt in &mut snapshot.related_party_transactions {
11574 if rpt.journal_entry_id.is_some() {
11575 continue; }
11577 let entity = engagement_by_id
11578 .get(&rpt.engagement_id.to_string())
11579 .copied()
11580 .unwrap_or("");
11581
11582 let best_je = entries
11584 .iter()
11585 .filter(|je| je.header.company_code == entity)
11586 .min_by_key(|je| {
11587 (je.header.posting_date - rpt.transaction_date)
11588 .num_days()
11589 .abs()
11590 });
11591
11592 if let Some(je) = best_je {
11593 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11594 }
11595 }
11596
11597 let linked = snapshot
11598 .related_party_transactions
11599 .iter()
11600 .filter(|t| t.journal_entry_id.is_some())
11601 .count();
11602 debug!(
11603 "Linked {}/{} related party transactions to journal entries",
11604 linked,
11605 snapshot.related_party_transactions.len()
11606 );
11607 }
11608
11609 Ok(snapshot)
11610 }
11611
11612 fn generate_audit_data_with_fsm(
11619 &mut self,
11620 entries: &[JournalEntry],
11621 ) -> SynthResult<AuditSnapshot> {
11622 use datasynth_audit_fsm::{
11623 context::EngagementContext,
11624 engine::AuditFsmEngine,
11625 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11626 };
11627 use rand::SeedableRng;
11628 use rand_chacha::ChaCha8Rng;
11629
11630 info!("Audit FSM: generating audit data via FSM engine");
11631
11632 let fsm_config = self
11633 .config
11634 .audit
11635 .fsm
11636 .as_ref()
11637 .expect("FSM config must be present when FSM is enabled");
11638
11639 let bwp = match fsm_config.blueprint.as_str() {
11641 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11642 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11643 _ => {
11644 warn!(
11645 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11646 fsm_config.blueprint
11647 );
11648 BlueprintWithPreconditions::load_builtin_fsa()
11649 }
11650 }
11651 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11652
11653 let overlay = match fsm_config.overlay.as_str() {
11655 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11656 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11657 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11658 _ => {
11659 warn!(
11660 "Unknown FSM overlay '{}', falling back to builtin:default",
11661 fsm_config.overlay
11662 );
11663 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11664 }
11665 }
11666 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11667
11668 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11670 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11671 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11672
11673 let company = self.config.companies.first();
11675 let company_code = company
11676 .map(|c| c.code.clone())
11677 .unwrap_or_else(|| "UNKNOWN".to_string());
11678 let company_name = company
11679 .map(|c| c.name.clone())
11680 .unwrap_or_else(|| "Unknown Company".to_string());
11681 let currency = company
11682 .map(|c| c.currency.clone())
11683 .unwrap_or_else(|| "USD".to_string());
11684
11685 let entity_entries: Vec<_> = entries
11687 .iter()
11688 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11689 .cloned()
11690 .collect();
11691 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
11695 .iter()
11696 .flat_map(|e| e.lines.iter())
11697 .filter(|l| l.account_code.starts_with('4'))
11698 .map(|l| l.credit_amount - l.debit_amount)
11699 .sum();
11700
11701 let total_assets: rust_decimal::Decimal = entries
11702 .iter()
11703 .flat_map(|e| e.lines.iter())
11704 .filter(|l| l.account_code.starts_with('1'))
11705 .map(|l| l.debit_amount - l.credit_amount)
11706 .sum();
11707
11708 let total_expenses: rust_decimal::Decimal = entries
11709 .iter()
11710 .flat_map(|e| e.lines.iter())
11711 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11712 .map(|l| l.debit_amount)
11713 .sum();
11714
11715 let equity: rust_decimal::Decimal = entries
11716 .iter()
11717 .flat_map(|e| e.lines.iter())
11718 .filter(|l| l.account_code.starts_with('3'))
11719 .map(|l| l.credit_amount - l.debit_amount)
11720 .sum();
11721
11722 let total_debt: rust_decimal::Decimal = entries
11723 .iter()
11724 .flat_map(|e| e.lines.iter())
11725 .filter(|l| l.account_code.starts_with('2'))
11726 .map(|l| l.credit_amount - l.debit_amount)
11727 .sum();
11728
11729 let pretax_income = total_revenue - total_expenses;
11730
11731 let cogs: rust_decimal::Decimal = entries
11732 .iter()
11733 .flat_map(|e| e.lines.iter())
11734 .filter(|l| l.account_code.starts_with('5'))
11735 .map(|l| l.debit_amount)
11736 .sum();
11737 let gross_profit = total_revenue - cogs;
11738
11739 let current_assets: rust_decimal::Decimal = entries
11740 .iter()
11741 .flat_map(|e| e.lines.iter())
11742 .filter(|l| {
11743 l.account_code.starts_with("10")
11744 || l.account_code.starts_with("11")
11745 || l.account_code.starts_with("12")
11746 || l.account_code.starts_with("13")
11747 })
11748 .map(|l| l.debit_amount - l.credit_amount)
11749 .sum();
11750 let current_liabilities: rust_decimal::Decimal = entries
11751 .iter()
11752 .flat_map(|e| e.lines.iter())
11753 .filter(|l| {
11754 l.account_code.starts_with("20")
11755 || l.account_code.starts_with("21")
11756 || l.account_code.starts_with("22")
11757 })
11758 .map(|l| l.credit_amount - l.debit_amount)
11759 .sum();
11760 let working_capital = current_assets - current_liabilities;
11761
11762 let depreciation: rust_decimal::Decimal = entries
11763 .iter()
11764 .flat_map(|e| e.lines.iter())
11765 .filter(|l| l.account_code.starts_with("60"))
11766 .map(|l| l.debit_amount)
11767 .sum();
11768 let operating_cash_flow = pretax_income + depreciation;
11769
11770 let accounts: Vec<String> = self
11772 .coa
11773 .as_ref()
11774 .map(|coa| {
11775 coa.get_postable_accounts()
11776 .iter()
11777 .map(|acc| acc.account_code().to_string())
11778 .collect()
11779 })
11780 .unwrap_or_default();
11781
11782 let team_member_ids: Vec<String> = self
11784 .master_data
11785 .employees
11786 .iter()
11787 .take(8) .map(|e| e.employee_id.clone())
11789 .collect();
11790 let team_member_pairs: Vec<(String, String)> = self
11791 .master_data
11792 .employees
11793 .iter()
11794 .take(8)
11795 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11796 .collect();
11797
11798 let vendor_names: Vec<String> = self
11799 .master_data
11800 .vendors
11801 .iter()
11802 .map(|v| v.name.clone())
11803 .collect();
11804 let customer_names: Vec<String> = self
11805 .master_data
11806 .customers
11807 .iter()
11808 .map(|c| c.name.clone())
11809 .collect();
11810
11811 let entity_codes: Vec<String> = self
11812 .config
11813 .companies
11814 .iter()
11815 .map(|c| c.code.clone())
11816 .collect();
11817
11818 let journal_entry_ids: Vec<String> = entries
11820 .iter()
11821 .take(50)
11822 .map(|e| e.header.document_id.to_string())
11823 .collect();
11824
11825 let mut account_balances = std::collections::HashMap::<String, f64>::new();
11827 for entry in entries {
11828 for line in &entry.lines {
11829 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11830 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11831 *account_balances
11832 .entry(line.account_code.clone())
11833 .or_insert(0.0) += debit_f64 - credit_f64;
11834 }
11835 }
11836
11837 let control_ids: Vec<String> = Vec::new();
11842 let anomaly_refs: Vec<String> = Vec::new();
11843
11844 let mut context = EngagementContext {
11845 company_code,
11846 company_name,
11847 fiscal_year: start_date.year(),
11848 currency,
11849 total_revenue,
11850 total_assets,
11851 engagement_start: start_date,
11852 report_date: period_end,
11853 pretax_income,
11854 equity,
11855 gross_profit,
11856 working_capital,
11857 operating_cash_flow,
11858 total_debt,
11859 team_member_ids,
11860 team_member_pairs,
11861 accounts,
11862 vendor_names,
11863 customer_names,
11864 journal_entry_ids,
11865 account_balances,
11866 control_ids,
11867 anomaly_refs,
11868 journal_entries: entries.to_vec(),
11869 is_us_listed: false,
11870 entity_codes,
11871 auditor_firm_name: "DataSynth Audit LLP".into(),
11872 accounting_framework: self
11873 .config
11874 .accounting_standards
11875 .framework
11876 .map(|f| match f {
11877 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
11878 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
11879 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
11880 "French GAAP"
11881 }
11882 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
11883 "German GAAP"
11884 }
11885 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
11886 "Dual Reporting"
11887 }
11888 })
11889 .unwrap_or("IFRS")
11890 .into(),
11891 };
11892
11893 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
11895 let rng = ChaCha8Rng::seed_from_u64(seed);
11896 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
11897
11898 let mut result = engine
11899 .run_engagement(&context)
11900 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
11901
11902 info!(
11903 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
11904 {} phases completed, duration {:.1}h",
11905 result.event_log.len(),
11906 result.artifacts.total_artifacts(),
11907 result.anomalies.len(),
11908 result.phases_completed.len(),
11909 result.total_duration_hours,
11910 );
11911
11912 let tb_entity = context.company_code.clone();
11914 let tb_fy = context.fiscal_year;
11915 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
11916 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
11917 entries,
11918 &tb_entity,
11919 tb_fy,
11920 self.coa.as_ref().map(|c| c.as_ref()),
11921 );
11922
11923 let bag = result.artifacts;
11925 let mut snapshot = AuditSnapshot {
11926 engagements: bag.engagements,
11927 engagement_letters: bag.engagement_letters,
11928 materiality_calculations: bag.materiality_calculations,
11929 risk_assessments: bag.risk_assessments,
11930 combined_risk_assessments: bag.combined_risk_assessments,
11931 workpapers: bag.workpapers,
11932 evidence: bag.evidence,
11933 findings: bag.findings,
11934 judgments: bag.judgments,
11935 sampling_plans: bag.sampling_plans,
11936 sampled_items: bag.sampled_items,
11937 analytical_results: bag.analytical_results,
11938 going_concern_assessments: bag.going_concern_assessments,
11939 subsequent_events: bag.subsequent_events,
11940 audit_opinions: bag.audit_opinions,
11941 key_audit_matters: bag.key_audit_matters,
11942 procedure_steps: bag.procedure_steps,
11943 samples: bag.samples,
11944 confirmations: bag.confirmations,
11945 confirmation_responses: bag.confirmation_responses,
11946 fsm_event_trail: Some(result.event_log),
11948 ..Default::default()
11950 };
11951
11952 {
11954 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11955 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11956 }
11957 {
11958 use datasynth_standards::audit::isa_reference::IsaStandard;
11959 snapshot.isa_mappings = IsaStandard::standard_entries();
11960 }
11961
11962 info!(
11963 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
11964 {} risk assessments, {} findings, {} materiality calcs",
11965 snapshot.engagements.len(),
11966 snapshot.workpapers.len(),
11967 snapshot.evidence.len(),
11968 snapshot.risk_assessments.len(),
11969 snapshot.findings.len(),
11970 snapshot.materiality_calculations.len(),
11971 );
11972
11973 Ok(snapshot)
11974 }
11975
11976 fn export_graphs(
11983 &mut self,
11984 entries: &[JournalEntry],
11985 _coa: &Arc<ChartOfAccounts>,
11986 stats: &mut EnhancedGenerationStatistics,
11987 ) -> SynthResult<GraphExportSnapshot> {
11988 let pb = self.create_progress_bar(100, "Exporting Graphs");
11989
11990 let mut snapshot = GraphExportSnapshot::default();
11991
11992 let output_dir = self
11994 .output_path
11995 .clone()
11996 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11997 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11998
11999 for graph_type in &self.config.graph_export.graph_types {
12001 if let Some(pb) = &pb {
12002 pb.inc(10);
12003 }
12004
12005 let graph_config = TransactionGraphConfig {
12007 include_vendors: false,
12008 include_customers: false,
12009 create_debit_credit_edges: true,
12010 include_document_nodes: graph_type.include_document_nodes,
12011 min_edge_weight: graph_type.min_edge_weight,
12012 aggregate_parallel_edges: graph_type.aggregate_edges,
12013 framework: None,
12014 };
12015
12016 let mut builder = TransactionGraphBuilder::new(graph_config);
12017 builder.add_journal_entries(entries);
12018 let graph = builder.build();
12019
12020 stats.graph_node_count += graph.node_count();
12022 stats.graph_edge_count += graph.edge_count();
12023
12024 if let Some(pb) = &pb {
12025 pb.inc(40);
12026 }
12027
12028 for format in &self.config.graph_export.formats {
12030 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12031
12032 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12034 warn!("Failed to create graph output directory: {}", e);
12035 continue;
12036 }
12037
12038 match format {
12039 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12040 let pyg_config = PyGExportConfig {
12041 common: datasynth_graph::CommonExportConfig {
12042 export_node_features: true,
12043 export_edge_features: true,
12044 export_node_labels: true,
12045 export_edge_labels: true,
12046 export_masks: true,
12047 train_ratio: self.config.graph_export.train_ratio,
12048 val_ratio: self.config.graph_export.validation_ratio,
12049 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12050 },
12051 one_hot_categoricals: false,
12052 };
12053
12054 let exporter = PyGExporter::new(pyg_config);
12055 match exporter.export(&graph, &format_dir) {
12056 Ok(metadata) => {
12057 snapshot.exports.insert(
12058 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12059 GraphExportInfo {
12060 name: graph_type.name.clone(),
12061 format: "pytorch_geometric".to_string(),
12062 output_path: format_dir.clone(),
12063 node_count: metadata.num_nodes,
12064 edge_count: metadata.num_edges,
12065 },
12066 );
12067 snapshot.graph_count += 1;
12068 }
12069 Err(e) => {
12070 warn!("Failed to export PyTorch Geometric graph: {}", e);
12071 }
12072 }
12073 }
12074 datasynth_config::schema::GraphExportFormat::Neo4j => {
12075 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12076
12077 let neo4j_config = Neo4jExportConfig {
12078 export_node_properties: true,
12079 export_edge_properties: true,
12080 export_features: true,
12081 generate_cypher: true,
12082 generate_admin_import: true,
12083 database_name: "synth".to_string(),
12084 cypher_batch_size: 1000,
12085 };
12086
12087 let exporter = Neo4jExporter::new(neo4j_config);
12088 match exporter.export(&graph, &format_dir) {
12089 Ok(metadata) => {
12090 snapshot.exports.insert(
12091 format!("{}_{}", graph_type.name, "neo4j"),
12092 GraphExportInfo {
12093 name: graph_type.name.clone(),
12094 format: "neo4j".to_string(),
12095 output_path: format_dir.clone(),
12096 node_count: metadata.num_nodes,
12097 edge_count: metadata.num_edges,
12098 },
12099 );
12100 snapshot.graph_count += 1;
12101 }
12102 Err(e) => {
12103 warn!("Failed to export Neo4j graph: {}", e);
12104 }
12105 }
12106 }
12107 datasynth_config::schema::GraphExportFormat::Dgl => {
12108 use datasynth_graph::{DGLExportConfig, DGLExporter};
12109
12110 let dgl_config = DGLExportConfig {
12111 common: datasynth_graph::CommonExportConfig {
12112 export_node_features: true,
12113 export_edge_features: true,
12114 export_node_labels: true,
12115 export_edge_labels: true,
12116 export_masks: true,
12117 train_ratio: self.config.graph_export.train_ratio,
12118 val_ratio: self.config.graph_export.validation_ratio,
12119 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12120 },
12121 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12122 include_pickle_script: true, };
12124
12125 let exporter = DGLExporter::new(dgl_config);
12126 match exporter.export(&graph, &format_dir) {
12127 Ok(metadata) => {
12128 snapshot.exports.insert(
12129 format!("{}_{}", graph_type.name, "dgl"),
12130 GraphExportInfo {
12131 name: graph_type.name.clone(),
12132 format: "dgl".to_string(),
12133 output_path: format_dir.clone(),
12134 node_count: metadata.common.num_nodes,
12135 edge_count: metadata.common.num_edges,
12136 },
12137 );
12138 snapshot.graph_count += 1;
12139 }
12140 Err(e) => {
12141 warn!("Failed to export DGL graph: {}", e);
12142 }
12143 }
12144 }
12145 datasynth_config::schema::GraphExportFormat::RustGraph => {
12146 use datasynth_graph::{
12147 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12148 };
12149
12150 let rustgraph_config = RustGraphExportConfig {
12151 include_features: true,
12152 include_temporal: true,
12153 include_labels: true,
12154 source_name: "datasynth".to_string(),
12155 batch_id: None,
12156 output_format: RustGraphOutputFormat::JsonLines,
12157 export_node_properties: true,
12158 export_edge_properties: true,
12159 pretty_print: false,
12160 };
12161
12162 let exporter = RustGraphExporter::new(rustgraph_config);
12163 match exporter.export(&graph, &format_dir) {
12164 Ok(metadata) => {
12165 snapshot.exports.insert(
12166 format!("{}_{}", graph_type.name, "rustgraph"),
12167 GraphExportInfo {
12168 name: graph_type.name.clone(),
12169 format: "rustgraph".to_string(),
12170 output_path: format_dir.clone(),
12171 node_count: metadata.num_nodes,
12172 edge_count: metadata.num_edges,
12173 },
12174 );
12175 snapshot.graph_count += 1;
12176 }
12177 Err(e) => {
12178 warn!("Failed to export RustGraph: {}", e);
12179 }
12180 }
12181 }
12182 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12183 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12185 }
12186 }
12187 }
12188
12189 if let Some(pb) = &pb {
12190 pb.inc(40);
12191 }
12192 }
12193
12194 stats.graph_export_count = snapshot.graph_count;
12195 snapshot.exported = snapshot.graph_count > 0;
12196
12197 if let Some(pb) = pb {
12198 pb.finish_with_message(format!(
12199 "Graphs exported: {} graphs ({} nodes, {} edges)",
12200 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12201 ));
12202 }
12203
12204 Ok(snapshot)
12205 }
12206
12207 fn build_additional_graphs(
12212 &self,
12213 banking: &BankingSnapshot,
12214 intercompany: &IntercompanySnapshot,
12215 entries: &[JournalEntry],
12216 stats: &mut EnhancedGenerationStatistics,
12217 ) {
12218 let output_dir = self
12219 .output_path
12220 .clone()
12221 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12222 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12223
12224 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12226 info!("Phase 10c: Building banking network graph");
12227 let config = BankingGraphConfig::default();
12228 let mut builder = BankingGraphBuilder::new(config);
12229 builder.add_customers(&banking.customers);
12230 builder.add_accounts(&banking.accounts, &banking.customers);
12231 builder.add_transactions(&banking.transactions);
12232 let graph = builder.build();
12233
12234 let node_count = graph.node_count();
12235 let edge_count = graph.edge_count();
12236 stats.graph_node_count += node_count;
12237 stats.graph_edge_count += edge_count;
12238
12239 for format in &self.config.graph_export.formats {
12241 if matches!(
12242 format,
12243 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12244 ) {
12245 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12246 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12247 warn!("Failed to create banking graph output dir: {}", e);
12248 continue;
12249 }
12250 let pyg_config = PyGExportConfig::default();
12251 let exporter = PyGExporter::new(pyg_config);
12252 if let Err(e) = exporter.export(&graph, &format_dir) {
12253 warn!("Failed to export banking graph as PyG: {}", e);
12254 } else {
12255 info!(
12256 "Banking network graph exported: {} nodes, {} edges",
12257 node_count, edge_count
12258 );
12259 }
12260 }
12261 }
12262 }
12263
12264 let approval_entries: Vec<_> = entries
12266 .iter()
12267 .filter(|je| je.header.approval_workflow.is_some())
12268 .collect();
12269
12270 if !approval_entries.is_empty() {
12271 info!(
12272 "Phase 10c: Building approval network graph ({} entries with approvals)",
12273 approval_entries.len()
12274 );
12275 let config = ApprovalGraphConfig::default();
12276 let mut builder = ApprovalGraphBuilder::new(config);
12277
12278 for je in &approval_entries {
12279 if let Some(ref wf) = je.header.approval_workflow {
12280 for action in &wf.actions {
12281 let record = datasynth_core::models::ApprovalRecord {
12282 approval_id: format!(
12283 "APR-{}-{}",
12284 je.header.document_id, action.approval_level
12285 ),
12286 document_number: je.header.document_id.to_string(),
12287 document_type: "JE".to_string(),
12288 company_code: je.company_code().to_string(),
12289 requester_id: wf.preparer_id.clone(),
12290 requester_name: Some(wf.preparer_name.clone()),
12291 approver_id: action.actor_id.clone(),
12292 approver_name: action.actor_name.clone(),
12293 approval_date: je.posting_date(),
12294 action: format!("{:?}", action.action),
12295 amount: wf.amount,
12296 approval_limit: None,
12297 comments: action.comments.clone(),
12298 delegation_from: None,
12299 is_auto_approved: false,
12300 };
12301 builder.add_approval(&record);
12302 }
12303 }
12304 }
12305
12306 let graph = builder.build();
12307 let node_count = graph.node_count();
12308 let edge_count = graph.edge_count();
12309 stats.graph_node_count += node_count;
12310 stats.graph_edge_count += edge_count;
12311
12312 for format in &self.config.graph_export.formats {
12314 if matches!(
12315 format,
12316 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12317 ) {
12318 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12319 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12320 warn!("Failed to create approval graph output dir: {}", e);
12321 continue;
12322 }
12323 let pyg_config = PyGExportConfig::default();
12324 let exporter = PyGExporter::new(pyg_config);
12325 if let Err(e) = exporter.export(&graph, &format_dir) {
12326 warn!("Failed to export approval graph as PyG: {}", e);
12327 } else {
12328 info!(
12329 "Approval network graph exported: {} nodes, {} edges",
12330 node_count, edge_count
12331 );
12332 }
12333 }
12334 }
12335 }
12336
12337 if self.config.companies.len() >= 2 {
12339 info!(
12340 "Phase 10c: Building entity relationship graph ({} companies)",
12341 self.config.companies.len()
12342 );
12343
12344 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12345 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12346
12347 let parent_code = &self.config.companies[0].code;
12349 let mut companies: Vec<datasynth_core::models::Company> =
12350 Vec::with_capacity(self.config.companies.len());
12351
12352 let first = &self.config.companies[0];
12354 companies.push(datasynth_core::models::Company::parent(
12355 &first.code,
12356 &first.name,
12357 &first.country,
12358 &first.currency,
12359 ));
12360
12361 for cc in self.config.companies.iter().skip(1) {
12363 companies.push(datasynth_core::models::Company::subsidiary(
12364 &cc.code,
12365 &cc.name,
12366 &cc.country,
12367 &cc.currency,
12368 parent_code,
12369 rust_decimal::Decimal::from(100),
12370 ));
12371 }
12372
12373 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12375 self.config
12376 .companies
12377 .iter()
12378 .skip(1)
12379 .enumerate()
12380 .map(|(i, cc)| {
12381 let mut rel =
12382 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12383 format!("REL{:03}", i + 1),
12384 parent_code.clone(),
12385 cc.code.clone(),
12386 rust_decimal::Decimal::from(100),
12387 start_date,
12388 );
12389 rel.functional_currency = cc.currency.clone();
12390 rel
12391 })
12392 .collect();
12393
12394 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12395 builder.add_companies(&companies);
12396 builder.add_ownership_relationships(&relationships);
12397
12398 for pair in &intercompany.matched_pairs {
12400 builder.add_intercompany_edge(
12401 &pair.seller_company,
12402 &pair.buyer_company,
12403 pair.amount,
12404 &format!("{:?}", pair.transaction_type),
12405 );
12406 }
12407
12408 let graph = builder.build();
12409 let node_count = graph.node_count();
12410 let edge_count = graph.edge_count();
12411 stats.graph_node_count += node_count;
12412 stats.graph_edge_count += edge_count;
12413
12414 for format in &self.config.graph_export.formats {
12416 if matches!(
12417 format,
12418 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12419 ) {
12420 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12421 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12422 warn!("Failed to create entity graph output dir: {}", e);
12423 continue;
12424 }
12425 let pyg_config = PyGExportConfig::default();
12426 let exporter = PyGExporter::new(pyg_config);
12427 if let Err(e) = exporter.export(&graph, &format_dir) {
12428 warn!("Failed to export entity graph as PyG: {}", e);
12429 } else {
12430 info!(
12431 "Entity relationship graph exported: {} nodes, {} edges",
12432 node_count, edge_count
12433 );
12434 }
12435 }
12436 }
12437 } else {
12438 debug!(
12439 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12440 self.config.companies.len()
12441 );
12442 }
12443 }
12444
12445 #[allow(clippy::too_many_arguments)]
12452 fn export_hypergraph(
12453 &self,
12454 coa: &Arc<ChartOfAccounts>,
12455 entries: &[JournalEntry],
12456 document_flows: &DocumentFlowSnapshot,
12457 sourcing: &SourcingSnapshot,
12458 hr: &HrSnapshot,
12459 manufacturing: &ManufacturingSnapshot,
12460 banking: &BankingSnapshot,
12461 audit: &AuditSnapshot,
12462 financial_reporting: &FinancialReportingSnapshot,
12463 ocpm: &OcpmSnapshot,
12464 compliance: &ComplianceRegulationsSnapshot,
12465 stats: &mut EnhancedGenerationStatistics,
12466 ) -> SynthResult<HypergraphExportInfo> {
12467 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12468 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12469 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12470 use datasynth_graph::models::hypergraph::AggregationStrategy;
12471
12472 let hg_settings = &self.config.graph_export.hypergraph;
12473
12474 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12476 "truncate" => AggregationStrategy::Truncate,
12477 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12478 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12479 "importance_sample" => AggregationStrategy::ImportanceSample,
12480 _ => AggregationStrategy::PoolByCounterparty,
12481 };
12482
12483 let builder_config = HypergraphConfig {
12484 max_nodes: hg_settings.max_nodes,
12485 aggregation_strategy,
12486 include_coso: hg_settings.governance_layer.include_coso,
12487 include_controls: hg_settings.governance_layer.include_controls,
12488 include_sox: hg_settings.governance_layer.include_sox,
12489 include_vendors: hg_settings.governance_layer.include_vendors,
12490 include_customers: hg_settings.governance_layer.include_customers,
12491 include_employees: hg_settings.governance_layer.include_employees,
12492 include_p2p: hg_settings.process_layer.include_p2p,
12493 include_o2c: hg_settings.process_layer.include_o2c,
12494 include_s2c: hg_settings.process_layer.include_s2c,
12495 include_h2r: hg_settings.process_layer.include_h2r,
12496 include_mfg: hg_settings.process_layer.include_mfg,
12497 include_bank: hg_settings.process_layer.include_bank,
12498 include_audit: hg_settings.process_layer.include_audit,
12499 include_r2r: hg_settings.process_layer.include_r2r,
12500 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12501 docs_per_counterparty_threshold: hg_settings
12502 .process_layer
12503 .docs_per_counterparty_threshold,
12504 include_accounts: hg_settings.accounting_layer.include_accounts,
12505 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12506 include_cross_layer_edges: hg_settings.cross_layer.enabled,
12507 include_compliance: self.config.compliance_regulations.enabled,
12508 include_tax: true,
12509 include_treasury: true,
12510 include_esg: true,
12511 include_project: true,
12512 include_intercompany: true,
12513 include_temporal_events: true,
12514 };
12515
12516 let mut builder = HypergraphBuilder::new(builder_config);
12517
12518 builder.add_coso_framework();
12520
12521 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12524 let controls = InternalControl::standard_controls();
12525 builder.add_controls(&controls);
12526 }
12527
12528 builder.add_vendors(&self.master_data.vendors);
12530 builder.add_customers(&self.master_data.customers);
12531 builder.add_employees(&self.master_data.employees);
12532
12533 builder.add_p2p_documents(
12535 &document_flows.purchase_orders,
12536 &document_flows.goods_receipts,
12537 &document_flows.vendor_invoices,
12538 &document_flows.payments,
12539 );
12540 builder.add_o2c_documents(
12541 &document_flows.sales_orders,
12542 &document_flows.deliveries,
12543 &document_flows.customer_invoices,
12544 );
12545 builder.add_s2c_documents(
12546 &sourcing.sourcing_projects,
12547 &sourcing.qualifications,
12548 &sourcing.rfx_events,
12549 &sourcing.bids,
12550 &sourcing.bid_evaluations,
12551 &sourcing.contracts,
12552 );
12553 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12554 builder.add_mfg_documents(
12555 &manufacturing.production_orders,
12556 &manufacturing.quality_inspections,
12557 &manufacturing.cycle_counts,
12558 );
12559 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12560 builder.add_audit_documents(
12561 &audit.engagements,
12562 &audit.workpapers,
12563 &audit.findings,
12564 &audit.evidence,
12565 &audit.risk_assessments,
12566 &audit.judgments,
12567 &audit.materiality_calculations,
12568 &audit.audit_opinions,
12569 &audit.going_concern_assessments,
12570 );
12571 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12572
12573 if let Some(ref event_log) = ocpm.event_log {
12575 builder.add_ocpm_events(event_log);
12576 }
12577
12578 if self.config.compliance_regulations.enabled
12580 && hg_settings.governance_layer.include_controls
12581 {
12582 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12584 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12585 .standard_records
12586 .iter()
12587 .filter_map(|r| {
12588 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12589 registry.get(&sid).cloned()
12590 })
12591 .collect();
12592
12593 builder.add_compliance_regulations(
12594 &standards,
12595 &compliance.findings,
12596 &compliance.filings,
12597 );
12598 }
12599
12600 builder.add_accounts(coa);
12602 builder.add_journal_entries_as_hyperedges(entries);
12603
12604 let hypergraph = builder.build();
12606
12607 let output_dir = self
12609 .output_path
12610 .clone()
12611 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12612 let hg_dir = output_dir
12613 .join(&self.config.graph_export.output_subdirectory)
12614 .join(&hg_settings.output_subdirectory);
12615
12616 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12618 "unified" => {
12619 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12620 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12621 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12622 })?;
12623 (
12624 metadata.num_nodes,
12625 metadata.num_edges,
12626 metadata.num_hyperedges,
12627 )
12628 }
12629 _ => {
12630 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12632 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12633 SynthError::generation(format!("Hypergraph export failed: {e}"))
12634 })?;
12635 (
12636 metadata.num_nodes,
12637 metadata.num_edges,
12638 metadata.num_hyperedges,
12639 )
12640 }
12641 };
12642
12643 #[cfg(feature = "streaming")]
12645 if let Some(ref target_url) = hg_settings.stream_target {
12646 use crate::stream_client::{StreamClient, StreamConfig};
12647 use std::io::Write as _;
12648
12649 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12650 let stream_config = StreamConfig {
12651 target_url: target_url.clone(),
12652 batch_size: hg_settings.stream_batch_size,
12653 api_key,
12654 ..StreamConfig::default()
12655 };
12656
12657 match StreamClient::new(stream_config) {
12658 Ok(mut client) => {
12659 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12660 match exporter.export_to_writer(&hypergraph, &mut client) {
12661 Ok(_) => {
12662 if let Err(e) = client.flush() {
12663 warn!("Failed to flush stream client: {}", e);
12664 } else {
12665 info!("Streamed {} records to {}", client.total_sent(), target_url);
12666 }
12667 }
12668 Err(e) => {
12669 warn!("Streaming export failed: {}", e);
12670 }
12671 }
12672 }
12673 Err(e) => {
12674 warn!("Failed to create stream client: {}", e);
12675 }
12676 }
12677 }
12678
12679 stats.graph_node_count += num_nodes;
12681 stats.graph_edge_count += num_edges;
12682 stats.graph_export_count += 1;
12683
12684 Ok(HypergraphExportInfo {
12685 node_count: num_nodes,
12686 edge_count: num_edges,
12687 hyperedge_count: num_hyperedges,
12688 output_path: hg_dir,
12689 })
12690 }
12691
12692 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12697 let pb = self.create_progress_bar(100, "Generating Banking Data");
12698
12699 let orchestrator = BankingOrchestratorBuilder::new()
12701 .config(self.config.banking.clone())
12702 .seed(self.seed + 9000)
12703 .country_pack(self.primary_pack().clone())
12704 .build();
12705
12706 if let Some(pb) = &pb {
12707 pb.inc(10);
12708 }
12709
12710 let result = orchestrator.generate();
12712
12713 if let Some(pb) = &pb {
12714 pb.inc(90);
12715 pb.finish_with_message(format!(
12716 "Banking: {} customers, {} transactions",
12717 result.customers.len(),
12718 result.transactions.len()
12719 ));
12720 }
12721
12722 let mut banking_customers = result.customers;
12727 let core_customers = &self.master_data.customers;
12728 if !core_customers.is_empty() {
12729 for (i, bc) in banking_customers.iter_mut().enumerate() {
12730 let core = &core_customers[i % core_customers.len()];
12731 bc.name = CustomerName::business(&core.name);
12732 bc.residence_country = core.country.clone();
12733 bc.enterprise_customer_id = Some(core.customer_id.clone());
12734 }
12735 debug!(
12736 "Cross-referenced {} banking customers with {} core customers",
12737 banking_customers.len(),
12738 core_customers.len()
12739 );
12740 }
12741
12742 Ok(BankingSnapshot {
12743 customers: banking_customers,
12744 accounts: result.accounts,
12745 transactions: result.transactions,
12746 transaction_labels: result.transaction_labels,
12747 customer_labels: result.customer_labels,
12748 account_labels: result.account_labels,
12749 relationship_labels: result.relationship_labels,
12750 narratives: result.narratives,
12751 suspicious_count: result.stats.suspicious_count,
12752 scenario_count: result.scenarios.len(),
12753 })
12754 }
12755
12756 fn calculate_total_transactions(&self) -> u64 {
12758 let months = self.config.global.period_months as f64;
12759 self.config
12760 .companies
12761 .iter()
12762 .map(|c| {
12763 let annual = c.annual_transaction_volume.count() as f64;
12764 let weighted = annual * c.volume_weight;
12765 (weighted * months / 12.0) as u64
12766 })
12767 .sum()
12768 }
12769
12770 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12772 if !self.phase_config.show_progress {
12773 return None;
12774 }
12775
12776 let pb = if let Some(mp) = &self.multi_progress {
12777 mp.add(ProgressBar::new(total))
12778 } else {
12779 ProgressBar::new(total)
12780 };
12781
12782 pb.set_style(
12783 ProgressStyle::default_bar()
12784 .template(&format!(
12785 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12786 ))
12787 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12788 .progress_chars("#>-"),
12789 );
12790
12791 Some(pb)
12792 }
12793
12794 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12796 self.coa.clone()
12797 }
12798
12799 pub fn get_master_data(&self) -> &MasterDataSnapshot {
12801 &self.master_data
12802 }
12803
12804 fn phase_compliance_regulations(
12806 &mut self,
12807 _stats: &mut EnhancedGenerationStatistics,
12808 ) -> SynthResult<ComplianceRegulationsSnapshot> {
12809 if !self.phase_config.generate_compliance_regulations {
12810 return Ok(ComplianceRegulationsSnapshot::default());
12811 }
12812
12813 info!("Phase: Generating Compliance Regulations Data");
12814
12815 let cr_config = &self.config.compliance_regulations;
12816
12817 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12819 self.config
12820 .companies
12821 .iter()
12822 .map(|c| c.country.clone())
12823 .collect::<std::collections::HashSet<_>>()
12824 .into_iter()
12825 .collect()
12826 } else {
12827 cr_config.jurisdictions.clone()
12828 };
12829
12830 let fallback_date =
12832 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12833 let reference_date = cr_config
12834 .reference_date
12835 .as_ref()
12836 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12837 .unwrap_or_else(|| {
12838 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12839 .unwrap_or(fallback_date)
12840 });
12841
12842 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12844 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12845 let cross_reference_records = reg_gen.generate_cross_reference_records();
12846 let jurisdiction_records =
12847 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12848
12849 info!(
12850 " Standards: {} records, {} cross-references, {} jurisdictions",
12851 standard_records.len(),
12852 cross_reference_records.len(),
12853 jurisdiction_records.len()
12854 );
12855
12856 let audit_procedures = if cr_config.audit_procedures.enabled {
12858 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12859 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12860 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12861 confidence_level: cr_config.audit_procedures.confidence_level,
12862 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12863 };
12864 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12865 self.seed + 9000,
12866 proc_config,
12867 );
12868 let registry = reg_gen.registry();
12869 let mut all_procs = Vec::new();
12870 for jurisdiction in &jurisdictions {
12871 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
12872 all_procs.extend(procs);
12873 }
12874 info!(" Audit procedures: {}", all_procs.len());
12875 all_procs
12876 } else {
12877 Vec::new()
12878 };
12879
12880 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
12882 let finding_config =
12883 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
12884 finding_rate: cr_config.findings.finding_rate,
12885 material_weakness_rate: cr_config.findings.material_weakness_rate,
12886 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
12887 generate_remediation: cr_config.findings.generate_remediation,
12888 };
12889 let mut finding_gen =
12890 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
12891 self.seed + 9100,
12892 finding_config,
12893 );
12894 let mut all_findings = Vec::new();
12895 for company in &self.config.companies {
12896 let company_findings =
12897 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
12898 all_findings.extend(company_findings);
12899 }
12900 info!(" Compliance findings: {}", all_findings.len());
12901 all_findings
12902 } else {
12903 Vec::new()
12904 };
12905
12906 let filings = if cr_config.filings.enabled {
12908 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
12909 filing_types: cr_config.filings.filing_types.clone(),
12910 generate_status_progression: cr_config.filings.generate_status_progression,
12911 };
12912 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
12913 self.seed + 9200,
12914 filing_config,
12915 );
12916 let company_codes: Vec<String> = self
12917 .config
12918 .companies
12919 .iter()
12920 .map(|c| c.code.clone())
12921 .collect();
12922 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12923 .unwrap_or(fallback_date);
12924 let filings = filing_gen.generate_filings(
12925 &company_codes,
12926 &jurisdictions,
12927 start_date,
12928 self.config.global.period_months,
12929 );
12930 info!(" Regulatory filings: {}", filings.len());
12931 filings
12932 } else {
12933 Vec::new()
12934 };
12935
12936 let compliance_graph = if cr_config.graph.enabled {
12938 let graph_config = datasynth_graph::ComplianceGraphConfig {
12939 include_standard_nodes: cr_config.graph.include_compliance_nodes,
12940 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
12941 include_cross_references: cr_config.graph.include_cross_references,
12942 include_supersession_edges: cr_config.graph.include_supersession_edges,
12943 include_account_links: cr_config.graph.include_account_links,
12944 include_control_links: cr_config.graph.include_control_links,
12945 include_company_links: cr_config.graph.include_company_links,
12946 };
12947 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
12948
12949 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
12951 .iter()
12952 .map(|r| datasynth_graph::StandardNodeInput {
12953 standard_id: r.standard_id.clone(),
12954 title: r.title.clone(),
12955 category: r.category.clone(),
12956 domain: r.domain.clone(),
12957 is_active: r.is_active,
12958 features: vec![if r.is_active { 1.0 } else { 0.0 }],
12959 applicable_account_types: r.applicable_account_types.clone(),
12960 applicable_processes: r.applicable_processes.clone(),
12961 })
12962 .collect();
12963 builder.add_standards(&standard_inputs);
12964
12965 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
12967 jurisdiction_records
12968 .iter()
12969 .map(|r| datasynth_graph::JurisdictionNodeInput {
12970 country_code: r.country_code.clone(),
12971 country_name: r.country_name.clone(),
12972 framework: r.accounting_framework.clone(),
12973 standard_count: r.standard_count,
12974 tax_rate: r.statutory_tax_rate,
12975 })
12976 .collect();
12977 builder.add_jurisdictions(&jurisdiction_inputs);
12978
12979 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
12981 cross_reference_records
12982 .iter()
12983 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
12984 from_standard: r.from_standard.clone(),
12985 to_standard: r.to_standard.clone(),
12986 relationship: r.relationship.clone(),
12987 convergence_level: r.convergence_level,
12988 })
12989 .collect();
12990 builder.add_cross_references(&xref_inputs);
12991
12992 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
12994 .iter()
12995 .map(|r| datasynth_graph::JurisdictionMappingInput {
12996 country_code: r.jurisdiction.clone(),
12997 standard_id: r.standard_id.clone(),
12998 })
12999 .collect();
13000 builder.add_jurisdiction_mappings(&mapping_inputs);
13001
13002 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13004 .iter()
13005 .map(|p| datasynth_graph::ProcedureNodeInput {
13006 procedure_id: p.procedure_id.clone(),
13007 standard_id: p.standard_id.clone(),
13008 procedure_type: p.procedure_type.clone(),
13009 sample_size: p.sample_size,
13010 confidence_level: p.confidence_level,
13011 })
13012 .collect();
13013 builder.add_procedures(&proc_inputs);
13014
13015 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13017 .iter()
13018 .map(|f| datasynth_graph::FindingNodeInput {
13019 finding_id: f.finding_id.to_string(),
13020 standard_id: f
13021 .related_standards
13022 .first()
13023 .map(|s| s.as_str().to_string())
13024 .unwrap_or_default(),
13025 severity: f.severity.to_string(),
13026 deficiency_level: f.deficiency_level.to_string(),
13027 severity_score: f.deficiency_level.severity_score(),
13028 control_id: f.control_id.clone(),
13029 affected_accounts: f.affected_accounts.clone(),
13030 })
13031 .collect();
13032 builder.add_findings(&finding_inputs);
13033
13034 if cr_config.graph.include_account_links {
13036 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13037 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13038 for std_record in &standard_records {
13039 if let Some(std_obj) =
13040 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13041 &std_record.standard_id,
13042 ))
13043 {
13044 for acct_type in &std_obj.applicable_account_types {
13045 account_links.push(datasynth_graph::AccountLinkInput {
13046 standard_id: std_record.standard_id.clone(),
13047 account_code: acct_type.clone(),
13048 account_name: acct_type.clone(),
13049 });
13050 }
13051 }
13052 }
13053 builder.add_account_links(&account_links);
13054 }
13055
13056 if cr_config.graph.include_control_links {
13058 let mut control_links = Vec::new();
13059 let sox_like_ids: Vec<String> = standard_records
13061 .iter()
13062 .filter(|r| {
13063 r.standard_id.starts_with("SOX")
13064 || r.standard_id.starts_with("PCAOB-AS-2201")
13065 })
13066 .map(|r| r.standard_id.clone())
13067 .collect();
13068 let control_ids = [
13070 ("C001", "Cash Controls"),
13071 ("C002", "Large Transaction Approval"),
13072 ("C010", "PO Approval"),
13073 ("C011", "Three-Way Match"),
13074 ("C020", "Revenue Recognition"),
13075 ("C021", "Credit Check"),
13076 ("C030", "Manual JE Approval"),
13077 ("C031", "Period Close Review"),
13078 ("C032", "Account Reconciliation"),
13079 ("C040", "Payroll Processing"),
13080 ("C050", "Fixed Asset Capitalization"),
13081 ("C060", "Intercompany Elimination"),
13082 ];
13083 for sox_id in &sox_like_ids {
13084 for (ctrl_id, ctrl_name) in &control_ids {
13085 control_links.push(datasynth_graph::ControlLinkInput {
13086 standard_id: sox_id.clone(),
13087 control_id: ctrl_id.to_string(),
13088 control_name: ctrl_name.to_string(),
13089 });
13090 }
13091 }
13092 builder.add_control_links(&control_links);
13093 }
13094
13095 if cr_config.graph.include_company_links {
13097 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13098 .iter()
13099 .enumerate()
13100 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13101 filing_id: format!("F{:04}", i + 1),
13102 filing_type: f.filing_type.to_string(),
13103 company_code: f.company_code.clone(),
13104 jurisdiction: f.jurisdiction.clone(),
13105 status: format!("{:?}", f.status),
13106 })
13107 .collect();
13108 builder.add_filings(&filing_inputs);
13109 }
13110
13111 let graph = builder.build();
13112 info!(
13113 " Compliance graph: {} nodes, {} edges",
13114 graph.nodes.len(),
13115 graph.edges.len()
13116 );
13117 Some(graph)
13118 } else {
13119 None
13120 };
13121
13122 self.check_resources_with_log("post-compliance-regulations")?;
13123
13124 Ok(ComplianceRegulationsSnapshot {
13125 standard_records,
13126 cross_reference_records,
13127 jurisdiction_records,
13128 audit_procedures,
13129 findings,
13130 filings,
13131 compliance_graph,
13132 })
13133 }
13134
13135 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13137 use super::lineage::LineageGraphBuilder;
13138
13139 let mut builder = LineageGraphBuilder::new();
13140
13141 builder.add_config_section("config:global", "Global Config");
13143 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13144 builder.add_config_section("config:transactions", "Transaction Config");
13145
13146 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13148 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13149
13150 builder.configured_by("phase:coa", "config:chart_of_accounts");
13152 builder.configured_by("phase:je", "config:transactions");
13153
13154 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13156 builder.produced_by("output:je", "phase:je");
13157
13158 if self.phase_config.generate_master_data {
13160 builder.add_config_section("config:master_data", "Master Data Config");
13161 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13162 builder.configured_by("phase:master_data", "config:master_data");
13163 builder.input_to("phase:master_data", "phase:je");
13164 }
13165
13166 if self.phase_config.generate_document_flows {
13167 builder.add_config_section("config:document_flows", "Document Flow Config");
13168 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13169 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13170 builder.configured_by("phase:p2p", "config:document_flows");
13171 builder.configured_by("phase:o2c", "config:document_flows");
13172
13173 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13174 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13175 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13176 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13177 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13178
13179 builder.produced_by("output:po", "phase:p2p");
13180 builder.produced_by("output:gr", "phase:p2p");
13181 builder.produced_by("output:vi", "phase:p2p");
13182 builder.produced_by("output:so", "phase:o2c");
13183 builder.produced_by("output:ci", "phase:o2c");
13184 }
13185
13186 if self.phase_config.inject_anomalies {
13187 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13188 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13189 builder.configured_by("phase:anomaly", "config:fraud");
13190 builder.add_output_file(
13191 "output:labels",
13192 "Anomaly Labels",
13193 "labels/anomaly_labels.csv",
13194 );
13195 builder.produced_by("output:labels", "phase:anomaly");
13196 }
13197
13198 if self.phase_config.generate_audit {
13199 builder.add_config_section("config:audit", "Audit Config");
13200 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13201 builder.configured_by("phase:audit", "config:audit");
13202 }
13203
13204 if self.phase_config.generate_banking {
13205 builder.add_config_section("config:banking", "Banking Config");
13206 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13207 builder.configured_by("phase:banking", "config:banking");
13208 }
13209
13210 if self.config.llm.enabled {
13211 builder.add_config_section("config:llm", "LLM Enrichment Config");
13212 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13213 builder.configured_by("phase:llm_enrichment", "config:llm");
13214 }
13215
13216 if self.config.diffusion.enabled {
13217 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13218 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13219 builder.configured_by("phase:diffusion", "config:diffusion");
13220 }
13221
13222 if self.config.causal.enabled {
13223 builder.add_config_section("config:causal", "Causal Generation Config");
13224 builder.add_generator_phase("phase:causal", "Causal Overlay");
13225 builder.configured_by("phase:causal", "config:causal");
13226 }
13227
13228 builder.build()
13229 }
13230
13231 fn compute_company_revenue(
13240 entries: &[JournalEntry],
13241 company_code: &str,
13242 ) -> rust_decimal::Decimal {
13243 use rust_decimal::Decimal;
13244 let mut revenue = Decimal::ZERO;
13245 for je in entries {
13246 if je.header.company_code != company_code {
13247 continue;
13248 }
13249 for line in &je.lines {
13250 if line.gl_account.starts_with('4') {
13251 revenue += line.credit_amount - line.debit_amount;
13253 }
13254 }
13255 }
13256 revenue.max(Decimal::ZERO)
13257 }
13258
13259 fn compute_entity_net_assets(
13263 entries: &[JournalEntry],
13264 entity_code: &str,
13265 ) -> rust_decimal::Decimal {
13266 use rust_decimal::Decimal;
13267 let mut asset_net = Decimal::ZERO;
13268 let mut liability_net = Decimal::ZERO;
13269 for je in entries {
13270 if je.header.company_code != entity_code {
13271 continue;
13272 }
13273 for line in &je.lines {
13274 if line.gl_account.starts_with('1') {
13275 asset_net += line.debit_amount - line.credit_amount;
13276 } else if line.gl_account.starts_with('2') {
13277 liability_net += line.credit_amount - line.debit_amount;
13278 }
13279 }
13280 }
13281 asset_net - liability_net
13282 }
13283}
13284
13285fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13287 match format {
13288 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13289 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13290 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13291 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13292 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13293 }
13294}
13295
13296fn compute_trial_balance_entries(
13301 entries: &[JournalEntry],
13302 entity_code: &str,
13303 fiscal_year: i32,
13304 coa: Option<&ChartOfAccounts>,
13305) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13306 use std::collections::BTreeMap;
13307
13308 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13309 BTreeMap::new();
13310
13311 for je in entries {
13312 for line in &je.lines {
13313 let entry = balances.entry(line.account_code.clone()).or_default();
13314 entry.0 += line.debit_amount;
13315 entry.1 += line.credit_amount;
13316 }
13317 }
13318
13319 balances
13320 .into_iter()
13321 .map(
13322 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13323 account_description: coa
13324 .and_then(|c| c.get_account(&account_code))
13325 .map(|a| a.description().to_string())
13326 .unwrap_or_else(|| account_code.clone()),
13327 account_code,
13328 debit_balance: debit,
13329 credit_balance: credit,
13330 net_balance: debit - credit,
13331 entity_code: entity_code.to_string(),
13332 period: format!("FY{}", fiscal_year),
13333 },
13334 )
13335 .collect()
13336}
13337
13338#[cfg(test)]
13339#[allow(clippy::unwrap_used)]
13340mod tests {
13341 use super::*;
13342 use datasynth_config::schema::*;
13343
13344 fn create_test_config() -> GeneratorConfig {
13345 GeneratorConfig {
13346 global: GlobalConfig {
13347 industry: IndustrySector::Manufacturing,
13348 start_date: "2024-01-01".to_string(),
13349 period_months: 1,
13350 seed: Some(42),
13351 parallel: false,
13352 group_currency: "USD".to_string(),
13353 presentation_currency: None,
13354 worker_threads: 0,
13355 memory_limit_mb: 0,
13356 fiscal_year_months: None,
13357 },
13358 companies: vec![CompanyConfig {
13359 code: "1000".to_string(),
13360 name: "Test Company".to_string(),
13361 currency: "USD".to_string(),
13362 functional_currency: None,
13363 country: "US".to_string(),
13364 annual_transaction_volume: TransactionVolume::TenK,
13365 volume_weight: 1.0,
13366 fiscal_year_variant: "K4".to_string(),
13367 }],
13368 chart_of_accounts: ChartOfAccountsConfig {
13369 complexity: CoAComplexity::Small,
13370 industry_specific: true,
13371 custom_accounts: None,
13372 min_hierarchy_depth: 2,
13373 max_hierarchy_depth: 4,
13374 },
13375 transactions: TransactionConfig::default(),
13376 output: OutputConfig::default(),
13377 fraud: FraudConfig::default(),
13378 internal_controls: InternalControlsConfig::default(),
13379 business_processes: BusinessProcessConfig::default(),
13380 user_personas: UserPersonaConfig::default(),
13381 templates: TemplateConfig::default(),
13382 approval: ApprovalConfig::default(),
13383 departments: DepartmentConfig::default(),
13384 master_data: MasterDataConfig::default(),
13385 document_flows: DocumentFlowConfig::default(),
13386 intercompany: IntercompanyConfig::default(),
13387 balance: BalanceConfig::default(),
13388 ocpm: OcpmConfig::default(),
13389 audit: AuditGenerationConfig::default(),
13390 banking: datasynth_banking::BankingConfig::default(),
13391 data_quality: DataQualitySchemaConfig::default(),
13392 scenario: ScenarioConfig::default(),
13393 temporal: TemporalDriftConfig::default(),
13394 graph_export: GraphExportConfig::default(),
13395 streaming: StreamingSchemaConfig::default(),
13396 rate_limit: RateLimitSchemaConfig::default(),
13397 temporal_attributes: TemporalAttributeSchemaConfig::default(),
13398 relationships: RelationshipSchemaConfig::default(),
13399 accounting_standards: AccountingStandardsConfig::default(),
13400 audit_standards: AuditStandardsConfig::default(),
13401 distributions: Default::default(),
13402 temporal_patterns: Default::default(),
13403 vendor_network: VendorNetworkSchemaConfig::default(),
13404 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13405 relationship_strength: RelationshipStrengthSchemaConfig::default(),
13406 cross_process_links: CrossProcessLinksSchemaConfig::default(),
13407 organizational_events: OrganizationalEventsSchemaConfig::default(),
13408 behavioral_drift: BehavioralDriftSchemaConfig::default(),
13409 market_drift: MarketDriftSchemaConfig::default(),
13410 drift_labeling: DriftLabelingSchemaConfig::default(),
13411 anomaly_injection: Default::default(),
13412 industry_specific: Default::default(),
13413 fingerprint_privacy: Default::default(),
13414 quality_gates: Default::default(),
13415 compliance: Default::default(),
13416 webhooks: Default::default(),
13417 llm: Default::default(),
13418 diffusion: Default::default(),
13419 causal: Default::default(),
13420 source_to_pay: Default::default(),
13421 financial_reporting: Default::default(),
13422 hr: Default::default(),
13423 manufacturing: Default::default(),
13424 sales_quotes: Default::default(),
13425 tax: Default::default(),
13426 treasury: Default::default(),
13427 project_accounting: Default::default(),
13428 esg: Default::default(),
13429 country_packs: None,
13430 scenarios: Default::default(),
13431 session: Default::default(),
13432 compliance_regulations: Default::default(),
13433 }
13434 }
13435
/// Verifies that an orchestrator can be constructed from the shared test
/// configuration using the default phase settings.
#[test]
fn test_enhanced_orchestrator_creation() {
    let config = create_test_config();
    // `expect` prints the underlying error on failure; a bare `is_ok()`
    // assertion would only report that the boolean was false.
    let _orchestrator = EnhancedOrchestrator::with_defaults(config)
        .expect("orchestrator construction with default phases should succeed");
}
13442
/// Runs generation with only the journal-entry phase switched on and checks
/// that at least one journal entry is produced.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    // A single `expect` replaces the former `is_ok()` assertion followed by
    // `unwrap()`, and surfaces the generation error if one occurs.
    let result = orchestrator
        .generate()
        .expect("journal-entry-only generation should succeed");

    assert!(!result.journal_entries.is_empty());
}
13462
/// Enables only the master-data phase (with small per-company counts) and
/// checks that vendors, customers and materials are all non-empty.
#[test]
fn test_master_data_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Per-company volumes for the master-data generators.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        // Only master data is generated; every other listed phase is off.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let master = &output.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
13487
/// Enables master data plus document flows and checks that both P2P and O2C
/// chains, as well as purchase orders and sales orders, are produced.
#[test]
fn test_document_flow_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        // Phases switched on.
        generate_master_data: true,
        generate_document_flows: true,
        // Phases switched off.
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Chain-level output.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // Flattened document collections.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
13521
/// Generates journal entries with anomaly injection enabled and checks that
/// entries exist and that an anomaly-label summary is present.
#[test]
fn test_anomaly_injection() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    let summary = &output.anomaly_labels.summary;
    assert!(summary.is_some());
}
13544
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        // Phases switched on.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        // Phases switched off.
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Master data, document flows, journal entries and chart of accounts.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and saw entries.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
13586
/// Checks that subledger AP/AR invoices line up one-to-one (by count) with
/// the vendor/customer invoices from the document flows, and that the
/// statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        // Phases switched on.
        generate_master_data: true,
        generate_document_flows: true,
        // Phases switched off.
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Subledger counts match the document-flow counts.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics match the subledger counts.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
13642
/// Generates journal entries with balance validation on and checks that the
/// validation ran, found no unbalanced entries, and that total debits equal
/// total credits.
#[test]
fn test_balance_validation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation executed over a non-empty set of entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Aggregate debits and credits agree.
    assert_eq!(validation.total_debits, validation.total_credits);
}
13672
/// Checks that the vendor, customer, material and journal-entry counts in
/// the statistics equal the lengths of the corresponding result collections.
#[test]
fn test_statistics_accuracy() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        // Phases.
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
13711
/// Pins the values of `PhaseConfig::default()` that this test module relies on.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Flags expected to be on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is off by default.
    assert!(!defaults.inject_anomalies);

    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default master-data volumes are non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
13724
/// A freshly constructed orchestrator has no chart of accounts until
/// `generate` has been called.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_none());
}
13733
/// After a journal-entry generation run, `get_coa` returns a chart of accounts.
#[test]
fn test_get_coa_after_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    // Only the orchestrator state after the run matters here, so the
    // generation result is discarded.
    let _ = engine.generate().unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_some());
}
13752
/// Runs the master-data phase alone and checks that the result's master
/// data contains vendors.
#[test]
fn test_get_master_data() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        // Phases.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
13776
/// `with_progress(false)` turns off `show_progress` in the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let cfg = create_test_config();
    let base = EnhancedOrchestrator::with_defaults(cfg).unwrap();
    let quiet = base.with_progress(false);

    let progress_enabled = quiet.phase_config.show_progress;
    assert!(!progress_enabled);
}
13787
/// Adds a second company to the test configuration and checks that master
/// data is produced for both and that the statistics report two companies.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Five vendors and five customers are requested per company, so two
    // companies should yield at least ten of each.
    assert!(
        result.statistics.vendor_count >= 10,
        "expected at least 10 vendors, got {}",
        result.statistics.vendor_count
    );
    assert!(
        result.statistics.customer_count >= 10,
        "expected at least 10 customers, got {}",
        result.statistics.customer_count
    );
    // `assert_eq!` prints the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
13824
/// Requests document flows without master data and checks that no P2P or
/// O2C chains are produced.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Document flows are requested while master data is switched off.
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
13844
/// Checks that `statistics.total_line_items` equals the sum of `line_count()`
/// over every generated journal entry.
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total independently of the statistics.
    let mut expected_line_items: u64 = 0;
    for entry in output.journal_entries.iter() {
        expected_line_items += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, expected_line_items);
}
13868
/// Enables the audit phase with two engagements and checks that the audit
/// collections are populated and that every audit counter in the statistics
/// equals the length of its corresponding collection.
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Audit phase and its volumes.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        // Remaining phases.
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts: exactly the requested number of engagements,
    // and something in each dependent collection.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every statistics counter mirrors the length of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
13988
/// With the test configuration's defaults, the LLM, diffusion and causal
/// phases are disabled, and a run leaves all of their statistics (and the
/// counterfactual pairs) at zero / empty / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // The three optional phases are off in the configuration itself.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal overlay.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // Counterfactuals.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
14020
/// With counterfactual generation on (and period close off), checks that
/// there is one counterfactual pair per journal entry, that the statistics
/// agree, and that every `pair_id` is unique.
#[test]
fn test_counterfactual_generation_enabled() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_counterfactuals: true,
        generate_period_close: false,
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // NOTE(review): if no journal entries are generated, this test passes
    // without checking anything — confirm that is intended.
    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    let pairs = &output.counterfactual_pairs;

    assert_eq!(pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Collecting borrowed ids into a set collapses duplicates, so equal
    // sizes mean every id is distinct.
    let unique_ids: std::collections::HashSet<_> =
        pairs.iter().map(|pair| &pair.pair_id).collect();
    assert_eq!(unique_ids.len(), pairs.len());
}
14057
/// Enables LLM enrichment with a cap of three vendor enrichments and checks
/// that between one and three vendors were enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        // Phases.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
14085
/// Enables diffusion enhancement with a sample size of 20 and checks that
/// exactly 20 samples are reported in the statistics.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let samples = output.statistics.diffusion_samples_generated;
    assert_eq!(samples, 20);
}
14108
/// Enables the causal overlay with the `fraud_detection` template, 100
/// samples and validation on; checks the sample count and that a validation
/// outcome was recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome must be present.
    assert!(stats.causal_validation_passed.is_some());
}
14134
/// Enables the causal overlay with the `revenue_cycle` template, 50 samples
/// and validation off; checks the sample count and that no validation
/// outcome was recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome is expected.
    assert!(stats.causal_validation_passed.is_none());
}
14160
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in
/// one run and checks that each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal overlay (template left at its default).
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        // Phases.
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14196
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that
/// the LLM, diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    // LLM enrichment.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);

    // Diffusion enhancement.
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);

    // Causal overlay.
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
14223
/// Deserializes a statistics JSON document that has no LLM, diffusion or
/// causal keys and checks that those fields come back as zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // JSON payload without any of the LLM / diffusion / causal keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // LLM enrichment.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);

    // Diffusion enhancement.
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);

    // Causal overlay.
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
14273}