use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use chrono::{Datelike, NaiveDate};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use rand::SeedableRng;
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};
use datasynth_banking::{
models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
BankingOrchestratorBuilder,
};
use datasynth_config::schema::GeneratorConfig;
use datasynth_core::error::{SynthError, SynthResult};
use datasynth_core::models::audit::{
AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
UserEntityControl, Workpaper,
};
use datasynth_core::models::sourcing::{
BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
SupplierBid, SupplierQualification, SupplierScorecard,
};
use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
use datasynth_core::models::*;
use datasynth_core::traits::Generator;
use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
use datasynth_fingerprint::{
io::FingerprintReader,
models::Fingerprint,
synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
};
use datasynth_generators::{
apply_ap_settlements,
apply_ar_settlements,
opening_balance_to_jes,
AnomalyInjector,
AnomalyInjectorConfig,
AssetGenerator,
AuditEngagementGenerator,
BalanceTrackerConfig,
BankReconciliationGenerator,
BidEvaluationGenerator,
BidGenerator,
BusinessCombinationGenerator,
CatalogGenerator,
ChartOfAccountsGenerator,
ConsolidationGenerator,
ContractGenerator,
ControlGenerator,
ControlGeneratorConfig,
CustomerGenerator,
DataQualityConfig,
DataQualityInjector,
DataQualityStats,
DocumentFlowJeConfig,
DocumentFlowJeGenerator,
DocumentFlowLinker,
EclGenerator,
EmployeeGenerator,
EsgAnomalyLabel,
EvidenceGenerator,
FaDepreciationScheduleConfig,
FaDepreciationScheduleGenerator,
FinancialStatementGenerator,
FindingGenerator,
InventoryValuationGenerator,
InventoryValuationGeneratorConfig,
JournalEntryGenerator,
JudgmentGenerator,
LatePaymentDistribution,
ManufacturingCostAccounting,
MaterialGenerator,
O2CDocumentChain,
O2CGenerator,
O2CGeneratorConfig,
O2CPaymentBehavior,
P2PDocumentChain,
P2PGenerator,
P2PGeneratorConfig,
P2PPaymentBehavior,
PaymentReference,
ProvisionGenerator,
QualificationGenerator,
RfxGenerator,
RiskAssessmentGenerator,
RunningBalanceTracker,
ScorecardGenerator,
SegmentGenerator,
SegmentSeed,
SourcingProjectGenerator,
SpendAnalysisGenerator,
ValidationError,
VendorGenerator,
WarrantyProvisionGenerator,
WorkpaperGenerator,
};
use datasynth_graph::{
ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
TransactionGraphConfig,
};
use datasynth_ocpm::{
AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
OcpmUuidFactory, P2pDocuments, S2cDocuments,
};
use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
use datasynth_core::models::documents::PaymentMethod;
use datasynth_core::models::IndustrySector;
use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
use datasynth_generators::audit::sample_generator::SampleGenerator;
use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
use datasynth_generators::coa_generator::CoAFramework;
use datasynth_generators::llm_enrichment::VendorLlmEnricher;
use rayon::prelude::*;
/// Translate the schema-level P2P flow settings into the generator's
/// `P2PGeneratorConfig`, filling in a fixed payment-method mix and
/// defaults for optional schema fields.
fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
    let behavior = &schema_config.payment_behavior;
    let dist = &behavior.late_payment_days_distribution;

    // Lateness buckets copied one-to-one from the schema distribution.
    let late_payment_distribution = LatePaymentDistribution {
        slightly_late_1_to_7: dist.slightly_late_1_to_7,
        late_8_to_14: dist.late_8_to_14,
        very_late_15_to_30: dist.very_late_15_to_30,
        severely_late_31_to_60: dist.severely_late_31_to_60,
        extremely_late_over_60: dist.extremely_late_over_60,
    };

    let payment_behavior = P2PPaymentBehavior {
        late_payment_rate: behavior.late_payment_rate,
        late_payment_distribution,
        partial_payment_rate: behavior.partial_payment_rate,
        payment_correction_rate: behavior.payment_correction_rate,
        avg_days_until_remainder: behavior.avg_days_until_remainder,
    };

    P2PGeneratorConfig {
        three_way_match_rate: schema_config.three_way_match_rate,
        partial_delivery_rate: schema_config.partial_delivery_rate,
        // Optional in the schema; 2% over-delivery when unspecified.
        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
        price_variance_rate: schema_config.price_variance_rate,
        max_price_variance_percent: schema_config.max_price_variance_percent,
        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
        // Fixed mix: bank transfers dominate, credit cards are rare.
        payment_method_distribution: vec![
            (PaymentMethod::BankTransfer, 0.60),
            (PaymentMethod::Check, 0.25),
            (PaymentMethod::Wire, 0.10),
            (PaymentMethod::CreditCard, 0.05),
        ],
        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
        payment_behavior,
    }
}
/// Translate the schema-level O2C flow settings into the generator's
/// `O2CGeneratorConfig`, supplying a fixed payment-method mix and a
/// default late-payment rate when the schema leaves it unset.
fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
    let behavior = &schema_config.payment_behavior;

    let payment_behavior = O2CPaymentBehavior {
        partial_payment_rate: behavior.partial_payments.rate,
        short_payment_rate: behavior.short_payments.rate,
        max_short_percent: behavior.short_payments.max_short_percent,
        on_account_rate: behavior.on_account_payments.rate,
        payment_correction_rate: behavior.payment_corrections.rate,
        avg_days_until_remainder: behavior.partial_payments.avg_days_until_remainder,
    };

    O2CGeneratorConfig {
        credit_check_failure_rate: schema_config.credit_check_failure_rate,
        partial_shipment_rate: schema_config.partial_shipment_rate,
        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
        // Optional in the schema; 15% of invoices paid late when unspecified.
        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
        bad_debt_rate: schema_config.bad_debt_rate,
        returns_rate: schema_config.return_rate,
        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
        // Fixed mix: bank transfers dominate, credit cards are rare.
        payment_method_distribution: vec![
            (PaymentMethod::BankTransfer, 0.50),
            (PaymentMethod::Check, 0.30),
            (PaymentMethod::Wire, 0.15),
            (PaymentMethod::CreditCard, 0.05),
        ],
        payment_behavior,
    }
}
/// Selects which generation phases run and how much data each one
/// produces. Built either via [`Default`] (core phases only) or
/// [`PhaseConfig::from_config`] (driven by the full config file).
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    // --- Core pipeline toggles ---
    pub generate_master_data: bool,
    pub generate_document_flows: bool,
    pub generate_ocpm_events: bool,
    pub generate_journal_entries: bool,
    pub inject_anomalies: bool,
    pub inject_data_quality: bool,
    pub validate_balances: bool,
    pub show_progress: bool,
    // --- Volume knobs for master data and document flows ---
    pub vendors_per_company: usize,
    pub customers_per_company: usize,
    pub materials_per_company: usize,
    pub assets_per_company: usize,
    pub employees_per_company: usize,
    pub p2p_chains: usize,
    pub o2c_chains: usize,
    // --- Audit module toggle and sizing ---
    pub generate_audit: bool,
    pub audit_engagements: usize,
    pub workpapers_per_engagement: usize,
    pub evidence_per_workpaper: usize,
    pub risks_per_engagement: usize,
    pub findings_per_engagement: usize,
    pub judgments_per_engagement: usize,
    // --- Optional module toggles ---
    pub generate_banking: bool,
    pub generate_graph_export: bool,
    pub generate_sourcing: bool,
    pub generate_bank_reconciliation: bool,
    pub generate_financial_statements: bool,
    pub generate_accounting_standards: bool,
    pub generate_manufacturing: bool,
    pub generate_sales_kpi_budgets: bool,
    pub generate_tax: bool,
    pub generate_esg: bool,
    pub generate_intercompany: bool,
    pub generate_evolution_events: bool,
    pub generate_counterfactuals: bool,
    pub generate_compliance_regulations: bool,
    pub generate_period_close: bool,
    pub generate_hr: bool,
    pub generate_treasury: bool,
    pub generate_project_accounting: bool,
}
impl Default for PhaseConfig {
    /// Conservative defaults: run the core accounting pipeline
    /// (master data, document flows, journal entries, validation,
    /// period close, evolution events) and leave every optional
    /// module disabled.
    fn default() -> Self {
        Self {
            // Core pipeline phases.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,
            // Volume defaults per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,
            // Audit sizing (the phase itself is off by default).
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            // Optional modules.
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_evolution_events: true,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_period_close: true,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
        }
    }
}
impl PhaseConfig {
    /// Derive phase toggles from a full generator configuration.
    ///
    /// Core phases (master data, document flows, journal entries,
    /// balance validation, period close, evolution events, progress
    /// display) are always enabled; optional modules follow their
    /// respective `enabled` flags in `cfg`. Volume knobs keep the
    /// same fixed values as [`PhaseConfig::default`] — they are not
    /// read from the config here.
    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
        Self {
            // Always-on core pipeline.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            generate_period_close: true,
            generate_evolution_events: true,
            show_progress: true,
            // Optional modules driven by their config sections.
            generate_audit: cfg.audit.enabled,
            generate_banking: cfg.banking.enabled,
            generate_graph_export: cfg.graph_export.enabled,
            generate_sourcing: cfg.source_to_pay.enabled,
            generate_intercompany: cfg.intercompany.enabled,
            generate_financial_statements: cfg.financial_reporting.enabled,
            // Bank reconciliation piggybacks on the financial-reporting flag;
            // there is no dedicated config switch for it.
            generate_bank_reconciliation: cfg.financial_reporting.enabled,
            generate_accounting_standards: cfg.accounting_standards.enabled,
            generate_manufacturing: cfg.manufacturing.enabled,
            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
            generate_tax: cfg.tax.enabled,
            generate_esg: cfg.esg.enabled,
            generate_ocpm_events: cfg.ocpm.enabled,
            generate_compliance_regulations: cfg.compliance_regulations.enabled,
            generate_hr: cfg.hr.enabled,
            generate_treasury: cfg.treasury.enabled,
            generate_project_accounting: cfg.project_accounting.enabled,
            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
            // Anomalies are injected if either the fraud or the generic
            // anomaly-injection section is enabled.
            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
            inject_data_quality: cfg.data_quality.enabled,
            // Fixed volume defaults (not yet configurable).
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
/// Master-data entities produced in the first pipeline phase and
/// consumed by the downstream document-flow and journal generators.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    pub vendors: Vec<Vendor>,
    pub customers: Vec<Customer>,
    pub materials: Vec<Material>,
    pub assets: Vec<FixedAsset>,
    pub employees: Vec<Employee>,
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    // NOTE(review): presumably the temporal change log for employees
    // (hires/transfers/terminations) — confirm against the generator
    // that populates it.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
/// Summary statistics for a hypergraph export written to disk.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    pub node_count: usize,
    pub edge_count: usize,
    pub hyperedge_count: usize,
    /// Location of the exported artifact on disk.
    pub output_path: PathBuf,
}
/// All documents generated for the P2P and O2C process flows, both as
/// linked chains and as flat per-document-type collections.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    pub p2p_chains: Vec<P2PDocumentChain>,
    pub o2c_chains: Vec<O2CDocumentChain>,
    // P2P documents: PO -> goods receipt -> vendor invoice.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    // O2C documents: sales order -> delivery -> customer invoice.
    pub sales_orders: Vec<documents::SalesOrder>,
    pub deliveries: Vec<documents::Delivery>,
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments from both flows.
    pub payments: Vec<documents::Payment>,
    /// Cross-document links (e.g. invoice -> PO) for lineage tracing.
    pub document_references: Vec<documents::DocumentReference>,
}
/// Subledger state derived from the document flows: AP/AR open items,
/// fixed assets, inventory, and related periodic reports.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    pub ap_invoices: Vec<APInvoice>,
    pub ar_invoices: Vec<ARInvoice>,
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    pub ar_aging_reports: Vec<ARAgingReport>,
    pub ap_aging_reports: Vec<APAgingReport>,
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
/// Result of the object-centric process mining (OCPM) event phase.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The generated event log; `None` when the OCPM phase is disabled.
    pub event_log: Option<OcpmEventLog>,
    pub event_count: usize,
    pub object_count: usize,
    pub case_count: usize,
}
/// Everything produced by the audit module: engagements and their
/// workpapers/evidence, risk and materiality artifacts, group-audit
/// structures, SOX/ISA/PCAOB mappings and the optional FSM event trail.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    // --- Core engagement artifacts ---
    pub engagements: Vec<AuditEngagement>,
    pub workpapers: Vec<Workpaper>,
    pub evidence: Vec<AuditEvidence>,
    pub risk_assessments: Vec<RiskAssessment>,
    pub findings: Vec<AuditFinding>,
    pub judgments: Vec<ProfessionalJudgment>,
    // --- Confirmations, procedures and sampling ---
    pub confirmations: Vec<ExternalConfirmation>,
    pub confirmation_responses: Vec<ConfirmationResponse>,
    pub procedure_steps: Vec<AuditProcedureStep>,
    pub samples: Vec<AuditSample>,
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    // --- Internal audit and related parties ---
    pub ia_functions: Vec<InternalAuditFunction>,
    pub ia_reports: Vec<InternalAuditReport>,
    pub related_parties: Vec<RelatedParty>,
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    // --- Group audit (component auditors) ---
    pub component_auditors: Vec<ComponentAuditor>,
    pub group_audit_plan: Option<GroupAuditPlan>,
    pub component_instructions: Vec<ComponentInstruction>,
    pub component_reports: Vec<ComponentAuditorReport>,
    // --- Engagement-level documents and service organizations ---
    pub engagement_letters: Vec<EngagementLetter>,
    pub subsequent_events: Vec<SubsequentEvent>,
    pub service_organizations: Vec<ServiceOrganization>,
    pub soc_reports: Vec<SocReport>,
    pub user_entity_controls: Vec<UserEntityControl>,
    // --- Assessments, opinions and regulatory artifacts ---
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Event trail from the audit state machine, when that mode is used.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
/// Output of the banking module: customers, accounts, transactions,
/// and the AML-style labels attached to them.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    pub customers: Vec<BankingCustomer>,
    pub accounts: Vec<BankAccount>,
    pub transactions: Vec<BankTransaction>,
    // Ground-truth labels per entity kind.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    pub suspicious_count: usize,
    pub scenario_count: usize,
}
/// Summary of the graph-export phase, keyed by graph name.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    pub exported: bool,
    pub graph_count: usize,
    /// Per-graph export details, keyed by graph name.
    pub exports: HashMap<String, GraphExportInfo>,
}
/// Details of a single exported graph (name, on-disk format/location
/// and size statistics).
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    pub name: String,
    pub format: String,
    pub output_path: PathBuf,
    pub node_count: usize,
    pub edge_count: usize,
}
/// Output of the source-to-pay module: spend analysis, sourcing
/// projects and the RFx → bid → contract → catalog lifecycle.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    pub spend_analyses: Vec<SpendAnalysis>,
    pub sourcing_projects: Vec<SourcingProject>,
    pub qualifications: Vec<SupplierQualification>,
    pub rfx_events: Vec<RfxEvent>,
    pub bids: Vec<SupplierBid>,
    pub bid_evaluations: Vec<BidEvaluation>,
    pub contracts: Vec<ProcurementContract>,
    pub catalog_items: Vec<CatalogItem>,
    pub scorecards: Vec<SupplierScorecard>,
}
/// Trial balance for one fiscal period, with the period boundaries it
/// was computed over.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    pub fiscal_year: u16,
    pub fiscal_period: u8,
    pub period_start: NaiveDate,
    pub period_end: NaiveDate,
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
/// Financial-reporting outputs: statements (standalone and
/// consolidated), consolidation schedules, bank reconciliations,
/// per-period trial balances, and segment/notes disclosures.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements keyed by company code.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    pub consolidated_statements: Vec<FinancialStatement>,
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    pub bank_reconciliations: Vec<BankReconciliation>,
    pub trial_balances: Vec<PeriodTrialBalance>,
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
/// HR module output: payroll, time, expenses, benefits, pensions and
/// stock compensation, plus their posting journal entries.
///
/// The trailing `_count` fields duplicate the vector lengths;
/// presumably they survive when the vectors are drained/streamed out —
/// confirm against the consumers before relying on either.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    pub payroll_runs: Vec<PayrollRun>,
    pub payroll_line_items: Vec<PayrollLineItem>,
    pub time_entries: Vec<TimeEntry>,
    pub expense_reports: Vec<ExpenseReport>,
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    // Defined-benefit pension accounting artifacts.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    pub pension_journal_entries: Vec<JournalEntry>,
    // Share-based compensation artifacts.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    // Summary counts.
    pub payroll_run_count: usize,
    pub payroll_line_item_count: usize,
    pub time_entry_count: usize,
    pub expense_report_count: usize,
    pub benefit_enrollment_count: usize,
    pub pension_plan_count: usize,
    pub stock_grant_count: usize,
}
/// Output of the accounting-standards module: revenue contracts,
/// impairment tests, business combinations, expected-credit-loss
/// models, provisions and currency translation, plus the journal
/// entries each area posts.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    pub business_combination_journal_entries: Vec<JournalEntry>,
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    pub ecl_journal_entries: Vec<JournalEntry>,
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    pub provision_journal_entries: Vec<JournalEntry>,
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    // Summary counts.
    pub revenue_contract_count: usize,
    pub impairment_test_count: usize,
    pub business_combination_count: usize,
    pub ecl_model_count: usize,
    pub provision_count: usize,
    pub currency_translation_count: usize,
}
/// Output of the compliance-regulations module: standards, their
/// cross-references and jurisdictions, audit procedures, findings,
/// filings, and an optional compliance relationship graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
/// Output of the manufacturing module: production orders, quality
/// inspections, cycle counts, BOM components and inventory movements,
/// with duplicate summary counts.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    pub production_orders: Vec<ProductionOrder>,
    pub quality_inspections: Vec<QualityInspection>,
    pub cycle_counts: Vec<CycleCount>,
    pub bom_components: Vec<BomComponent>,
    pub inventory_movements: Vec<InventoryMovement>,
    // Summary counts.
    pub production_order_count: usize,
    pub quality_inspection_count: usize,
    pub cycle_count_count: usize,
    pub bom_component_count: usize,
    pub inventory_movement_count: usize,
}
/// Output of the sales quotes / management-KPI / budgets module.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    pub sales_quotes: Vec<SalesQuote>,
    pub kpis: Vec<ManagementKpi>,
    pub budgets: Vec<Budget>,
    // Summary counts.
    pub sales_quote_count: usize,
    pub kpi_count: usize,
    pub budget_line_count: usize,
}
/// Ground-truth labels for injected anomalies, with an optional
/// aggregate summary and per-type counts.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    pub labels: Vec<LabeledAnomaly>,
    pub summary: Option<AnomalySummary>,
    /// Number of labeled anomalies keyed by anomaly-type name.
    pub by_type: HashMap<String, usize>,
}
/// Result of the balance-validation phase over all journal entries.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether the validation phase actually ran.
    pub validated: bool,
    /// Whether total debits equal total credits overall.
    pub is_balanced: bool,
    pub entries_processed: u64,
    pub total_debits: rust_decimal::Decimal,
    pub total_credits: rust_decimal::Decimal,
    pub accounts_tracked: usize,
    pub companies_tracked: usize,
    pub validation_errors: Vec<ValidationError>,
    pub has_unbalanced_entries: bool,
}
/// Output of the tax module: jurisdictions and codes, transaction tax
/// lines, returns, provisions, withholding, deferred tax and the
/// resulting posting journal entries.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    pub jurisdictions: Vec<TaxJurisdiction>,
    pub codes: Vec<TaxCode>,
    pub tax_lines: Vec<TaxLine>,
    pub tax_returns: Vec<TaxReturn>,
    pub tax_provisions: Vec<TaxProvision>,
    pub withholding_records: Vec<WithholdingTaxRecord>,
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    pub jurisdiction_count: usize,
    pub code_count: usize,
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
/// Output of the intercompany module: group structure, matched IC
/// pairs with their seller/buyer postings, eliminations and NCI
/// measurements.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    pub seller_journal_entries: Vec<JournalEntry>,
    pub buyer_journal_entries: Vec<JournalEntry>,
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    // Skipped during (de)serialization — internal document chains only,
    // rebuilt rather than persisted.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    pub matched_pair_count: usize,
    pub elimination_entry_count: usize,
    /// Fraction of IC transactions successfully matched into pairs.
    pub match_rate: f64,
}
/// Output of the ESG module: environmental metrics, social/workforce
/// metrics, governance, supplier assessments, disclosures, climate
/// scenarios and anomaly labels.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    // Environmental.
    pub emissions: Vec<EmissionRecord>,
    pub energy: Vec<EnergyConsumption>,
    pub water: Vec<WaterUsage>,
    pub waste: Vec<WasteRecord>,
    // Social / workforce.
    pub diversity: Vec<WorkforceDiversityMetric>,
    pub pay_equity: Vec<PayEquityMetric>,
    pub safety_incidents: Vec<SafetyIncident>,
    pub safety_metrics: Vec<SafetyMetric>,
    // Governance and reporting.
    pub governance: Vec<GovernanceMetric>,
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    pub materiality: Vec<MaterialityAssessment>,
    pub disclosures: Vec<EsgDisclosure>,
    pub climate_scenarios: Vec<ClimateScenario>,
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    // Summary counts.
    pub emission_count: usize,
    pub disclosure_count: usize,
}
/// Output of the treasury module: cash management, hedging, debt,
/// guarantees, netting, anomaly labels and posting journal entries.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    pub cash_positions: Vec<CashPosition>,
    pub cash_forecasts: Vec<CashForecast>,
    pub cash_pools: Vec<CashPool>,
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    pub hedging_instruments: Vec<HedgingInstrument>,
    pub hedge_relationships: Vec<HedgeRelationship>,
    pub debt_instruments: Vec<DebtInstrument>,
    pub bank_guarantees: Vec<BankGuarantee>,
    pub netting_runs: Vec<NettingRun>,
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    pub journal_entries: Vec<JournalEntry>,
}
/// Output of the project-accounting module: projects with their cost
/// lines, revenue recognition, earned-value metrics, change orders and
/// milestones.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    pub projects: Vec<Project>,
    pub cost_lines: Vec<ProjectCostLine>,
    pub revenue_records: Vec<ProjectRevenue>,
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    pub change_orders: Vec<ChangeOrder>,
    pub milestones: Vec<ProjectMilestone>,
}
/// Aggregate result of a full orchestrated generation run: one
/// snapshot per module, the combined journal entries, labels,
/// validation results and run statistics.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    pub chart_of_accounts: ChartOfAccounts,
    // Per-module snapshots.
    pub master_data: MasterDataSnapshot,
    pub document_flows: DocumentFlowSnapshot,
    pub subledger: SubledgerSnapshot,
    pub ocpm: OcpmSnapshot,
    pub audit: AuditSnapshot,
    pub banking: BankingSnapshot,
    pub graph_export: GraphExportSnapshot,
    pub sourcing: SourcingSnapshot,
    pub financial_reporting: FinancialReportingSnapshot,
    pub hr: HrSnapshot,
    pub accounting_standards: AccountingStandardsSnapshot,
    pub manufacturing: ManufacturingSnapshot,
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    pub tax: TaxSnapshot,
    pub esg: EsgSnapshot,
    pub treasury: TreasurySnapshot,
    pub project_accounting: ProjectAccountingSnapshot,
    // Evolution / disruption events over the generated timeline.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    pub organizational_events: Vec<OrganizationalEvent>,
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    pub intercompany: IntercompanySnapshot,
    /// All journal entries across modules.
    pub journal_entries: Vec<JournalEntry>,
    // Labels, validation and quality artifacts.
    pub anomaly_labels: AnomalyLabels,
    pub balance_validation: BalanceValidationResult,
    pub data_quality_stats: DataQualityStats,
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    pub statistics: EnhancedGenerationStatistics,
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Result of evaluation quality gates, when they were run.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    // Controls and fraud artifacts.
    pub internal_controls: Vec<InternalControl>,
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific extension output, when an industry pack ran.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
/// Flat, serializable summary statistics for a generation run.
///
/// Fields added after the first release carry `#[serde(default)]` so
/// that statistics files written by older versions still deserialize.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Core journal / ledger volumes ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    pub companies_count: usize,
    pub period_months: u32,
    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    // --- Document flows and subledgers ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,
    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,
    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    // --- Injection phases ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,
    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,
    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,
    // --- LLM enrichment, diffusion and causal generation timings ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,
    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,
    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,
    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,
    // --- Sales / KPI / budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,
    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,
    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,
    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,
    // --- Subledger detail ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,
    // --- Treasury ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    // --- Treasury cash management ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,
    // --- Evolution, fraud and relationship artifacts ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,
    #[serde(default)]
    pub disruption_event_count: usize,
    // --- Industry packs and period close ---
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
/// Multi-phase synthetic-data orchestrator that ties together the
/// validated configuration, deterministic seeding, resource guarding
/// and optional streaming output for a single generation run.
pub struct EnhancedOrchestrator {
    /// Full generator configuration (validated in [`Self::new`]).
    config: GeneratorConfig,
    /// Which phases run and their volume knobs.
    phase_config: PhaseConfig,
    /// Chart of accounts, set once generated; `Arc` so phases can share it.
    coa: Option<Arc<ChartOfAccounts>>,
    master_data: MasterDataSnapshot,
    /// Base RNG seed — configured value or randomly drawn at construction.
    seed: u64,
    /// Present only while progress rendering is enabled.
    multi_progress: Option<MultiProgress>,
    resource_guard: ResourceGuard,
    output_path: Option<PathBuf>,
    copula_generators: Vec<CopulaGeneratorSpec>,
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink that receives items as phases complete.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
impl EnhancedOrchestrator {
/// Constructs an orchestrator from a validated configuration and phase plan.
///
/// Validates `config`, derives the RNG seed (configured value or a fresh
/// random one), builds the resource guard, and loads the country-pack
/// registry (external packs plus overrides when configured, built-ins
/// otherwise).
///
/// # Errors
/// Returns an error when config validation or country-pack loading fails.
pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
    datasynth_config::validate_config(&config)?;
    // Deterministic runs supply a seed; otherwise draw one at random.
    let seed = config.global.seed.unwrap_or_else(rand::random);
    let guard = Self::build_resource_guard(&config, None);
    let registry = if let Some(cp) = &config.country_packs {
        datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
            .map_err(|e| SynthError::config(e.to_string()))?
    } else {
        datasynth_core::CountryPackRegistry::builtin_only()
            .map_err(|e| SynthError::config(e.to_string()))?
    };
    Ok(Self {
        config,
        phase_config,
        coa: None,
        master_data: MasterDataSnapshot::default(),
        seed,
        multi_progress: None,
        resource_guard: guard,
        output_path: None,
        copula_generators: Vec::new(),
        country_pack_registry: registry,
        phase_sink: None,
    })
}
/// Convenience constructor: builds an orchestrator with the default
/// [`PhaseConfig`] (all standard phases enabled).
pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
    let phases = PhaseConfig::default();
    Self::new(config, phases)
}
/// Installs a streaming phase sink (builder style) and returns `self`.
pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
    self.set_phase_sink(sink);
    self
}
/// Installs (or replaces) the streaming phase sink in place.
pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
    self.phase_sink = Some(sink);
}
/// Streams `items` to the configured phase sink (if any) as JSON values,
/// then signals phase completion. No-op when no sink is installed.
///
/// All failures are logged and do not abort generation: streaming is a
/// best-effort side channel alongside the in-memory result.
fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
    let sink = match &self.phase_sink {
        Some(sink) => sink,
        None => return,
    };
    for item in items {
        match serde_json::to_value(item) {
            Ok(value) => {
                if let Err(e) = sink.emit(phase, type_name, &value) {
                    warn!(
                        "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
                    );
                }
            }
            // Previously, serialization failures were silently dropped;
            // surface them so missing items in the stream are diagnosable.
            Err(e) => {
                warn!(
                    "Stream sink serialization failed for phase '{phase}', type '{type_name}': {e}"
                );
            }
        }
    }
    if let Err(e) = sink.phase_complete(phase) {
        warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
    }
}
/// Enables or disables progress-bar rendering (builder style).
///
/// Keeps `multi_progress` consistent with the flag: a `MultiProgress` is
/// created when enabling and dropped when disabling, so a later
/// `with_progress(false)` fully reverts an earlier `with_progress(true)`
/// (previously the stale container was retained).
pub fn with_progress(mut self, show: bool) -> Self {
    self.phase_config.show_progress = show;
    self.multi_progress = if show { Some(MultiProgress::new()) } else { None };
    self
}
/// Sets the output directory (builder style) and rebuilds the resource
/// guard so free-disk checks watch the target volume.
pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
    let path: PathBuf = path.into();
    self.resource_guard = Self::build_resource_guard(&self.config, Some(path.clone()));
    self.output_path = Some(path);
    self
}
/// Returns the country-pack registry used for localization lookups.
pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
    &self.country_pack_registry
}
/// Looks up the country pack for a country code string.
pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
    self.country_pack_registry.get_by_str(country)
}
/// Country code of the first configured company; "US" when no companies
/// are configured.
fn primary_country_code(&self) -> &str {
    match self.config.companies.first() {
        Some(company) => company.country.as_str(),
        None => "US",
    }
}
/// Country pack for the primary (first-configured) company.
fn primary_pack(&self) -> &datasynth_core::CountryPack {
    let code = self.primary_country_code();
    self.country_pack_for(code)
}
/// Determines the chart-of-accounts framework to generate.
///
/// An explicitly enabled accounting-standards framework (French or German
/// GAAP) takes precedence; otherwise the framework advertised by the
/// primary company's country pack decides, defaulting to US GAAP.
fn resolve_coa_framework(&self) -> CoAFramework {
    use datasynth_config::schema::AccountingFrameworkConfig as Fw;
    if self.config.accounting_standards.enabled {
        if let Some(fw) = self.config.accounting_standards.framework {
            match fw {
                Fw::FrenchGaap => return CoAFramework::FrenchPcg,
                Fw::GermanGaap => return CoAFramework::GermanSkr04,
                // Other frameworks fall through to the country-pack default.
                _ => {}
            }
        }
    }
    match self.primary_pack().accounting.framework.as_str() {
        "french_gaap" => CoAFramework::FrenchPcg,
        "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
        _ => CoAFramework::UsGaap,
    }
}
/// True when fingerprint-derived copula generators are attached.
pub fn has_copulas(&self) -> bool {
    !self.copula_generators.is_empty()
}
/// Read-only view of the attached copula generator specs.
pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
    self.copula_generators.as_slice()
}
/// Mutable view of the attached copula generator specs (e.g. for sampling).
pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
    self.copula_generators.as_mut_slice()
}
/// Draws one sample vector from the copula generator named `copula_name`,
/// or `None` when no generator with that name is attached.
pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
    for spec in self.copula_generators.iter_mut() {
        if spec.name == copula_name {
            return Some(spec.generator.sample());
        }
    }
    None
}
/// Loads a fingerprint file from disk and builds an orchestrator whose
/// configuration is synthesized from it at the given `scale`.
///
/// # Errors
/// Returns a config error when the fingerprint cannot be read or parsed,
/// or when downstream synthesis fails.
pub fn from_fingerprint(
    fingerprint_path: &std::path::Path,
    phase_config: PhaseConfig,
    scale: f64,
) -> SynthResult<Self> {
    info!("Loading fingerprint from: {}", fingerprint_path.display());
    let fingerprint = FingerprintReader::new()
        .read_from_file(fingerprint_path)
        .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
    Self::from_fingerprint_data(fingerprint, phase_config, scale)
}
/// Builds an orchestrator whose configuration is synthesized from an
/// already-loaded fingerprint.
///
/// Draws a random synthesis seed, synthesizes a config patch plus copula
/// generator specs, applies the patch on top of an industry-appropriate
/// base preset, and attaches the copula generators to the new orchestrator.
pub fn from_fingerprint_data(
    fingerprint: Fingerprint,
    phase_config: PhaseConfig,
    scale: f64,
) -> SynthResult<Self> {
    info!(
        "Synthesizing config from fingerprint (version: {}, tables: {})",
        fingerprint.manifest.version,
        fingerprint.schema.tables.len()
    );
    let seed: u64 = rand::random();
    info!("Fingerprint synthesis seed: {}", seed);
    let options = SynthesisOptions {
        scale,
        seed: Some(seed),
        preserve_correlations: true,
        inject_anomalies: true,
    };
    let result = ConfigSynthesizer::with_options(options)
        .synthesize_full(&fingerprint, seed)
        .map_err(|e| {
            SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
        })?;
    // Fall back to the manufacturing preset when the fingerprint carries no
    // industry hint.
    let industry = fingerprint
        .manifest
        .source
        .industry
        .as_deref()
        .unwrap_or("manufacturing");
    let base = Self::base_config_for_industry(industry);
    let config = Self::apply_config_patch(base, &result.config_patch);
    info!(
        "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
        fingerprint.schema.tables.len(),
        scale,
        result.copula_generators.len()
    );
    for spec in &result.copula_generators {
        info!(
            " Copula '{}' for table '{}': {} columns",
            spec.name,
            spec.table,
            spec.columns.len()
        );
    }
    let mut orchestrator = Self::new(config, phase_config)?;
    orchestrator.copula_generators = result.copula_generators;
    Ok(orchestrator)
}
/// Returns a baseline `GeneratorConfig` preset for the given industry name
/// (case-insensitive). Unrecognized industries fall back to manufacturing.
fn base_config_for_industry(industry: &str) -> GeneratorConfig {
    use datasynth_config::presets::create_preset;
    use datasynth_config::TransactionVolume;
    use datasynth_core::models::{CoAComplexity, IndustrySector};
    let key = industry.to_lowercase();
    let sector = match key.as_str() {
        "retail" => IndustrySector::Retail,
        "financial" | "financial_services" => IndustrySector::FinancialServices,
        "healthcare" => IndustrySector::Healthcare,
        "technology" | "tech" => IndustrySector::Technology,
        // "manufacturing" and any unrecognized value.
        _ => IndustrySector::Manufacturing,
    };
    // One company, a 12-month period, medium CoA complexity, ~10k transactions.
    create_preset(sector, 1, 12, CoAComplexity::Medium, TransactionVolume::TenK)
}
/// Applies a fingerprint-derived `ConfigPatch` onto a base config.
///
/// Only a fixed set of known keys is honored; unknown keys are logged at
/// debug level and ignored. NOTE(review): `fraud.enabled` and
/// `anomaly_injection.enabled` (and likewise the two rate keys) both write
/// the same `config.fraud` fields, so when a patch contains both, the
/// winner depends on `patch.values()` iteration order — confirm this
/// aliasing is intended.
fn apply_config_patch(
mut config: GeneratorConfig,
patch: &datasynth_fingerprint::synthesis::ConfigPatch,
) -> GeneratorConfig {
use datasynth_fingerprint::synthesis::ConfigValue;
for (key, value) in patch.values() {
match (key.as_str(), value) {
// Transaction counts are not applied directly; they are expected to be
// realized through per-company volume settings instead.
("transactions.count", ConfigValue::Integer(n)) => {
info!(
"Fingerprint suggests {} transactions (apply via company volumes)",
n
);
}
// Clamp to a sane range (1 month to 10 years) before narrowing to u32.
("global.period_months", ConfigValue::Integer(n)) => {
config.global.period_months = (*n).clamp(1, 120) as u32;
}
("global.start_date", ConfigValue::String(s)) => {
config.global.start_date = s.clone();
}
("global.seed", ConfigValue::Integer(n)) => {
config.global.seed = Some(*n as u64);
}
("fraud.enabled", ConfigValue::Bool(b)) => {
config.fraud.enabled = *b;
}
("fraud.fraud_rate", ConfigValue::Float(f)) => {
config.fraud.fraud_rate = *f;
}
("data_quality.enabled", ConfigValue::Bool(b)) => {
config.data_quality.enabled = *b;
}
// Anomaly-injection keys alias the fraud settings (see doc note above).
("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
config.fraud.enabled = *b;
}
("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
config.fraud.fraud_rate = *f;
}
_ => {
debug!("Ignoring unknown config patch key: {}", key);
}
}
}
config
}
/// Builds a `ResourceGuard` from the configured memory limit and, when an
/// output path is known, a minimum-free-disk requirement on that path.
/// Always applies the conservative threshold profile.
fn build_resource_guard(
    config: &GeneratorConfig,
    output_path: Option<PathBuf>,
) -> ResourceGuard {
    let mut builder = ResourceGuardBuilder::new();
    // A zero limit means "unlimited" — skip the memory constraint entirely.
    if config.global.memory_limit_mb > 0 {
        builder = builder.memory_limit(config.global.memory_limit_mb);
    }
    if let Some(path) = output_path {
        // Require at least 100 MB free on the output volume.
        builder = builder.output_path(path).min_free_disk(100);
    }
    builder.conservative().build()
}
/// Polls the resource guard and reports the current degradation level.
fn check_resources(&self) -> SynthResult<DegradationLevel> {
    self.resource_guard.check()
}
/// Polls the resource guard and, when any degradation is reported, logs a
/// warning tagged with the current phase plus memory/disk figures.
fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
    let level = self.resource_guard.check()?;
    if level == DegradationLevel::Normal {
        return Ok(level);
    }
    warn!(
        "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
        phase,
        level,
        self.resource_guard.current_memory_mb(),
        self.resource_guard.available_disk_mb()
    );
    Ok(level)
}
/// Returns the guard's recommended degradation actions for the current state.
fn get_degradation_actions(&self) -> DegradationActions {
    self.resource_guard.get_actions()
}
/// Runs a resource check, discarding the degradation level and surfacing
/// only hard errors (e.g. emergency memory conditions).
fn check_memory_limit(&self) -> SynthResult<()> {
    self.check_resources().map(|_level| ())
}
pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
info!("Starting enhanced generation workflow");
info!(
"Config: industry={:?}, period_months={}, companies={}",
self.config.global.industry,
self.config.global.period_months,
self.config.companies.len()
);
let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
datasynth_core::serde_decimal::set_numeric_native(is_native);
struct NumericModeGuard;
impl Drop for NumericModeGuard {
fn drop(&mut self) {
datasynth_core::serde_decimal::set_numeric_native(false);
}
}
let _numeric_guard = if is_native {
Some(NumericModeGuard)
} else {
None
};
let initial_level = self.check_resources_with_log("initial")?;
if initial_level == DegradationLevel::Emergency {
return Err(SynthError::resource(
"Insufficient resources to start generation",
));
}
let mut stats = EnhancedGenerationStatistics {
companies_count: self.config.companies.len(),
period_months: self.config.global.period_months,
..Default::default()
};
let coa = self.phase_chart_of_accounts(&mut stats)?;
self.phase_master_data(&mut stats)?;
self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
self.emit_phase_items("master_data", "Material", &self.master_data.materials);
let (mut document_flows, mut subledger, fa_journal_entries) =
self.phase_document_flows(&mut stats)?;
self.emit_phase_items(
"document_flows",
"PurchaseOrder",
&document_flows.purchase_orders,
);
self.emit_phase_items(
"document_flows",
"GoodsReceipt",
&document_flows.goods_receipts,
);
self.emit_phase_items(
"document_flows",
"VendorInvoice",
&document_flows.vendor_invoices,
);
self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
let opening_balance_jes: Vec<JournalEntry> = opening_balances
.iter()
.flat_map(|ob| opening_balance_to_jes(ob, &coa))
.collect();
if !opening_balance_jes.is_empty() {
debug!(
"Prepending {} opening balance JEs to entries",
opening_balance_jes.len()
);
}
let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
if !opening_balance_jes.is_empty() {
let mut combined = opening_balance_jes;
combined.extend(entries);
entries = combined;
}
if !fa_journal_entries.is_empty() {
debug!(
"Appending {} FA acquisition JEs to main entries",
fa_journal_entries.len()
);
entries.extend(fa_journal_entries);
}
let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
let actions = self.get_degradation_actions();
let mut sourcing = self.phase_sourcing_data(&mut stats)?;
if !sourcing.contracts.is_empty() {
let mut linked_count = 0usize;
let po_vendor_pairs: Vec<(String, String)> = document_flows
.p2p_chains
.iter()
.map(|chain| {
(
chain.purchase_order.vendor_id.clone(),
chain.purchase_order.header.document_id.clone(),
)
})
.collect();
for chain in &mut document_flows.p2p_chains {
if chain.purchase_order.contract_id.is_none() {
if let Some(contract) = sourcing
.contracts
.iter()
.find(|c| c.vendor_id == chain.purchase_order.vendor_id)
{
chain.purchase_order.contract_id = Some(contract.contract_id.clone());
linked_count += 1;
}
}
}
for contract in &mut sourcing.contracts {
let po_ids: Vec<String> = po_vendor_pairs
.iter()
.filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
.map(|(_, po_id)| po_id.clone())
.collect();
if !po_ids.is_empty() {
contract.purchase_order_ids = po_ids;
}
}
if linked_count > 0 {
debug!(
"Linked {} purchase orders to S2C contracts by vendor match",
linked_count
);
}
}
let intercompany = self.phase_intercompany(&entries, &mut stats)?;
if !intercompany.seller_journal_entries.is_empty()
|| !intercompany.buyer_journal_entries.is_empty()
{
let ic_je_count = intercompany.seller_journal_entries.len()
+ intercompany.buyer_journal_entries.len();
entries.extend(intercompany.seller_journal_entries.iter().cloned());
entries.extend(intercompany.buyer_journal_entries.iter().cloned());
debug!(
"Appended {} IC journal entries to main entries",
ic_je_count
);
}
if !intercompany.elimination_entries.is_empty() {
let elim_jes = datasynth_generators::elimination_to_journal_entries(
&intercompany.elimination_entries,
);
if !elim_jes.is_empty() {
debug!(
"Appended {} elimination journal entries to main entries",
elim_jes.len()
);
let elim_debit: rust_decimal::Decimal =
elim_jes.iter().map(|je| je.total_debit()).sum();
let elim_credit: rust_decimal::Decimal =
elim_jes.iter().map(|je| je.total_credit()).sum();
let elim_diff = (elim_debit - elim_credit).abs();
let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
return Err(datasynth_core::error::SynthError::generation(format!(
"IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
elim_debit, elim_credit, elim_diff, tolerance
)));
}
debug!(
"IC elimination balance verified: debits={}, credits={} (diff={})",
elim_debit, elim_credit, elim_diff
);
entries.extend(elim_jes);
}
}
if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
document_flows
.customer_invoices
.extend(ic_docs.seller_invoices.iter().cloned());
document_flows
.purchase_orders
.extend(ic_docs.buyer_orders.iter().cloned());
document_flows
.goods_receipts
.extend(ic_docs.buyer_goods_receipts.iter().cloned());
document_flows
.vendor_invoices
.extend(ic_docs.buyer_invoices.iter().cloned());
debug!(
"Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
ic_docs.seller_invoices.len(),
ic_docs.buyer_orders.len(),
ic_docs.buyer_goods_receipts.len(),
ic_docs.buyer_invoices.len(),
);
}
}
let hr = self.phase_hr_data(&mut stats)?;
if !hr.payroll_runs.is_empty() {
let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
debug!("Generated {} JEs from payroll runs", payroll_jes.len());
entries.extend(payroll_jes);
}
if !hr.pension_journal_entries.is_empty() {
debug!(
"Generated {} JEs from pension plans",
hr.pension_journal_entries.len()
);
entries.extend(hr.pension_journal_entries.iter().cloned());
}
if !hr.stock_comp_journal_entries.is_empty() {
debug!(
"Generated {} JEs from stock-based compensation",
hr.stock_comp_journal_entries.len()
);
entries.extend(hr.stock_comp_journal_entries.iter().cloned());
}
let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
if !manufacturing_snap.production_orders.is_empty() {
let currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
&manufacturing_snap.production_orders,
&manufacturing_snap.quality_inspections,
currency,
);
debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
entries.extend(mfg_jes);
}
if !manufacturing_snap.quality_inspections.is_empty() {
let framework = match self.config.accounting_standards.framework {
Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
_ => "US_GAAP",
};
for company in &self.config.companies {
let company_orders: Vec<_> = manufacturing_snap
.production_orders
.iter()
.filter(|o| o.company_code == company.code)
.cloned()
.collect();
let company_inspections: Vec<_> = manufacturing_snap
.quality_inspections
.iter()
.filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
.cloned()
.collect();
if company_inspections.is_empty() {
continue;
}
let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
let warranty_result = warranty_gen.generate(
&company.code,
&company_orders,
&company_inspections,
&company.currency,
framework,
);
if !warranty_result.journal_entries.is_empty() {
debug!(
"Generated {} warranty provision JEs for {}",
warranty_result.journal_entries.len(),
company.code
);
entries.extend(warranty_result.journal_entries);
}
}
}
if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
{
let cogs_currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
&document_flows.deliveries,
&manufacturing_snap.production_orders,
cogs_currency,
);
if !cogs_jes.is_empty() {
debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
entries.extend(cogs_jes);
}
}
if !manufacturing_snap.inventory_movements.is_empty()
&& !subledger.inventory_positions.is_empty()
{
use datasynth_core::models::MovementType as MfgMovementType;
let mut receipt_count = 0usize;
let mut issue_count = 0usize;
for movement in &manufacturing_snap.inventory_movements {
if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
p.material_id == movement.material_code
&& p.company_code == movement.entity_code
}) {
match movement.movement_type {
MfgMovementType::GoodsReceipt => {
pos.add_quantity(
movement.quantity,
movement.value,
movement.movement_date,
);
receipt_count += 1;
}
MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
issue_count += 1;
}
_ => {}
}
}
}
debug!(
"Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
manufacturing_snap.inventory_movements.len(),
receipt_count,
issue_count,
);
}
if !entries.is_empty() {
stats.total_entries = entries.len() as u64;
stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
debug!(
"Final entry count: {}, line items: {} (after all JE-generating phases)",
stats.total_entries, stats.total_line_items
);
}
if self.config.internal_controls.enabled && !entries.is_empty() {
info!("Phase 7b: Applying internal controls to journal entries");
let control_config = ControlGeneratorConfig {
exception_rate: self.config.internal_controls.exception_rate,
sod_violation_rate: self.config.internal_controls.sod_violation_rate,
enable_sox_marking: true,
sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
self.config.internal_controls.sox_materiality_threshold,
)
.unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
..Default::default()
};
let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
for entry in &mut entries {
control_gen.apply_controls(entry, &coa);
}
let with_controls = entries
.iter()
.filter(|e| !e.header.control_ids.is_empty())
.count();
info!(
"Applied controls to {} entries ({} with control IDs assigned)",
entries.len(),
with_controls
);
}
let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
.iter()
.filter(|e| e.header.sod_violation)
.filter_map(|e| {
e.header.sod_conflict_type.map(|ct| {
use datasynth_core::models::{RiskLevel, SodViolation};
let severity = match ct {
datasynth_core::models::SodConflictType::PaymentReleaser
| datasynth_core::models::SodConflictType::RequesterApprover => {
RiskLevel::Critical
}
datasynth_core::models::SodConflictType::PreparerApprover
| datasynth_core::models::SodConflictType::MasterDataMaintainer
| datasynth_core::models::SodConflictType::JournalEntryPoster
| datasynth_core::models::SodConflictType::SystemAccessConflict => {
RiskLevel::High
}
datasynth_core::models::SodConflictType::ReconcilerPoster => {
RiskLevel::Medium
}
};
let action = format!(
"SoD conflict {:?} on entry {} ({})",
ct, e.header.document_id, e.header.company_code
);
SodViolation::new(ct, e.header.created_by.clone(), action, severity)
})
})
.collect();
if !sod_violations.is_empty() {
info!(
"Phase 7c: Extracted {} SoD violations from {} entries",
sod_violations.len(),
entries.len()
);
}
self.emit_phase_items("journal_entries", "JournalEntry", &entries);
{
let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
if self.config.fraud.enabled && doc_rate > 0.0 {
use datasynth_core::fraud_propagation::{
inject_document_fraud, propagate_documents_to_entries,
};
use datasynth_core::utils::weighted_select;
use datasynth_core::FraudType;
use rand_chacha::rand_core::SeedableRng;
let dist = &self.config.fraud.fraud_type_distribution;
let fraud_type_weights: [(FraudType, f64); 8] = [
(FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
(FraudType::FictitiousEntry, dist.fictitious_transaction),
(FraudType::RevenueManipulation, dist.revenue_manipulation),
(
FraudType::ImproperCapitalization,
dist.expense_capitalization,
),
(FraudType::SplitTransaction, dist.split_transaction),
(FraudType::TimingAnomaly, dist.timing_anomaly),
(FraudType::UnauthorizedAccess, dist.unauthorized_access),
(FraudType::DuplicatePayment, dist.duplicate_payment),
];
let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
if weights_sum <= 0.0 {
FraudType::FictitiousEntry
} else {
*weighted_select(rng, &fraud_type_weights)
}
};
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
let mut doc_tagged = 0usize;
macro_rules! inject_into {
($collection:expr) => {{
let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
$collection.iter_mut().map(|d| &mut d.header).collect();
doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
}};
}
inject_into!(document_flows.purchase_orders);
inject_into!(document_flows.goods_receipts);
inject_into!(document_flows.vendor_invoices);
inject_into!(document_flows.payments);
inject_into!(document_flows.sales_orders);
inject_into!(document_flows.deliveries);
inject_into!(document_flows.customer_invoices);
if doc_tagged > 0 {
info!(
"Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
);
}
if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
Vec::new();
headers.extend(
document_flows
.purchase_orders
.iter()
.map(|d| d.header.clone()),
);
headers.extend(
document_flows
.goods_receipts
.iter()
.map(|d| d.header.clone()),
);
headers.extend(
document_flows
.vendor_invoices
.iter()
.map(|d| d.header.clone()),
);
headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
headers.extend(
document_flows
.customer_invoices
.iter()
.map(|d| d.header.clone()),
);
let propagated = propagate_documents_to_entries(&headers, &mut entries);
if propagated > 0 {
info!(
"Propagated document-level fraud to {propagated} derived journal entries"
);
}
}
}
}
let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
{
use datasynth_core::fraud_bias::{
apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
};
use rand_chacha::rand_core::SeedableRng;
let cfg = FraudBehavioralBiasConfig::default();
if cfg.enabled {
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
let mut swept = 0usize;
for entry in entries.iter_mut() {
if entry.header.is_fraud && !entry.header.is_anomaly {
apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
swept += 1;
}
}
if swept > 0 {
info!(
"Applied behavioral biases to {swept} non-anomaly fraud entries \
(doc-propagated + je_generator intrinsic fraud)"
);
}
}
}
self.emit_phase_items(
"anomaly_injection",
"LabeledAnomaly",
&anomaly_labels.labels,
);
if self.config.fraud.propagate_to_document {
use std::collections::HashMap;
let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
for je in &entries {
if je.header.is_fraud {
if let Some(ref fraud_type) = je.header.fraud_type {
if let Some(ref reference) = je.header.reference {
fraud_map.insert(reference.clone(), *fraud_type);
if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
if !bare.is_empty() {
fraud_map.insert(bare.to_string(), *fraud_type);
}
}
}
fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
}
}
}
if !fraud_map.is_empty() {
let mut propagated = 0usize;
macro_rules! propagate_to {
($collection:expr) => {
for doc in &mut $collection {
if doc.header.propagate_fraud(&fraud_map) {
propagated += 1;
}
}
};
}
propagate_to!(document_flows.purchase_orders);
propagate_to!(document_flows.goods_receipts);
propagate_to!(document_flows.vendor_invoices);
propagate_to!(document_flows.payments);
propagate_to!(document_flows.sales_orders);
propagate_to!(document_flows.deliveries);
propagate_to!(document_flows.customer_invoices);
if propagated > 0 {
info!(
"Propagated fraud labels to {} document flow records",
propagated
);
}
}
}
let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
self.emit_phase_items("red_flags", "RedFlag", &red_flags);
let collusion_rings = self.phase_collusion_rings(&mut stats)?;
self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
let balance_validation = self.phase_balance_validation(&entries)?;
let subledger_reconciliation =
self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
let (data_quality_stats, quality_issues) =
self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
self.phase_period_close(&mut entries, &subledger, &mut stats)?;
{
let tolerance = rust_decimal::Decimal::new(1, 2);
let mut unbalanced_clean = 0usize;
for je in &entries {
if je.header.is_fraud || je.header.is_anomaly {
continue;
}
let diff = (je.total_debit() - je.total_credit()).abs();
if diff > tolerance {
unbalanced_clean += 1;
if unbalanced_clean <= 3 {
warn!(
"Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
je.header.document_id,
je.total_debit(),
je.total_credit(),
diff
);
}
}
}
if unbalanced_clean > 0 {
return Err(datasynth_core::error::SynthError::generation(format!(
"{} non-anomaly JEs are unbalanced (debits != credits). \
First few logged above. Tolerance={}",
unbalanced_clean, tolerance
)));
}
debug!(
"Phase 10c: All {} non-anomaly JEs individually balanced",
entries
.iter()
.filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
.count()
);
let company_codes: Vec<String> = self
.config
.companies
.iter()
.map(|c| c.code.clone())
.collect();
for company_code in &company_codes {
let mut assets = rust_decimal::Decimal::ZERO;
let mut liab_equity = rust_decimal::Decimal::ZERO;
for entry in &entries {
if entry.header.company_code != *company_code {
continue;
}
for line in &entry.lines {
let acct = &line.gl_account;
let net = line.debit_amount - line.credit_amount;
if acct.starts_with('1') {
assets += net;
}
else if acct.starts_with('2') || acct.starts_with('3') {
liab_equity -= net; }
}
}
let bs_diff = (assets - liab_equity).abs();
if bs_diff > tolerance {
warn!(
"Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
revenue/expense closing entries may not fully offset",
company_code, assets, liab_equity, bs_diff
);
} else {
debug!(
"Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
company_code, assets, liab_equity, bs_diff
);
}
}
info!("Phase 10c: All generation-time accounting assertions passed");
}
let audit = self.phase_audit_data(&entries, &mut stats)?;
let mut banking = self.phase_banking_data(&mut stats)?;
if self.phase_config.generate_banking
&& !document_flows.payments.is_empty()
&& !banking.accounts.is_empty()
{
let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
if bridge_rate > 0.0 {
let mut bridge =
datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
self.seed,
);
let (bridged_txns, bridge_stats) = bridge.bridge_payments(
&document_flows.payments,
&banking.customers,
&banking.accounts,
bridge_rate,
);
info!(
"Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
bridge_stats.bridged_count,
bridge_stats.transactions_emitted,
bridge_stats.fraud_propagated,
);
let bridged_count = bridged_txns.len();
banking.transactions.extend(bridged_txns);
if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
datasynth_banking::generators::velocity_computer::compute_velocity_features(
&mut banking.transactions,
);
}
banking.suspicious_count = banking
.transactions
.iter()
.filter(|t| t.is_suspicious)
.count();
stats.banking_transaction_count = banking.transactions.len();
stats.banking_suspicious_count = banking.suspicious_count;
}
}
let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
self.phase_llm_enrichment(&mut stats);
self.phase_diffusion_enhancement(&mut stats);
self.phase_causal_overlay(&mut stats);
let mut financial_reporting = self.phase_financial_reporting(
&document_flows,
&entries,
&coa,
&hr,
&audit,
&mut stats,
)?;
{
use datasynth_core::models::StatementType;
for stmt in &financial_reporting.consolidated_statements {
if stmt.statement_type == StatementType::BalanceSheet {
let total_assets: rust_decimal::Decimal = stmt
.line_items
.iter()
.filter(|li| li.section.to_uppercase().contains("ASSET"))
.map(|li| li.amount)
.sum();
let total_le: rust_decimal::Decimal = stmt
.line_items
.iter()
.filter(|li| !li.section.to_uppercase().contains("ASSET"))
.map(|li| li.amount)
.sum();
if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
warn!(
"BS equation imbalance: assets={}, L+E={}",
total_assets, total_le
);
}
}
}
}
let accounting_standards =
self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
if !accounting_standards.ecl_journal_entries.is_empty() {
debug!(
"Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
accounting_standards.ecl_journal_entries.len()
);
entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
}
if !accounting_standards.provision_journal_entries.is_empty() {
debug!(
"Generated {} JEs from provisions (IAS 37 / ASC 450)",
accounting_standards.provision_journal_entries.len()
);
entries.extend(
accounting_standards
.provision_journal_entries
.iter()
.cloned(),
);
}
let mut ocpm = self.phase_ocpm_events(
&document_flows,
&sourcing,
&hr,
&manufacturing_snap,
&banking,
&audit,
&financial_reporting,
&mut stats,
)?;
if let Some(ref event_log) = ocpm.event_log {
self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
}
if let Some(ref event_log) = ocpm.event_log {
let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
std::collections::HashMap::new();
for (idx, event) in event_log.events.iter().enumerate() {
if let Some(ref doc_ref) = event.document_ref {
doc_index.entry(doc_ref.as_str()).or_default().push(idx);
}
}
if !doc_index.is_empty() {
let mut annotated = 0usize;
for entry in &mut entries {
let doc_id_str = entry.header.document_id.to_string();
let mut matched_indices: Vec<usize> = Vec::new();
if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
matched_indices.extend(indices);
}
if let Some(ref reference) = entry.header.reference {
let bare_ref = reference
.find(':')
.map(|i| &reference[i + 1..])
.unwrap_or(reference.as_str());
if let Some(indices) = doc_index.get(bare_ref) {
for &idx in indices {
if !matched_indices.contains(&idx) {
matched_indices.push(idx);
}
}
}
}
if !matched_indices.is_empty() {
for &idx in &matched_indices {
let event = &event_log.events[idx];
if !entry.header.ocpm_event_ids.contains(&event.event_id) {
entry.header.ocpm_event_ids.push(event.event_id);
}
for obj_ref in &event.object_refs {
if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
entry.header.ocpm_object_ids.push(obj_ref.object_id);
}
}
if entry.header.ocpm_case_id.is_none() {
entry.header.ocpm_case_id = event.case_id;
}
}
annotated += 1;
}
}
debug!(
"Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
annotated
);
}
}
if let Some(ref mut event_log) = ocpm.event_log {
let synthesized =
datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
if synthesized > 0 {
info!(
"Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
);
}
let anomaly_events =
datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
if anomaly_events > 0 {
info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
}
let p2p_cfg = &self.config.ocpm.p2p_process;
let any_imperfection = p2p_cfg.rework_probability > 0.0
|| p2p_cfg.skip_step_probability > 0.0
|| p2p_cfg.out_of_order_probability > 0.0;
if any_imperfection {
use rand_chacha::rand_core::SeedableRng;
let imp_cfg = datasynth_ocpm::ImperfectionConfig {
rework_rate: p2p_cfg.rework_probability,
skip_rate: p2p_cfg.skip_step_probability,
out_of_order_rate: p2p_cfg.out_of_order_probability,
};
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
let stats =
datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
if stats.rework + stats.skipped + stats.out_of_order > 0 {
info!(
"Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
stats.rework, stats.skipped, stats.out_of_order
);
}
}
}
let sales_kpi_budgets =
self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
let treasury =
self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
if !treasury.journal_entries.is_empty() {
debug!(
"Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
treasury.journal_entries.len()
);
entries.extend(treasury.journal_entries.iter().cloned());
}
let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
if !tax.tax_posting_journal_entries.is_empty() {
debug!(
"Merging {} tax posting JEs into GL",
tax.tax_posting_journal_entries.len()
);
entries.extend(tax.tax_posting_journal_entries.iter().cloned());
}
{
use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
let framework_str = {
use datasynth_config::schema::AccountingFrameworkConfig;
match self
.config
.accounting_standards
.framework
.unwrap_or_default()
{
AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
"IFRS"
}
_ => "US_GAAP",
}
};
let depreciation_total: rust_decimal::Decimal = entries
.iter()
.filter(|je| je.header.document_type == "CL")
.flat_map(|je| je.lines.iter())
.filter(|l| l.gl_account.starts_with("6000"))
.map(|l| l.debit_amount)
.fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
let interest_paid: rust_decimal::Decimal = entries
.iter()
.flat_map(|je| je.lines.iter())
.filter(|l| l.gl_account.starts_with("7100"))
.map(|l| l.debit_amount)
.fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
let tax_paid: rust_decimal::Decimal = entries
.iter()
.flat_map(|je| je.lines.iter())
.filter(|l| l.gl_account.starts_with("8000"))
.map(|l| l.debit_amount)
.fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
let capex: rust_decimal::Decimal = entries
.iter()
.flat_map(|je| je.lines.iter())
.filter(|l| l.gl_account.starts_with("1500"))
.map(|l| l.debit_amount)
.fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
let dividends_paid: rust_decimal::Decimal = entries
.iter()
.flat_map(|je| je.lines.iter())
.filter(|l| l.gl_account == "2170")
.map(|l| l.debit_amount)
.fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
let cf_data = CashFlowSourceData {
depreciation_total,
provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
delta_ap: rust_decimal::Decimal::ZERO,
delta_inventory: rust_decimal::Decimal::ZERO,
capex,
debt_issuance: rust_decimal::Decimal::ZERO,
debt_repayment: rust_decimal::Decimal::ZERO,
interest_paid,
tax_paid,
dividends_paid,
framework: framework_str.to_string(),
};
let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
if !enhanced_cf_items.is_empty() {
use datasynth_core::models::StatementType;
let merge_count = enhanced_cf_items.len();
for stmt in financial_reporting
.financial_statements
.iter_mut()
.chain(financial_reporting.consolidated_statements.iter_mut())
.chain(
financial_reporting
.standalone_statements
.values_mut()
.flat_map(|v| v.iter_mut()),
)
{
if stmt.statement_type == StatementType::CashFlowStatement {
stmt.cash_flow_items.extend(enhanced_cf_items.clone());
}
}
info!(
"Enhanced cash flow: {} supplementary items merged into CF statements",
merge_count
);
}
}
self.generate_notes_to_financial_statements(
&mut financial_reporting,
&accounting_standards,
&tax,
&hr,
&audit,
&treasury,
);
if self.config.companies.len() >= 2 && !entries.is_empty() {
let companies: Vec<(String, String)> = self
.config
.companies
.iter()
.map(|c| (c.code.clone(), c.name.clone()))
.collect();
let ic_elim: rust_decimal::Decimal =
intercompany.matched_pairs.iter().map(|p| p.amount).sum();
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.unwrap_or(NaiveDate::MIN);
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
let period_label = format!(
"{}-{:02}",
end_date.year(),
(end_date - chrono::Days::new(1)).month()
);
let mut seg_gen = SegmentGenerator::new(self.seed + 31);
let (je_segments, je_recon) =
seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
if !je_segments.is_empty() {
info!(
"Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
je_segments.len(),
ic_elim,
);
if financial_reporting.segment_reports.is_empty() {
financial_reporting.segment_reports = je_segments;
financial_reporting.segment_reconciliations = vec![je_recon];
} else {
financial_reporting.segment_reports.extend(je_segments);
financial_reporting.segment_reconciliations.push(je_recon);
}
}
}
let esg_snap =
self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
let disruption_events = self.phase_disruption_events(&mut stats)?;
let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
let (entity_relationship_graph, cross_process_links) =
self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
let industry_output = self.phase_industry_data(&mut stats);
let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
if self.config.diffusion.enabled
&& (self.config.diffusion.backend == "neural"
|| self.config.diffusion.backend == "hybrid")
{
let neural = &self.config.diffusion.neural;
const VALID_STRATEGIES: &[&str] = &["weighted_average", "column_select", "threshold"];
if !VALID_STRATEGIES.contains(&neural.hybrid_strategy.as_str()) {
warn!(
"Unknown diffusion.neural.hybrid_strategy='{}' — expected one of {:?}; \
falling back to 'weighted_average'.",
neural.hybrid_strategy, VALID_STRATEGIES
);
}
let weight = neural.hybrid_weight.clamp(0.0, 1.0);
if (weight - neural.hybrid_weight).abs() > f64::EPSILON {
warn!(
"diffusion.neural.hybrid_weight={} clamped to [0,1] → {}",
neural.hybrid_weight, weight
);
}
info!(
"Phase neural enhancement: backend={} strategy={} weight={:.2} columns={} \
(neural_columns: {:?})",
self.config.diffusion.backend,
neural.hybrid_strategy,
weight,
neural.neural_columns.len(),
neural.neural_columns,
);
stats.neural_hybrid_weight = Some(weight);
stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
stats.neural_routed_column_count = Some(neural.neural_columns.len());
}
self.phase_hypergraph_export(
&coa,
&entries,
&document_flows,
&sourcing,
&hr,
&manufacturing_snap,
&banking,
&audit,
&financial_reporting,
&ocpm,
&compliance_regulations,
&mut stats,
)?;
if self.phase_config.generate_graph_export {
self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
}
if self.config.streaming.enabled {
info!("Note: streaming config is enabled but batch mode does not use it");
}
if self.config.vendor_network.enabled {
debug!("Vendor network config available; relationship graph generation is partial");
}
if self.config.customer_segmentation.enabled {
debug!("Customer segmentation config available; segment-aware generation is partial");
}
let resource_stats = self.resource_guard.stats();
info!(
"Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
resource_stats.memory.peak_resident_bytes / (1024 * 1024),
resource_stats.disk.estimated_bytes_written,
resource_stats.degradation_level
);
if let Some(ref sink) = self.phase_sink {
if let Err(e) = sink.flush() {
warn!("Stream sink flush failed: {e}");
}
}
let lineage = self.build_lineage_graph();
let gate_result = if self.config.quality_gates.enabled {
let profile_name = &self.config.quality_gates.profile;
match datasynth_eval::gates::get_profile(profile_name) {
Some(profile) => {
let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
if balance_validation.validated {
eval.coherence.balance =
Some(datasynth_eval::coherence::BalanceSheetEvaluation {
equation_balanced: balance_validation.is_balanced,
max_imbalance: (balance_validation.total_debits
- balance_validation.total_credits)
.abs(),
periods_evaluated: 1,
periods_imbalanced: if balance_validation.is_balanced {
0
} else {
1
},
period_results: Vec::new(),
companies_evaluated: self.config.companies.len(),
});
}
eval.coherence.passes = balance_validation.is_balanced;
if !balance_validation.is_balanced {
eval.coherence
.failures
.push("Balance sheet equation not satisfied".to_string());
}
eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
eval.statistical.passes = !entries.is_empty();
eval.quality.overall_score = 0.9; eval.quality.passes = true;
let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
info!(
"Quality gates evaluated (profile '{}'): {}/{} passed — {}",
profile_name, result.gates_passed, result.gates_total, result.summary
);
Some(result)
}
None => {
warn!(
"Quality gates enabled but profile '{}' not found; skipping gate evaluation",
profile_name
);
None
}
}
} else {
None
};
let internal_controls = if self.config.internal_controls.enabled {
InternalControl::standard_controls()
} else {
Vec::new()
};
Ok(EnhancedGenerationResult {
chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
master_data: std::mem::take(&mut self.master_data),
document_flows,
subledger,
ocpm,
audit,
banking,
graph_export,
sourcing,
financial_reporting,
hr,
accounting_standards,
manufacturing: manufacturing_snap,
sales_kpi_budgets,
tax,
esg: esg_snap,
treasury,
project_accounting,
process_evolution,
organizational_events,
disruption_events,
intercompany,
journal_entries: entries,
anomaly_labels,
balance_validation,
data_quality_stats,
quality_issues,
statistics: stats,
lineage: Some(lineage),
gate_result,
internal_controls,
sod_violations,
opening_balances,
subledger_reconciliation,
counterfactual_pairs,
red_flags,
collusion_rings,
temporal_vendor_chains,
entity_relationship_graph,
cross_process_links,
industry_output,
compliance_regulations,
})
}
/// Phase 1: generates the Chart of Accounts, records the account count in
/// `stats`, and performs a post-phase resource check.
fn phase_chart_of_accounts(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Arc<ChartOfAccounts>> {
    info!("Phase 1: Generating Chart of Accounts");
    let chart = self.generate_coa()?;
    let account_total = chart.account_count();
    stats.accounts_count = account_total;
    info!(
        "Chart of Accounts generated: {} accounts",
        account_total
    );
    // Verify memory/disk headroom before moving on to the next phase.
    self.check_resources_with_log("post-coa")?;
    Ok(chart)
}
/// Phase 2: generates master data (vendors, customers, materials, assets,
/// employees) and copies the resulting entity counts into `stats`.
/// Skips cleanly when master-data generation is disabled in the phase config.
fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
    if !self.phase_config.generate_master_data {
        debug!("Phase 2: Skipped (master data generation disabled)");
        return Ok(());
    }
    info!("Phase 2: Generating Master Data");
    self.generate_master_data()?;
    // Snapshot the per-entity counts for reporting.
    {
        let md = &self.master_data;
        stats.vendor_count = md.vendors.len();
        stats.customer_count = md.customers.len();
        stats.material_count = md.materials.len();
        stats.asset_count = md.assets.len();
        stats.employee_count = md.employees.len();
    }
    info!(
        "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
        stats.vendor_count, stats.customer_count, stats.material_count,
        stats.asset_count, stats.employee_count
    );
    self.check_resources_with_log("post-master-data")?;
    Ok(())
}
/// Phase 3: generates P2P/O2C document flows and derives the subledgers.
///
/// Pipeline (order matters — each step consumes the previous step's output):
/// 1. Generate document flows (requires vendor master data).
/// 2. Link flows into AP/AR subledger invoices.
/// 3. Apply payment settlements to AP and AR invoices.
/// 4. Build AR/AP aging reports per company as of period end.
/// 5. Execute dunning runs on overdue AR invoices (produces dunning JEs).
/// 6. Seed the FA subledger from asset master data (produces acquisition JEs).
/// 7. Seed inventory positions from material master data.
/// 8. Generate depreciation runs and inventory valuations.
///
/// Returns the document-flow snapshot, the populated subledger snapshot, and
/// the journal entries produced along the way (dunning JEs followed by FA
/// acquisition JEs).
fn phase_document_flows(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
    let mut document_flows = DocumentFlowSnapshot::default();
    let mut subledger = SubledgerSnapshot::default();
    let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
    // Document flows need vendors to draw from; without master data the
    // whole 3/3b sub-pipeline is skipped.
    if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
        info!("Phase 3: Generating Document Flows");
        self.generate_document_flows(&mut document_flows)?;
        stats.p2p_chain_count = document_flows.p2p_chains.len();
        stats.o2c_chain_count = document_flows.o2c_chains.len();
        info!(
            "Document flows generated: {} P2P chains, {} O2C chains",
            stats.p2p_chain_count, stats.o2c_chain_count
        );
        debug!("Phase 3b: Linking document flows to subledgers");
        subledger = self.link_document_flows_to_subledgers(&document_flows)?;
        stats.ap_invoice_count = subledger.ap_invoices.len();
        stats.ar_invoice_count = subledger.ar_invoices.len();
        debug!(
            "Subledgers linked: {} AP invoices, {} AR invoices",
            stats.ap_invoice_count, stats.ar_invoice_count
        );
        debug!("Phase 3b-settle: Applying payment settlements to subledgers");
        // Settlements must run before aging so open-item status is current.
        apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
        apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
        debug!("Payment settlements applied to AP and AR subledgers");
        // Aging/dunning only run if the configured start date parses; a bad
        // date silently skips these steps rather than failing the phase.
        if let Ok(start_date) =
            NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        {
            // Period end = start + period_months, minus one day (inclusive).
            let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
                - chrono::Days::new(1);
            debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
            for company in &self.config.companies {
                let ar_report = ARAgingReport::from_invoices(
                    company.code.clone(),
                    &subledger.ar_invoices,
                    as_of_date,
                );
                subledger.ar_aging_reports.push(ar_report);
                let ap_report = APAgingReport::from_invoices(
                    company.code.clone(),
                    &subledger.ap_invoices,
                    as_of_date,
                );
                subledger.ap_aging_reports.push(ap_report);
            }
            debug!(
                "AR/AP aging reports built: {} AR, {} AP",
                subledger.ar_aging_reports.len(),
                subledger.ap_aging_reports.len()
            );
            debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
            {
                use datasynth_generators::DunningGenerator;
                let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
                for company in &self.config.companies {
                    let currency = company.currency.as_str();
                    // The dunning generator mutates invoices in place, so work
                    // on a per-company clone and merge the results back below.
                    let mut company_invoices: Vec<
                        datasynth_core::models::subledger::ar::ARInvoice,
                    > = subledger
                        .ar_invoices
                        .iter()
                        .filter(|inv| inv.company_code == company.code)
                        .cloned()
                        .collect();
                    if company_invoices.is_empty() {
                        continue;
                    }
                    let result = dunning_gen.execute_dunning_run(
                        &company.code,
                        as_of_date,
                        &mut company_invoices,
                        currency,
                    );
                    // Back-merge updated dunning info into the master AR list.
                    // NOTE(review): the match is by invoice_number only —
                    // assumes invoice numbers are unique across companies;
                    // verify against the invoice numbering scheme.
                    for updated in &company_invoices {
                        if let Some(orig) = subledger
                            .ar_invoices
                            .iter_mut()
                            .find(|i| i.invoice_number == updated.invoice_number)
                        {
                            orig.dunning_info = updated.dunning_info.clone();
                        }
                    }
                    subledger.dunning_runs.push(result.dunning_run);
                    subledger.dunning_letters.extend(result.letters);
                    dunning_journal_entries.extend(result.journal_entries);
                }
                debug!(
                    "Dunning runs complete: {} runs, {} letters",
                    subledger.dunning_runs.len(),
                    subledger.dunning_letters.len()
                );
            }
        }
        self.check_resources_with_log("post-document-flows")?;
    } else {
        debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
    }
    // FA acquisition JEs are appended onto the dunning JEs and returned as
    // one combined vector.
    let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
    if !self.master_data.assets.is_empty() {
        debug!("Generating FA subledger records");
        // FA records are booked against the first configured company; falls
        // back to "1000"/"USD" when no companies are configured.
        let company_code = self
            .config
            .companies
            .first()
            .map(|c| c.code.as_str())
            .unwrap_or("1000");
        let currency = self
            .config
            .companies
            .first()
            .map(|c| c.currency.as_str())
            .unwrap_or("USD");
        let mut fa_gen = datasynth_generators::FAGenerator::new(
            datasynth_generators::FAGeneratorConfig::default(),
            rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
        );
        // One acquisition record + one acquisition JE per master-data asset.
        for asset in &self.master_data.assets {
            let (record, je) = fa_gen.generate_asset_acquisition(
                company_code,
                &format!("{:?}", asset.asset_class),
                &asset.description,
                asset.acquisition_date,
                currency,
                asset.cost_center.as_deref(),
            );
            subledger.fa_records.push(record);
            fa_journal_entries.push(je);
        }
        stats.fa_subledger_count = subledger.fa_records.len();
        debug!(
            "FA subledger records generated: {} (with {} acquisition JEs)",
            stats.fa_subledger_count,
            fa_journal_entries.len()
        );
    }
    if !self.master_data.materials.is_empty() {
        debug!("Generating Inventory subledger records");
        let first_company = self.config.companies.first();
        let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
        let inv_currency = first_company
            .map(|c| c.currency.clone())
            .unwrap_or_else(|| "USD".to_string());
        let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
            datasynth_generators::InventoryGeneratorConfig::default(),
            rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
            inv_currency.clone(),
        );
        for (i, material) in self.master_data.materials.iter().enumerate() {
            // Deterministically spread materials across 3 plants and 10
            // storage locations by index.
            let plant = format!("PLANT{:02}", (i % 3) + 1);
            let storage_loc = format!("SL-{:03}", (i % 10) + 1);
            // Initial quantity mirrors the safety stock; non-integral or
            // unparsable values fall back to 100.
            let initial_qty = rust_decimal::Decimal::from(
                material
                    .safety_stock
                    .to_string()
                    .parse::<i64>()
                    .unwrap_or(100),
            );
            let position = inv_gen.generate_position(
                company_code,
                &plant,
                &storage_loc,
                &material.material_id,
                &material.description,
                initial_qty,
                Some(material.standard_cost),
                &inv_currency,
            );
            subledger.inventory_positions.push(position);
        }
        stats.inventory_subledger_count = subledger.inventory_positions.len();
        debug!(
            "Inventory subledger records generated: {}",
            stats.inventory_subledger_count
        );
    }
    // Depreciation runs require FA records and a parseable start date.
    if !subledger.fa_records.is_empty() {
        if let Ok(start_date) =
            NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        {
            let company_code = self
                .config
                .companies
                .first()
                .map(|c| c.code.as_str())
                .unwrap_or("1000");
            let fiscal_year = start_date.year();
            let start_period = start_date.month();
            // Clamp the last period to 12; periods do not roll into the next
            // fiscal year here.
            let end_period =
                (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
            let depr_cfg = FaDepreciationScheduleConfig {
                fiscal_year,
                start_period,
                end_period,
                seed_offset: 800,
            };
            let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
            let runs = depr_gen.generate(company_code, &subledger.fa_records);
            let run_count = runs.len();
            subledger.depreciation_runs = runs;
            debug!(
                "Depreciation runs generated: {} runs for {} periods",
                run_count, self.config.global.period_months
            );
        }
    }
    // Inventory valuations: one report per company as of period end.
    if !subledger.inventory_positions.is_empty() {
        if let Ok(start_date) =
            NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        {
            let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
                - chrono::Days::new(1);
            let inv_val_cfg = InventoryValuationGeneratorConfig::default();
            let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
            for company in &self.config.companies {
                let result = inv_val_gen.generate(
                    &company.code,
                    &subledger.inventory_positions,
                    as_of_date,
                );
                subledger.inventory_valuations.push(result);
            }
            debug!(
                "Inventory valuations generated: {} company reports",
                subledger.inventory_valuations.len()
            );
        }
    }
    Ok((document_flows, subledger, fa_journal_entries))
}
#[allow(clippy::too_many_arguments)]
/// Phase 3c: generates OCPM (object-centric process mining) events from the
/// upstream snapshots and records event/object/case counts in `stats`.
///
/// Returns a default (empty) snapshot when the phase is skipped, either due
/// to resource degradation (`Reduced` or worse) or because OCPM generation is
/// disabled in the phase config.
///
/// # Errors
/// Propagates errors from the resource checks and from event generation.
fn phase_ocpm_events(
    &mut self,
    document_flows: &DocumentFlowSnapshot,
    sourcing: &SourcingSnapshot,
    hr: &HrSnapshot,
    manufacturing: &ManufacturingSnapshot,
    banking: &BankingSnapshot,
    audit: &AuditSnapshot,
    financial_reporting: &FinancialReportingSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<OcpmSnapshot> {
    // Under memory/disk pressure this phase is shed entirely.
    let degradation = self.check_resources()?;
    if degradation >= DegradationLevel::Reduced {
        debug!(
            "Phase skipped due to resource pressure (degradation: {:?})",
            degradation
        );
        return Ok(OcpmSnapshot::default());
    }
    if self.phase_config.generate_ocpm_events {
        info!("Phase 3c: Generating OCPM Events");
        let ocpm_snapshot = self.generate_ocpm_events(
            document_flows,
            sourcing,
            hr,
            manufacturing,
            banking,
            audit,
            financial_reporting,
        )?;
        stats.ocpm_event_count = ocpm_snapshot.event_count;
        stats.ocpm_object_count = ocpm_snapshot.object_count;
        stats.ocpm_case_count = ocpm_snapshot.case_count;
        info!(
            "OCPM events generated: {} events, {} objects, {} cases",
            stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
        );
        self.check_resources_with_log("post-ocpm")?;
        Ok(ocpm_snapshot)
    } else {
        // Fix: this branch is reached only when `generate_ocpm_events` is
        // false — the old message also claimed "no document flows", which
        // this condition never checks.
        debug!("Phase 3c: Skipped (OCPM generation disabled)");
        Ok(OcpmSnapshot::default())
    }
}
/// Phase 4: collects journal entries from two sources — JEs derived from the
/// generated document flows (4a) and standalone JEs drawn against the chart
/// of accounts (4). Either source may be disabled independently.
fn phase_journal_entries(
    &mut self,
    coa: &Arc<ChartOfAccounts>,
    document_flows: &DocumentFlowSnapshot,
    _stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<JournalEntry>> {
    let mut collected: Vec<JournalEntry> = Vec::new();
    let flows_available =
        self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty();
    if flows_available {
        debug!("Phase 4a: Generating JEs from document flows");
        let from_flows = self.generate_jes_from_document_flows(document_flows)?;
        debug!("Generated {} JEs from document flows", from_flows.len());
        collected.extend(from_flows);
    }
    if !self.phase_config.generate_journal_entries {
        debug!("Phase 4: Skipped (journal entry generation disabled)");
    } else {
        info!("Phase 4: Generating Journal Entries");
        let standalone = self.generate_journal_entries(coa)?;
        info!("Generated {} standalone journal entries", standalone.len());
        collected.extend(standalone);
    }
    // Only check resources when this phase actually produced something.
    if !collected.is_empty() {
        self.check_resources_with_log("post-journal-entries")?;
    }
    Ok(collected)
}
/// Phase 5: injects labeled anomalies into the journal entries.
///
/// Skips (returning empty labels) when resource degradation requested the
/// skip, when anomaly injection is disabled, or when there are no entries.
fn phase_anomaly_injection(
    &mut self,
    entries: &mut [JournalEntry],
    actions: &DegradationActions,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<AnomalyLabels> {
    // Degradation-driven skip takes precedence and is logged at warn level.
    if actions.skip_anomaly_injection {
        warn!("Phase 5: Skipped due to resource degradation");
        return Ok(AnomalyLabels::default());
    }
    if !self.phase_config.inject_anomalies || entries.is_empty() {
        debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
        return Ok(AnomalyLabels::default());
    }
    info!("Phase 5: Injecting Anomalies");
    let result = self.inject_anomalies(entries)?;
    stats.anomalies_injected = result.labels.len();
    info!("Injected {} anomalies", stats.anomalies_injected);
    self.check_resources_with_log("post-anomaly-injection")?;
    Ok(result)
}
/// Phase 6: validates debit/credit balance across the journal entries.
/// Returns a default result when validation is disabled or there is nothing
/// to validate.
fn phase_balance_validation(
    &mut self,
    entries: &[JournalEntry],
) -> SynthResult<BalanceValidationResult> {
    if !self.phase_config.validate_balances || entries.is_empty() {
        return Ok(BalanceValidationResult::default());
    }
    debug!("Phase 6: Validating Balances");
    let validation = self.validate_journal_entries(entries)?;
    if !validation.is_balanced {
        warn!(
            "Balance validation found {} errors",
            validation.validation_errors.len()
        );
    } else {
        debug!("Balance validation passed");
    }
    Ok(validation)
}
/// Phase 7: injects data-quality variations (typos, missing fields, etc.)
/// into the journal entries and returns the stats plus the issue list.
///
/// Skips when resource degradation requested it, when the phase is disabled,
/// or when there are no entries to degrade.
fn phase_data_quality_injection(
    &mut self,
    entries: &mut [JournalEntry],
    actions: &DegradationActions,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
    // Degradation-driven skip is logged at warn level; config skip at debug.
    if actions.skip_data_quality {
        warn!("Phase 7: Skipped due to resource degradation");
        return Ok((DataQualityStats::default(), Vec::new()));
    }
    if !self.phase_config.inject_data_quality || entries.is_empty() {
        debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
        return Ok((DataQualityStats::default(), Vec::new()));
    }
    info!("Phase 7: Injecting Data Quality Variations");
    let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
    stats.data_quality_issues = dq_stats.records_with_issues;
    info!("Injected {} data quality issues", stats.data_quality_issues);
    self.check_resources_with_log("post-data-quality")?;
    Ok((dq_stats, quality_issues))
}
/// Phase 10b: generates period-close journal entries and appends them to
/// `entries`.
///
/// For each company the close produces, in order:
/// 1. Straight-line depreciation JEs for active, not-fully-depreciated assets.
/// 2. Accrued-expense JEs (utilities, rent, interest) scaled from revenue.
/// 3. A tax provision JE (profitable year) or deferred-tax-asset JE (loss year)
///    at a flat 21% rate.
/// 4. A dividend declaration (10% of net income, profitable years only).
/// 5. The income-summary → retained-earnings closing entry.
///
/// Updates `stats` with the close-JE count and the new entry/line totals.
///
/// # Errors
/// Fails if the configured `start_date` cannot be parsed as `%Y-%m-%d`.
fn phase_period_close(
    &mut self,
    entries: &mut Vec<JournalEntry>,
    subledger: &SubledgerSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<()> {
    if !self.phase_config.generate_period_close || entries.is_empty() {
        debug!("Phase 10b: Skipped (period close disabled or no entries)");
        return Ok(());
    }
    info!("Phase 10b: Generating period-close journal entries");
    use datasynth_core::accounts::{
        control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
    };
    use rust_decimal::Decimal;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    // Close on the last day of the period (inclusive).
    let close_date = end_date - chrono::Days::new(1);
    // Flat corporate tax rate of 21% (0.21).
    let tax_rate = Decimal::new(21, 2);
    let company_codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|c| c.code.clone())
        .collect();
    // Rough capacity hint: one depreciation JE per FA record plus roughly a
    // tax JE and a closing JE per company.
    let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
    let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
    let period_months = self.config.global.period_months;
    // --- 1. Depreciation JEs -------------------------------------------------
    for asset in &subledger.fa_records {
        use datasynth_core::models::subledger::fa::AssetStatus;
        if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
            continue;
        }
        let useful_life_months = asset.useful_life_months();
        if useful_life_months == 0 {
            continue;
        }
        let salvage_value = asset.salvage_value();
        let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
        if depreciable_base == Decimal::ZERO {
            continue;
        }
        // Straight-line: base / useful life, scaled to the whole period.
        let period_depr = (depreciable_base / Decimal::from(useful_life_months)
            * Decimal::from(period_months))
        .round_dp(2);
        if period_depr <= Decimal::ZERO {
            continue;
        }
        let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
        depr_header.document_type = "CL".to_string();
        depr_header.header_text = Some(format!(
            "Depreciation - {} {}",
            asset.asset_number, asset.description
        ));
        depr_header.created_by = "CLOSE_ENGINE".to_string();
        depr_header.source = TransactionSource::Automated;
        depr_header.business_process = Some(BusinessProcess::R2R);
        let doc_id = depr_header.document_id;
        let mut depr_je = JournalEntry::new(depr_header);
        // Dr depreciation expense / Cr accumulated depreciation.
        depr_je.add_line(JournalEntryLine::debit(
            doc_id,
            1,
            expense_accounts::DEPRECIATION.to_string(),
            period_depr,
        ));
        depr_je.add_line(JournalEntryLine::credit(
            doc_id,
            2,
            control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
            period_depr,
        ));
        debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
        close_jes.push(depr_je);
    }
    if !subledger.fa_records.is_empty() {
        debug!(
            "Generated {} depreciation JEs from {} FA records",
            close_jes.len(),
            subledger.fa_records.len()
        );
    }
    // --- 2. Accrued expenses -------------------------------------------------
    {
        use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
        let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
        // (description, expense account, liability account) triples.
        let accrual_items: &[(&str, &str, &str)] = &[
            ("Accrued Utilities", "6200", "2100"),
            ("Accrued Rent", "6300", "2100"),
            ("Accrued Interest", "6100", "2150"),
        ];
        for company_code in &company_codes {
            // Net revenue = credits minus debits on 4xxx accounts.
            let company_revenue: Decimal = entries
                .iter()
                .filter(|e| e.header.company_code == *company_code)
                .flat_map(|e| e.lines.iter())
                .filter(|l| l.gl_account.starts_with('4'))
                .map(|l| l.credit_amount - l.debit_amount)
                .fold(Decimal::ZERO, |acc, v| acc + v);
            if company_revenue <= Decimal::ZERO {
                continue;
            }
            // Each accrual is 0.5% of revenue (Decimal::new(5, 3) == 0.005).
            let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
            if accrual_base <= Decimal::ZERO {
                continue;
            }
            for (description, expense_acct, liability_acct) in accrual_items {
                // The generator may also emit a next-period reversal JE.
                let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
                    company_code,
                    description,
                    accrual_base,
                    expense_acct,
                    liability_acct,
                    close_date,
                    None,
                );
                close_jes.push(accrual_je);
                if let Some(rev_je) = reversal_je {
                    close_jes.push(rev_je);
                }
            }
        }
        debug!(
            "Generated accrual entries for {} companies",
            company_codes.len()
        );
    }
    // --- 3-5. Per-company tax, dividends, and income-statement close --------
    for company_code in &company_codes {
        // Aggregate pre-tax income from the entries already in the GL.
        // NOTE(review): accruals generated above are in `close_jes`, not yet
        // merged into `entries`, so they do not affect this aggregation.
        let mut total_revenue = Decimal::ZERO;
        let mut total_expenses = Decimal::ZERO;
        for entry in entries.iter() {
            if entry.header.company_code != *company_code {
                continue;
            }
            for line in &entry.lines {
                let category = AccountCategory::from_account(&line.gl_account);
                match category {
                    AccountCategory::Revenue => {
                        total_revenue += line.credit_amount - line.debit_amount;
                    }
                    AccountCategory::Cogs
                    | AccountCategory::OperatingExpense
                    | AccountCategory::OtherIncomeExpense
                    | AccountCategory::Tax => {
                        total_expenses += line.debit_amount - line.credit_amount;
                    }
                    _ => {}
                }
            }
        }
        let pre_tax_income = total_revenue - total_expenses;
        if pre_tax_income == Decimal::ZERO {
            debug!(
                "Company {}: no pre-tax income, skipping period close",
                company_code
            );
            continue;
        }
        if pre_tax_income > Decimal::ZERO {
            // Profitable year: Dr tax expense / Cr income tax payable.
            let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
            let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
            tax_header.document_type = "CL".to_string();
            tax_header.header_text = Some(format!("Tax provision - {}", company_code));
            tax_header.created_by = "CLOSE_ENGINE".to_string();
            tax_header.source = TransactionSource::Automated;
            tax_header.business_process = Some(BusinessProcess::R2R);
            let doc_id = tax_header.document_id;
            let mut tax_je = JournalEntry::new(tax_header);
            tax_je.add_line(JournalEntryLine::debit(
                doc_id,
                1,
                tax_accounts::TAX_EXPENSE.to_string(),
                tax_amount,
            ));
            tax_je.add_line(JournalEntryLine::credit(
                doc_id,
                2,
                tax_accounts::INCOME_TAX_PAYABLE.to_string(),
                tax_amount,
            ));
            debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
            close_jes.push(tax_je);
        } else {
            // Loss year: recognise a deferred tax asset for the carryforward.
            // Dr DTA / Cr tax expense.
            let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
            if dta_amount > Decimal::ZERO {
                let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
                dta_header.document_type = "CL".to_string();
                dta_header.header_text =
                    Some(format!("Deferred tax asset (DTA) - {}", company_code));
                dta_header.created_by = "CLOSE_ENGINE".to_string();
                dta_header.source = TransactionSource::Automated;
                dta_header.business_process = Some(BusinessProcess::R2R);
                let doc_id = dta_header.document_id;
                let mut dta_je = JournalEntry::new(dta_header);
                dta_je.add_line(JournalEntryLine::debit(
                    doc_id,
                    1,
                    tax_accounts::DEFERRED_TAX_ASSET.to_string(),
                    dta_amount,
                ));
                dta_je.add_line(JournalEntryLine::credit(
                    doc_id,
                    2,
                    tax_accounts::TAX_EXPENSE.to_string(),
                    dta_amount,
                ));
                debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
                close_jes.push(dta_je);
                debug!(
                    "Company {}: loss year — recognised DTA of {}",
                    company_code, dta_amount
                );
            }
        }
        // Recompute the provision (zero in loss years) to derive net income.
        let tax_provision = if pre_tax_income > Decimal::ZERO {
            (pre_tax_income * tax_rate).round_dp(2)
        } else {
            Decimal::ZERO
        };
        let net_income = pre_tax_income - tax_provision;
        if net_income > Decimal::ZERO {
            use datasynth_generators::DividendGenerator;
            // Declare a dividend of 10% of net income (Decimal::new(10, 2) == 0.10).
            let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
            let currency_str = self
                .config
                .companies
                .iter()
                .find(|c| c.code == *company_code)
                .map(|c| c.currency.as_str())
                .unwrap_or("USD");
            let div_result = div_gen.generate(
                company_code,
                close_date,
                Decimal::new(1, 0), dividend_amount,
                currency_str,
            );
            let div_je_count = div_result.journal_entries.len();
            close_jes.extend(div_result.journal_entries);
            debug!(
                "Company {}: declared dividend of {} ({} JEs)",
                company_code, dividend_amount, div_je_count
            );
        }
        // Income-summary close: move net income (or loss) to retained earnings.
        if net_income != Decimal::ZERO {
            let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
            close_header.document_type = "CL".to_string();
            close_header.header_text =
                Some(format!("Income statement close - {}", company_code));
            close_header.created_by = "CLOSE_ENGINE".to_string();
            close_header.source = TransactionSource::Automated;
            close_header.business_process = Some(BusinessProcess::R2R);
            let doc_id = close_header.document_id;
            let mut close_je = JournalEntry::new(close_header);
            let abs_net_income = net_income.abs();
            if net_income > Decimal::ZERO {
                // Profit: Dr income summary / Cr retained earnings.
                close_je.add_line(JournalEntryLine::debit(
                    doc_id,
                    1,
                    equity_accounts::INCOME_SUMMARY.to_string(),
                    abs_net_income,
                ));
                close_je.add_line(JournalEntryLine::credit(
                    doc_id,
                    2,
                    equity_accounts::RETAINED_EARNINGS.to_string(),
                    abs_net_income,
                ));
            } else {
                // Loss: Dr retained earnings / Cr income summary.
                close_je.add_line(JournalEntryLine::debit(
                    doc_id,
                    1,
                    equity_accounts::RETAINED_EARNINGS.to_string(),
                    abs_net_income,
                ));
                close_je.add_line(JournalEntryLine::credit(
                    doc_id,
                    2,
                    equity_accounts::INCOME_SUMMARY.to_string(),
                    abs_net_income,
                ));
            }
            debug_assert!(
                close_je.is_balanced(),
                "Income statement closing JE must be balanced"
            );
            close_jes.push(close_je);
        }
    }
    // Merge close JEs into the GL and refresh the running totals.
    let close_count = close_jes.len();
    if close_count > 0 {
        info!("Generated {} period-close journal entries", close_count);
        self.emit_phase_items("period_close", "JournalEntry", &close_jes);
        entries.extend(close_jes);
        stats.period_close_je_count = close_count;
        stats.total_entries = entries.len() as u64;
        stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
    } else {
        debug!("No period-close entries generated (no income statement activity)");
    }
    Ok(())
}
/// Phase 8: generate audit-domain data (engagements, workpapers, evidence,
/// risks, findings, confirmations, IA artifacts, related parties) and record
/// the resulting collection sizes in `stats`.
///
/// Returns an empty [`AuditSnapshot`] when audit generation is disabled.
fn phase_audit_data(
    &mut self,
    entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<AuditSnapshot> {
    // Guard clause: nothing to do when the phase is switched off.
    if !self.phase_config.generate_audit {
        debug!("Phase 8: Skipped (audit generation disabled)");
        return Ok(AuditSnapshot::default());
    }
    info!("Phase 8: Generating Audit Data");
    let snapshot = self.generate_audit_data(entries)?;
    // Mirror every snapshot collection size into the statistics record.
    stats.audit_engagement_count = snapshot.engagements.len();
    stats.audit_workpaper_count = snapshot.workpapers.len();
    stats.audit_evidence_count = snapshot.evidence.len();
    stats.audit_risk_count = snapshot.risk_assessments.len();
    stats.audit_finding_count = snapshot.findings.len();
    stats.audit_judgment_count = snapshot.judgments.len();
    stats.audit_confirmation_count = snapshot.confirmations.len();
    stats.audit_confirmation_response_count = snapshot.confirmation_responses.len();
    stats.audit_procedure_step_count = snapshot.procedure_steps.len();
    stats.audit_sample_count = snapshot.samples.len();
    stats.audit_analytical_result_count = snapshot.analytical_results.len();
    stats.audit_ia_function_count = snapshot.ia_functions.len();
    stats.audit_ia_report_count = snapshot.ia_reports.len();
    stats.audit_related_party_count = snapshot.related_parties.len();
    stats.audit_related_party_transaction_count = snapshot.related_party_transactions.len();
    // Note: confirmation responses are tracked in stats but not logged here.
    info!(
        "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
         {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
         {} analytical results, {} IA functions, {} IA reports, {} related parties, \
         {} RP transactions",
        stats.audit_engagement_count,
        stats.audit_workpaper_count,
        stats.audit_evidence_count,
        stats.audit_risk_count,
        stats.audit_finding_count,
        stats.audit_judgment_count,
        stats.audit_confirmation_count,
        stats.audit_procedure_step_count,
        stats.audit_sample_count,
        stats.audit_analytical_result_count,
        stats.audit_ia_function_count,
        stats.audit_ia_report_count,
        stats.audit_related_party_count,
        stats.audit_related_party_transaction_count,
    );
    self.check_resources_with_log("post-audit")?;
    Ok(snapshot)
}
/// Phase 9: generate banking KYC/AML data (customers, accounts, transactions)
/// and record the collection sizes plus the suspicious-transaction count.
///
/// Returns an empty [`BankingSnapshot`] when banking generation is disabled.
fn phase_banking_data(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<BankingSnapshot> {
    // Guard clause: nothing to do when the phase is switched off.
    if !self.phase_config.generate_banking {
        debug!("Phase 9: Skipped (banking generation disabled)");
        return Ok(BankingSnapshot::default());
    }
    info!("Phase 9: Generating Banking KYC/AML Data");
    let snapshot = self.generate_banking_data()?;
    stats.banking_customer_count = snapshot.customers.len();
    stats.banking_account_count = snapshot.accounts.len();
    stats.banking_transaction_count = snapshot.transactions.len();
    stats.banking_suspicious_count = snapshot.suspicious_count;
    info!(
        "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
        stats.banking_customer_count, stats.banking_account_count,
        stats.banking_transaction_count, stats.banking_suspicious_count
    );
    self.check_resources_with_log("post-banking")?;
    Ok(snapshot)
}
/// Phase 10: export accounting network graphs built from the journal entries.
///
/// Export failures are downgraded to a warning and an empty snapshot so a
/// graph problem never aborts the overall generation run. The phase is also
/// skipped when disabled or when there are no entries to export.
fn phase_graph_export(
    &mut self,
    entries: &[JournalEntry],
    coa: &Arc<ChartOfAccounts>,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<GraphExportSnapshot> {
    // Guard clause covering both the config switch and the empty-input case.
    if !self.phase_config.generate_graph_export || entries.is_empty() {
        debug!("Phase 10: Skipped (graph export disabled or no entries)");
        return Ok(GraphExportSnapshot::default());
    }
    info!("Phase 10: Exporting Accounting Network Graphs");
    let snapshot = match self.export_graphs(entries, coa, stats) {
        Ok(snapshot) => {
            info!(
                "Graph export complete: {} graphs ({} nodes, {} edges)",
                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
            );
            snapshot
        }
        Err(e) => {
            // Non-fatal: log and continue with an empty snapshot.
            warn!("Phase 10: Graph export failed: {}", e);
            GraphExportSnapshot::default()
        }
    };
    Ok(snapshot)
}
/// Phase 10b: export the multi-layer hypergraph assembled from all generated
/// snapshots (GL entries, document flows, sourcing, HR, manufacturing,
/// banking, audit, financial reporting, OCPM, compliance).
///
/// Export failures are logged and swallowed so a hypergraph problem never
/// aborts the overall run; the phase is skipped entirely when disabled in
/// config or when there are no journal entries.
#[allow(clippy::too_many_arguments)]
fn phase_hypergraph_export(
    &self,
    coa: &Arc<ChartOfAccounts>,
    entries: &[JournalEntry],
    document_flows: &DocumentFlowSnapshot,
    sourcing: &SourcingSnapshot,
    hr: &HrSnapshot,
    manufacturing: &ManufacturingSnapshot,
    banking: &BankingSnapshot,
    audit: &AuditSnapshot,
    financial_reporting: &FinancialReportingSnapshot,
    ocpm: &OcpmSnapshot,
    compliance: &ComplianceRegulationsSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<()> {
    if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
        // Fixed phase label: previously logged "Phase 19b" while the skip
        // and failure messages in this same function say "Phase 10b".
        info!("Phase 10b: Exporting Multi-Layer Hypergraph");
        match self.export_hypergraph(
            coa,
            entries,
            document_flows,
            sourcing,
            hr,
            manufacturing,
            banking,
            audit,
            financial_reporting,
            ocpm,
            compliance,
            stats,
        ) {
            Ok(info) => {
                info!(
                    "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
                    info.node_count, info.edge_count, info.hyperedge_count
                );
            }
            Err(e) => {
                // Non-fatal: degrade gracefully rather than failing the run.
                warn!("Phase 10b: Hypergraph export failed: {}", e);
            }
        }
    } else {
        debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
    }
    Ok(())
}
/// Phase 11: enrich vendor master-data names via an LLM provider.
///
/// Provider selection falls back to a deterministic mock in three cases:
/// the configured provider name is unknown, the provider's API-key env var
/// is not set, or constructing the HTTP provider fails. The whole phase runs
/// inside a panic boundary so an enrichment failure cannot abort generation;
/// timing and the enriched-vendor count are recorded in `stats` either way.
fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
    if !self.config.llm.enabled {
        debug!("Phase 11: Skipped (LLM enrichment disabled)");
        return;
    }
    info!("Phase 11: Starting LLM Enrichment");
    let start = std::time::Instant::now();
    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
            // Map the configured provider name to its API-key env var;
            // any other value yields None and forces the mock provider.
            let schema_provider = &self.config.llm.provider;
            let api_key_env = match schema_provider.as_str() {
                "openai" => Some("OPENAI_API_KEY"),
                "anthropic" => Some("ANTHROPIC_API_KEY"),
                "custom" => Some("LLM_API_KEY"),
                _ => None,
            };
            if let Some(key_env) = api_key_env {
                // Only attempt a real HTTP provider when the key is present.
                if std::env::var(key_env).is_ok() {
                    let llm_config = datasynth_core::llm::LlmConfig {
                        model: self.config.llm.model.clone(),
                        api_key_env: key_env.to_string(),
                        ..datasynth_core::llm::LlmConfig::default()
                    };
                    match HttpLlmProvider::new(llm_config) {
                        Ok(p) => Arc::new(p),
                        Err(e) => {
                            // Construction failure degrades to the mock.
                            warn!(
                                "Failed to create HttpLlmProvider: {}; falling back to mock",
                                e
                            );
                            Arc::new(MockLlmProvider::new(self.seed))
                        }
                    }
                } else {
                    // Key env var missing: use the seeded mock provider.
                    Arc::new(MockLlmProvider::new(self.seed))
                }
            } else {
                // Unknown provider name: use the seeded mock provider.
                Arc::new(MockLlmProvider::new(self.seed))
            }
        };
        let enricher = VendorLlmEnricher::new(provider);
        let industry = format!("{:?}", self.config.global.industry);
        // Cap enrichment at the configured maximum and the vendor count.
        let max_enrichments = self
            .config
            .llm
            .max_vendor_enrichments
            .min(self.master_data.vendors.len());
        let mut enriched_count = 0usize;
        for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
            // Per-vendor errors are logged and skipped; enrichment is
            // best-effort and never fails the phase.
            match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
                Ok(name) => {
                    vendor.name = name;
                    enriched_count += 1;
                }
                Err(e) => {
                    warn!(
                        "LLM vendor enrichment failed for {}: {}",
                        vendor.vendor_id, e
                    );
                }
            }
        }
        enriched_count
    }));
    match result {
        Ok(enriched_count) => {
            stats.llm_vendors_enriched = enriched_count;
            let elapsed = start.elapsed();
            stats.llm_enrichment_ms = elapsed.as_millis() as u64;
            info!(
                "Phase 11 complete: {} vendors enriched in {}ms",
                enriched_count, stats.llm_enrichment_ms
            );
        }
        Err(_) => {
            // A panic inside the closure is caught here; record timing and
            // continue generation without enrichment.
            let elapsed = start.elapsed();
            stats.llm_enrichment_ms = elapsed.as_millis() as u64;
            warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
        }
    }
}
/// Phase 12: run the statistical diffusion backend to produce synthetic
/// feature samples, recording the sample count and elapsed time in `stats`.
///
/// The backend runs behind a panic boundary so a failure here can never
/// abort the overall generation run.
fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
    if !self.config.diffusion.enabled {
        debug!("Phase 12: Skipped (diffusion enhancement disabled)");
        return;
    }
    info!("Phase 12: Starting Diffusion Enhancement");
    let timer = std::time::Instant::now();
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        // Hard-coded per-feature means/stds for three synthetic features.
        let means = vec![5000.0, 3.0, 2.0];
        let stds = vec![2000.0, 1.5, 1.0];
        let diffusion_config = DiffusionConfig {
            n_steps: self.config.diffusion.n_steps,
            seed: self.seed,
            ..Default::default()
        };
        let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
        let feature_count = 3;
        let samples =
            backend.generate(self.config.diffusion.sample_size, feature_count, self.seed);
        samples.len()
    }));
    // Timing is recorded whether or not the backend panicked.
    let elapsed_ms = timer.elapsed().as_millis() as u64;
    stats.diffusion_enhancement_ms = elapsed_ms;
    match outcome {
        Ok(sample_count) => {
            stats.diffusion_samples_generated = sample_count;
            info!(
                "Phase 12 complete: {} diffusion samples generated in {}ms",
                sample_count, elapsed_ms
            );
        }
        Err(_) => {
            warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
        }
    }
}
/// Phase 13: generate samples from a structural causal model (SCM) built
/// from a configurable graph template, optionally validating the causal
/// structure of the output. Sample count, validation result and elapsed
/// time are recorded in `stats`.
///
/// Runs behind a panic boundary so a failure in the causal stack can never
/// abort the overall generation run.
fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
    if !self.config.causal.enabled {
        debug!("Phase 13: Skipped (causal generation disabled)");
        return;
    }
    info!("Phase 13: Starting Causal Overlay");
    let timer = std::time::Instant::now();
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(
        || -> Result<(usize, Option<bool>), SynthError> {
            // Pick the graph template named in config; anything other than
            // "revenue_cycle" falls back to the fraud-detection template.
            let graph = if self.config.causal.template.as_str() == "revenue_cycle" {
                CausalGraph::revenue_cycle_template()
            } else {
                CausalGraph::fraud_detection_template()
            };
            let scm = StructuralCausalModel::new(graph.clone())
                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
            let samples = scm
                .generate(self.config.causal.sample_size, self.seed)
                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
            // Optional structural validation; None when validation is off.
            let validation_passed = self.config.causal.validate.then(|| {
                let report = CausalValidator::validate_causal_structure(&samples, &graph);
                if report.valid {
                    info!(
                        "Causal validation passed: all {} checks OK",
                        report.checks.len()
                    );
                } else {
                    warn!(
                        "Causal validation: {} violations detected: {:?}",
                        report.violations.len(),
                        report.violations
                    );
                }
                report.valid
            });
            Ok((samples.len(), validation_passed))
        },
    ));
    // Timing is recorded for success, error, and panic alike.
    let elapsed_ms = timer.elapsed().as_millis() as u64;
    stats.causal_generation_ms = elapsed_ms;
    match outcome {
        Ok(Ok((sample_count, validation_passed))) => {
            stats.causal_samples_generated = sample_count;
            stats.causal_validation_passed = validation_passed;
            info!(
                "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
                sample_count, elapsed_ms, validation_passed,
            );
        }
        Ok(Err(e)) => {
            warn!("Phase 13: Causal generation failed: {}", e);
        }
        Err(_) => {
            warn!("Phase 13: Causal generation failed (panic caught), continuing");
        }
    }
}
/// Phase 14: generate the source-to-contract (S2C) dataset — spend analyses,
/// sourcing projects, supplier qualifications, RFx events, bids, bid
/// evaluations, contracts, catalog items and supplier scorecards — and
/// cross-link the artifacts back onto the sourcing projects.
///
/// Runs when either the sourcing phase flag or `source_to_pay.enabled` is
/// set; skipped entirely under resource pressure or when no vendors exist.
/// Each sub-generator receives a distinct seed offset (`seed + 1..=8`) so
/// the random streams stay deterministic and independent of each other —
/// do not reorder the generator calls.
fn phase_sourcing_data(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<SourcingSnapshot> {
    // Phase runs if EITHER flag is enabled; skip only when both are off.
    if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
        debug!("Phase 14: Skipped (sourcing generation disabled)");
        return Ok(SourcingSnapshot::default());
    }
    // Sourcing is an optional enrichment: shed it under resource pressure.
    let degradation = self.check_resources()?;
    if degradation >= DegradationLevel::Reduced {
        debug!(
            "Phase skipped due to resource pressure (degradation: {:?})",
            degradation
        );
        return Ok(SourcingSnapshot::default());
    }
    info!("Phase 14: Generating S2C Sourcing Data");
    let seed = self.seed;
    let vendor_ids: Vec<String> = self
        .master_data
        .vendors
        .iter()
        .map(|v| v.vendor_id.clone())
        .collect();
    if vendor_ids.is_empty() {
        debug!("Phase 14: Skipped (no vendors available)");
        return Ok(SourcingSnapshot::default());
    }
    // Fixed demo category taxonomy used across all sourcing artifacts.
    let categories: Vec<(String, String)> = vec![
        ("CAT-RAW".to_string(), "Raw Materials".to_string()),
        ("CAT-OFF".to_string(), "Office Supplies".to_string()),
        ("CAT-IT".to_string(), "IT Equipment".to_string()),
        ("CAT-SVC".to_string(), "Professional Services".to_string()),
        ("CAT-LOG".to_string(), "Logistics".to_string()),
    ];
    // Flat 100k spend per category (placeholder, not derived from JEs).
    let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
        .iter()
        .map(|(id, name)| {
            (
                id.clone(),
                name.clone(),
                rust_decimal::Decimal::from(100_000),
            )
        })
        .collect();
    // The first configured company hosts all sourcing activity.
    let company_code = self
        .config
        .companies
        .first()
        .map(|c| c.code.as_str())
        .unwrap_or("1000");
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    let fiscal_year = start_date.year() as u16;
    // Up to five employees act as project owners; first one is the default
    // buyer for artifacts that need a single owner.
    let owner_ids: Vec<String> = self
        .master_data
        .employees
        .iter()
        .take(5)
        .map(|e| e.employee_id.clone())
        .collect();
    let owner_id = owner_ids
        .first()
        .map(std::string::String::as_str)
        .unwrap_or("BUYER-001");
    let mut spend_gen = SpendAnalysisGenerator::new(seed);
    let spend_analyses =
        spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
    let mut project_gen = SourcingProjectGenerator::new(seed + 1);
    // Projects require at least one owner; otherwise skip project creation.
    let sourcing_projects = if owner_ids.is_empty() {
        Vec::new()
    } else {
        project_gen.generate(
            company_code,
            &categories_with_spend,
            &owner_ids,
            start_date,
            self.config.global.period_months,
        )
    };
    stats.sourcing_project_count = sourcing_projects.len();
    // Qualify the first 20 vendors, linked to the first project (if any).
    let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
    let mut qual_gen = QualificationGenerator::new(seed + 2);
    let qualifications = qual_gen.generate(
        company_code,
        &qual_vendor_ids,
        sourcing_projects.first().map(|p| p.project_id.as_str()),
        owner_id,
        start_date,
    );
    // One RFx per project, inviting the first five vendors with a fixed
    // 50,000 budget estimate.
    let mut rfx_gen = RfxGenerator::new(seed + 3);
    let rfx_events: Vec<RfxEvent> = sourcing_projects
        .iter()
        .map(|proj| {
            let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
            rfx_gen.generate(
                company_code,
                &proj.project_id,
                &proj.category_id,
                &qualified_vids,
                owner_id,
                start_date,
                50000.0,
            )
        })
        .collect();
    stats.rfx_event_count = rfx_events.len();
    let mut bid_gen = BidGenerator::new(seed + 4);
    let mut all_bids = Vec::new();
    for rfx in &rfx_events {
        // Between 2 and 5 responding vendors per RFx (bounded by clamp).
        let bidder_count = vendor_ids.len().clamp(2, 5);
        let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
        let bids = bid_gen.generate(rfx, &responding, start_date);
        all_bids.extend(bids);
    }
    stats.bid_count = all_bids.len();
    // Evaluate each RFx against only its own bids.
    let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
    let bid_evaluations: Vec<BidEvaluation> = rfx_events
        .iter()
        .map(|rfx| {
            let rfx_bids: Vec<SupplierBid> = all_bids
                .iter()
                .filter(|b| b.rfx_id == rfx.rfx_id)
                .cloned()
                .collect();
            eval_gen.evaluate(rfx, &rfx_bids, owner_id)
        })
        .collect();
    // Award a contract to the top-ranked bid of each evaluation; pairing of
    // evaluations to RFx events relies on both vectors sharing order.
    let mut contract_gen = ContractGenerator::new(seed + 6);
    let contracts: Vec<ProcurementContract> = bid_evaluations
        .iter()
        .zip(rfx_events.iter())
        .filter_map(|(eval, rfx)| {
            eval.ranked_bids.first().and_then(|winner| {
                all_bids
                    .iter()
                    .find(|b| b.bid_id == winner.bid_id)
                    .map(|winning_bid| {
                        contract_gen.generate_from_bid(
                            winning_bid,
                            Some(&rfx.sourcing_project_id),
                            &rfx.category_id,
                            owner_id,
                            start_date,
                        )
                    })
            })
        })
        .collect();
    stats.contract_count = contracts.len();
    let mut catalog_gen = CatalogGenerator::new(seed + 7);
    let catalog_items = catalog_gen.generate(&contracts);
    stats.catalog_item_count = catalog_items.len();
    // Group contracts by vendor to drive per-vendor scorecards.
    let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
    let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
        .iter()
        .fold(
            std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
            |mut acc, c| {
                acc.entry(c.vendor_id.clone()).or_default().push(c);
                acc
            },
        )
        .into_iter()
        .collect();
    let scorecards = scorecard_gen.generate(
        company_code,
        &vendor_contracts,
        start_date,
        end_date,
        owner_id,
    );
    stats.scorecard_count = scorecards.len();
    // Back-fill cross-references onto the projects so the snapshot is
    // internally linked (RFx ids, awarded contract, spend analysis).
    let mut sourcing_projects = sourcing_projects;
    for project in &mut sourcing_projects {
        project.rfx_ids = rfx_events
            .iter()
            .filter(|rfx| rfx.sourcing_project_id == project.project_id)
            .map(|rfx| rfx.rfx_id.clone())
            .collect();
        project.contract_id = contracts
            .iter()
            .find(|c| {
                c.sourcing_project_id
                    .as_deref()
                    .is_some_and(|sp| sp == project.project_id)
            })
            .map(|c| c.contract_id.clone());
        // NOTE(review): spend_analysis_id is populated with the matching
        // analysis's category_id — presumably SpendAnalysis is keyed by
        // category; confirm there is no dedicated analysis id field.
        project.spend_analysis_id = spend_analyses
            .iter()
            .find(|sa| sa.category_id == project.category_id)
            .map(|sa| sa.category_id.clone());
    }
    info!(
        "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
        stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
        stats.contract_count, stats.catalog_item_count, stats.scorecard_count
    );
    self.check_resources_with_log("post-sourcing")?;
    Ok(SourcingSnapshot {
        spend_analyses,
        sourcing_projects,
        qualifications,
        rfx_events,
        bids: all_bids,
        bid_evaluations,
        contracts,
        catalog_items,
        scorecards,
    })
}
/// Builds the consolidation group: the first configured company acts as the
/// parent entity and every remaining company is attached as a
/// fully-consolidated subsidiary.
fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
    use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
    // Fall back to a placeholder code when no companies are configured.
    let parent = match self.config.companies.first() {
        Some(company) => company.code.clone(),
        None => "PARENT".to_string(),
    };
    let mut group = GroupStructure::new(parent);
    for company in self.config.companies.iter().skip(1) {
        group.add_subsidiary(SubsidiaryRelationship::new_full(
            company.code.clone(),
            company.currency.clone(),
        ));
    }
    group
}
/// Phase 14b: generate intercompany (IC) transactions between the configured
/// companies — matched seller/buyer pairs, the corresponding journal entries,
/// IC document chains, matching results, consolidation eliminations and NCI
/// measurements — and record counts in `stats`.
///
/// Requires at least two companies; otherwise (or when disabled) an empty
/// snapshot is returned. The IC generator is seeded with `seed + 50` so its
/// stream is independent of the other phases — do not reorder the generator
/// calls, as that would change the deterministic output.
fn phase_intercompany(
    &mut self,
    journal_entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<IntercompanySnapshot> {
    if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
        debug!("Phase 14b: Skipped (intercompany generation disabled)");
        return Ok(IntercompanySnapshot::default());
    }
    // IC transactions need a counterparty, so a single company is a no-op.
    if self.config.companies.len() < 2 {
        debug!(
            "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
            self.config.companies.len()
        );
        return Ok(IntercompanySnapshot::default());
    }
    info!("Phase 14b: Generating Intercompany Transactions");
    let group_structure = self.build_group_structure();
    debug!(
        "Group structure built: parent={}, subsidiaries={}",
        group_structure.parent_entity,
        group_structure.subsidiaries.len()
    );
    let seed = self.seed;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    // Company 0 is the parent; every other company becomes a 100%-owned
    // subsidiary (Decimal::from(100) = 100% ownership).
    let parent_code = self.config.companies[0].code.clone();
    let mut ownership_structure =
        datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
    for (i, company) in self.config.companies.iter().skip(1).enumerate() {
        let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
            format!("REL{:03}", i + 1),
            parent_code.clone(),
            company.code.clone(),
            rust_decimal::Decimal::from(100),
            start_date,
        );
        ownership_structure.add_relationship(relationship);
    }
    // Translate the config-schema transfer-pricing enum into the core model.
    let tp_method = match self.config.intercompany.transfer_pricing_method {
        datasynth_config::schema::TransferPricingMethod::CostPlus => {
            datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
        }
        datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
            datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
        }
        datasynth_config::schema::TransferPricingMethod::ResalePrice => {
            datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
        }
        datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
            datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
        }
        datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
            datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
        }
    };
    // Parent company's currency doubles as the IC default currency.
    let ic_currency = self
        .config
        .companies
        .first()
        .map(|c| c.currency.clone())
        .unwrap_or_else(|| "USD".to_string());
    let ic_gen_config = datasynth_generators::ICGeneratorConfig {
        ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
        transfer_pricing_method: tp_method,
        // Markup falls back to 5(%) when the configured f64 is not
        // representable as a Decimal.
        markup_percent: rust_decimal::Decimal::from_f64_retain(
            self.config.intercompany.markup_percent,
        )
        .unwrap_or(rust_decimal::Decimal::from(5)),
        generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
        default_currency: ic_currency,
        ..Default::default()
    };
    let mut ic_generator = datasynth_generators::ICGenerator::new(
        ic_gen_config,
        ownership_structure.clone(),
        seed + 50,
    );
    // Fixed volume: three IC transactions per day over the whole period.
    let transactions_per_day = 3;
    let matched_pairs = ic_generator.generate_transactions_for_period(
        start_date,
        end_date,
        transactions_per_day,
    );
    let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
    debug!(
        "Generated {} IC seller invoices, {} IC buyer POs",
        ic_doc_chains.seller_invoices.len(),
        ic_doc_chains.buyer_orders.len()
    );
    // One seller-side and one buyer-side JE per matched pair.
    let mut seller_entries = Vec::new();
    let mut buyer_entries = Vec::new();
    let fiscal_year = start_date.year();
    for pair in &matched_pairs {
        let fiscal_period = pair.posting_date.month();
        let (seller_je, buyer_je) =
            ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
        seller_entries.push(seller_je);
        buyer_entries.push(buyer_je);
    }
    let matching_config = datasynth_generators::ICMatchingConfig {
        base_currency: self
            .config
            .companies
            .first()
            .map(|c| c.currency.clone())
            .unwrap_or_else(|| "USD".to_string()),
        ..Default::default()
    };
    let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
    matching_engine.load_matched_pairs(&matched_pairs);
    let matching_result = matching_engine.run_matching(end_date);
    let mut elimination_entries = Vec::new();
    if self.config.intercompany.generate_eliminations {
        let elim_config = datasynth_generators::EliminationConfig {
            consolidation_entity: "GROUP".to_string(),
            base_currency: self
                .config
                .companies
                .first()
                .map(|c| c.currency.clone())
                .unwrap_or_else(|| "USD".to_string()),
            ..Default::default()
        };
        let mut elim_generator =
            datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
        // Period key formatted as e.g. "202412" (year + 2-digit month).
        let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
        // Eliminations consider matched and unmatched balances together.
        let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
            matching_result
                .matched_balances
                .iter()
                .chain(matching_result.unmatched_balances.iter())
                .cloned()
                .collect();
        let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
            std::collections::HashMap::new();
        let mut equity_amounts: std::collections::HashMap<
            String,
            std::collections::HashMap<String, rust_decimal::Decimal>,
        > = std::collections::HashMap::new();
        {
            use rust_decimal::Decimal;
            // Decimal::new(v, 2) is v/100, so these are 10%, 30% and 60%.
            let hundred = Decimal::from(100u32);
            let ten_pct = Decimal::new(10, 2);
            let thirty_pct = Decimal::new(30, 2);
            let sixty_pct = Decimal::new(60, 2);
            let parent_code = &group_structure.parent_entity;
            for sub in &group_structure.subsidiaries {
                // Net assets derived from the JEs; fall back to a nominal
                // 1,000,000 when the computed value is not positive.
                let net_assets = {
                    let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
                    if na > Decimal::ZERO {
                        na
                    } else {
                        Decimal::from(1_000_000u64)
                    }
                };
                let ownership_pct = sub.ownership_percentage / hundred;
                let inv_key = format!("{}_{}", parent_code, sub.entity_code);
                investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
                // NOTE(review): net assets are split 10/30/60 across equity
                // accounts 3100/3200/3300 — presumably share capital /
                // additional paid-in capital / retained earnings; confirm
                // the account semantics against the chart of accounts.
                let mut eq_map = std::collections::HashMap::new();
                eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
                eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
                eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
                equity_amounts.insert(sub.entity_code.clone(), eq_map);
            }
        }
        let journal = elim_generator.generate_eliminations(
            &fiscal_period,
            end_date,
            &all_balances,
            &matched_pairs,
            &investment_amounts,
            &equity_amounts,
        );
        elimination_entries = journal.entries.clone();
    }
    let matched_pair_count = matched_pairs.len();
    let elimination_entry_count = elimination_entries.len();
    let match_rate = matching_result.match_rate;
    stats.ic_matched_pair_count = matched_pair_count;
    stats.ic_elimination_count = elimination_entry_count;
    stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
    info!(
        "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
        matched_pair_count,
        stats.ic_transaction_count,
        seller_entries.len(),
        buyer_entries.len(),
        elimination_entry_count,
        match_rate * 100.0
    );
    self.check_resources_with_log("post-intercompany")?;
    // NCI is measured only for partially-owned, fully-consolidated
    // subsidiaries; net income is approximated as 8% of net assets.
    let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
        use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
        use rust_decimal::Decimal;
        let eight_pct = Decimal::new(8, 2);
        group_structure
            .subsidiaries
            .iter()
            .filter(|sub| {
                sub.nci_percentage > Decimal::ZERO
                    && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
            })
            .map(|sub| {
                let net_assets_from_jes =
                    Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
                // Same nominal fallback as the elimination block above.
                let net_assets = if net_assets_from_jes > Decimal::ZERO {
                    net_assets_from_jes.round_dp(2)
                } else {
                    Decimal::from(1_000_000u64)
                };
                let net_income = (net_assets * eight_pct).round_dp(2);
                NciMeasurement::compute(
                    sub.entity_code.clone(),
                    sub.nci_percentage,
                    net_assets,
                    net_income,
                )
            })
            .collect()
    };
    if !nci_measurements.is_empty() {
        info!(
            "NCI measurements: {} subsidiaries with non-controlling interests",
            nci_measurements.len()
        );
    }
    Ok(IntercompanySnapshot {
        group_structure: Some(group_structure),
        matched_pairs,
        seller_journal_entries: seller_entries,
        buyer_journal_entries: buyer_entries,
        elimination_entries,
        nci_measurements,
        ic_document_chains: Some(ic_doc_chains),
        matched_pair_count,
        elimination_entry_count,
        match_rate,
    })
}
fn phase_financial_reporting(
&mut self,
document_flows: &DocumentFlowSnapshot,
journal_entries: &[JournalEntry],
coa: &Arc<ChartOfAccounts>,
_hr: &HrSnapshot,
_audit: &AuditSnapshot,
stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<FinancialReportingSnapshot> {
let fs_enabled = self.phase_config.generate_financial_statements
|| self.config.financial_reporting.enabled;
let br_enabled = self.phase_config.generate_bank_reconciliation;
if !fs_enabled && !br_enabled {
debug!("Phase 15: Skipped (financial reporting disabled)");
return Ok(FinancialReportingSnapshot::default());
}
info!("Phase 15: Generating Financial Reporting Data");
let seed = self.seed;
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
let mut financial_statements = Vec::new();
let mut bank_reconciliations = Vec::new();
let mut trial_balances = Vec::new();
let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
Vec::new();
let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
std::collections::HashMap::new();
let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
if fs_enabled {
let has_journal_entries = !journal_entries.is_empty();
let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
let elimination_entries: Vec<&JournalEntry> = journal_entries
.iter()
.filter(|je| je.header.is_elimination)
.collect();
for period in 0..self.config.global.period_months {
let period_start = start_date + chrono::Months::new(period);
let period_end =
start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
let fiscal_year = period_end.year() as u16;
let fiscal_period = period_end.month() as u8;
let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
let mut entity_tb_map: std::collections::HashMap<
String,
std::collections::HashMap<String, rust_decimal::Decimal>,
> = std::collections::HashMap::new();
for (company_idx, company) in self.config.companies.iter().enumerate() {
let company_code = company.code.as_str();
let currency = company.currency.as_str();
let company_seed_offset = 20u64 + (company_idx as u64 * 100);
let mut company_fs_gen =
FinancialStatementGenerator::new(seed + company_seed_offset);
if has_journal_entries {
let tb_entries = Self::build_cumulative_trial_balance(
journal_entries,
coa,
company_code,
start_date,
period_end,
fiscal_year,
fiscal_period,
);
let entity_cat_map =
entity_tb_map.entry(company_code.to_string()).or_default();
for tb_entry in &tb_entries {
let net = tb_entry.debit_balance - tb_entry.credit_balance;
*entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
}
let stmts = company_fs_gen.generate(
company_code,
currency,
&tb_entries,
period_start,
period_end,
fiscal_year,
fiscal_period,
None,
"SYS-AUTOCLOSE",
);
let mut entity_stmts = Vec::new();
for stmt in stmts {
if stmt.statement_type == StatementType::CashFlowStatement {
let net_income = Self::calculate_net_income_from_tb(&tb_entries);
let cf_items = Self::build_cash_flow_from_trial_balances(
&tb_entries,
None,
net_income,
);
entity_stmts.push(FinancialStatement {
cash_flow_items: cf_items,
..stmt
});
} else {
entity_stmts.push(stmt);
}
}
financial_statements.extend(entity_stmts.clone());
standalone_statements
.entry(company_code.to_string())
.or_default()
.extend(entity_stmts);
if company_idx == 0 {
trial_balances.push(PeriodTrialBalance {
fiscal_year,
fiscal_period,
period_start,
period_end,
entries: tb_entries,
});
}
} else {
let tb_entries = Self::build_trial_balance_from_entries(
journal_entries,
coa,
company_code,
fiscal_year,
fiscal_period,
);
let stmts = company_fs_gen.generate(
company_code,
currency,
&tb_entries,
period_start,
period_end,
fiscal_year,
fiscal_period,
None,
"SYS-AUTOCLOSE",
);
financial_statements.extend(stmts.clone());
standalone_statements
.entry(company_code.to_string())
.or_default()
.extend(stmts);
if company_idx == 0 && !tb_entries.is_empty() {
trial_balances.push(PeriodTrialBalance {
fiscal_year,
fiscal_period,
period_start,
period_end,
entries: tb_entries,
});
}
}
}
let group_currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let period_eliminations: Vec<JournalEntry> = elimination_entries
.iter()
.filter(|je| {
je.header.fiscal_year == fiscal_year
&& je.header.fiscal_period == fiscal_period
})
.map(|je| (*je).clone())
.collect();
let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
&entity_tb_map,
&period_eliminations,
&period_label,
);
let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
.line_items
.iter()
.map(|li| {
let net = li.post_elimination_total;
let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
(net, rust_decimal::Decimal::ZERO)
} else {
(rust_decimal::Decimal::ZERO, -net)
};
datasynth_generators::TrialBalanceEntry {
account_code: li.account_category.clone(),
account_name: li.account_category.clone(),
category: li.account_category.clone(),
debit_balance: debit,
credit_balance: credit,
}
})
.collect();
let mut cons_stmts = cons_gen.generate(
"GROUP",
group_currency,
&cons_tb,
period_start,
period_end,
fiscal_year,
fiscal_period,
None,
"SYS-AUTOCLOSE",
);
let bs_categories: &[&str] = &[
"CASH",
"RECEIVABLES",
"INVENTORY",
"FIXEDASSETS",
"PAYABLES",
"ACCRUEDLIABILITIES",
"LONGTERMDEBT",
"EQUITY",
];
let (bs_items, is_items): (Vec<_>, Vec<_>) =
cons_line_items.into_iter().partition(|li| {
let upper = li.label.to_uppercase();
bs_categories.iter().any(|c| upper == *c)
});
for stmt in &mut cons_stmts {
stmt.is_consolidated = true;
match stmt.statement_type {
StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
_ => {} }
}
consolidated_statements.extend(cons_stmts);
consolidation_schedules.push(schedule);
}
let _ = &mut fs_gen;
stats.financial_statement_count = financial_statements.len();
info!(
"Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
stats.financial_statement_count,
consolidated_statements.len(),
has_journal_entries
);
let entity_seeds: Vec<SegmentSeed> = self
.config
.companies
.iter()
.map(|c| SegmentSeed {
code: c.code.clone(),
name: c.name.clone(),
currency: c.currency.clone(),
})
.collect();
let mut seg_gen = SegmentGenerator::new(seed + 30);
for period in 0..self.config.global.period_months {
let period_end =
start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
let fiscal_year = period_end.year() as u16;
let fiscal_period = period_end.month() as u8;
let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
use datasynth_core::models::StatementType;
let cons_is = consolidated_statements.iter().find(|s| {
s.fiscal_year == fiscal_year
&& s.fiscal_period == fiscal_period
&& s.statement_type == StatementType::IncomeStatement
});
let cons_bs = consolidated_statements.iter().find(|s| {
s.fiscal_year == fiscal_year
&& s.fiscal_period == fiscal_period
&& s.statement_type == StatementType::BalanceSheet
});
let is_stmt = cons_is.or_else(|| {
financial_statements.iter().find(|s| {
s.fiscal_year == fiscal_year
&& s.fiscal_period == fiscal_period
&& s.statement_type == StatementType::IncomeStatement
})
});
let bs_stmt = cons_bs.or_else(|| {
financial_statements.iter().find(|s| {
s.fiscal_year == fiscal_year
&& s.fiscal_period == fiscal_period
&& s.statement_type == StatementType::BalanceSheet
})
});
let consolidated_revenue = is_stmt
.and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
.map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
let consolidated_profit = is_stmt
.and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
.map(|li| li.amount)
.unwrap_or(rust_decimal::Decimal::ZERO);
let consolidated_assets = bs_stmt
.and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
.map(|li| li.amount)
.unwrap_or(rust_decimal::Decimal::ZERO);
if consolidated_revenue == rust_decimal::Decimal::ZERO
&& consolidated_assets == rust_decimal::Decimal::ZERO
{
continue;
}
let group_code = self
.config
.companies
.first()
.map(|c| c.code.as_str())
.unwrap_or("GROUP");
let total_depr: rust_decimal::Decimal = journal_entries
.iter()
.filter(|je| je.header.document_type == "CL")
.flat_map(|je| je.lines.iter())
.filter(|l| l.gl_account.starts_with("6000"))
.map(|l| l.debit_amount)
.fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
Some(total_depr)
} else {
None
};
let (segs, recon) = seg_gen.generate(
group_code,
&period_label,
consolidated_revenue,
consolidated_profit,
consolidated_assets,
&entity_seeds,
depr_param,
);
segment_reports.extend(segs);
segment_reconciliations.push(recon);
}
info!(
"Segment reports generated: {} segments, {} reconciliations",
segment_reports.len(),
segment_reconciliations.len()
);
}
if br_enabled && !document_flows.payments.is_empty() {
let employee_ids: Vec<String> = self
.master_data
.employees
.iter()
.map(|e| e.employee_id.clone())
.collect();
let mut br_gen =
BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
for company in &self.config.companies {
let company_payments: Vec<PaymentReference> = document_flows
.payments
.iter()
.filter(|p| p.header.company_code == company.code)
.map(|p| PaymentReference {
id: p.header.document_id.clone(),
amount: if p.is_vendor { p.amount } else { -p.amount },
date: p.header.document_date,
reference: p
.check_number
.clone()
.or_else(|| p.wire_reference.clone())
.unwrap_or_else(|| p.header.document_id.clone()),
})
.collect();
if company_payments.is_empty() {
continue;
}
let bank_account_id = format!("{}-MAIN", company.code);
for period in 0..self.config.global.period_months {
let period_start = start_date + chrono::Months::new(period);
let period_end =
start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
let period_payments: Vec<PaymentReference> = company_payments
.iter()
.filter(|p| p.date >= period_start && p.date <= period_end)
.cloned()
.collect();
let recon = br_gen.generate(
&company.code,
&bank_account_id,
period_start,
period_end,
&company.currency,
&period_payments,
);
bank_reconciliations.push(recon);
}
}
info!(
"Bank reconciliations generated: {} reconciliations",
bank_reconciliations.len()
);
}
stats.bank_reconciliation_count = bank_reconciliations.len();
self.check_resources_with_log("post-financial-reporting")?;
if !trial_balances.is_empty() {
info!(
"Period-close trial balances captured: {} periods",
trial_balances.len()
);
}
let notes_to_financial_statements = Vec::new();
Ok(FinancialReportingSnapshot {
financial_statements,
standalone_statements,
consolidated_statements,
consolidation_schedules,
bank_reconciliations,
trial_balances,
segment_reports,
segment_reconciliations,
notes_to_financial_statements,
})
}
/// Builds notes to the financial statements for every configured company and
/// appends them to `financial_reporting.notes_to_financial_statements`.
///
/// Two passes per company:
/// 1. standard notes from a `NotesGeneratorContext` (revenue, PP&E, deferred
///    tax, provisions, pensions, related parties, subsequent events);
/// 2. supplementary notes from an `EnhancedNotesContext` (debt instruments,
///    hedging, provision movements), numbered after the standard notes.
///
/// Returns silently, generating nothing, if `global.start_date` cannot be
/// parsed as `%Y-%m-%d`.
fn generate_notes_to_financial_statements(
&self,
financial_reporting: &mut FinancialReportingSnapshot,
accounting_standards: &AccountingStandardsSnapshot,
tax: &TaxSnapshot,
hr: &HrSnapshot,
audit: &AuditSnapshot,
treasury: &TreasurySnapshot,
) {
use datasynth_config::schema::AccountingFrameworkConfig;
use datasynth_core::models::StatementType;
use datasynth_generators::period_close::notes_generator::{
EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
};
let seed = self.seed;
// Malformed start_date: skip notes generation entirely rather than erroring.
let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
{
Ok(d) => d,
Err(_) => return,
};
let mut notes_gen = NotesGenerator::new(seed + 4235);
for company in &self.config.companies {
// Notes are produced only for the fiscal year of the final period.
let last_period_end = start_date
+ chrono::Months::new(self.config.global.period_months)
- chrono::Days::new(1);
let fiscal_year = last_period_end.year() as u16;
// Latest-year standalone income statement for this company, if present.
let entity_is = financial_reporting
.standalone_statements
.get(&company.code)
.and_then(|stmts| {
stmts.iter().find(|s| {
s.fiscal_year == fiscal_year
&& s.statement_type == StatementType::IncomeStatement
})
});
// Matching standalone balance sheet.
let entity_bs = financial_reporting
.standalone_statements
.get(&company.code)
.and_then(|stmts| {
stmts.iter().find(|s| {
s.fiscal_year == fiscal_year
&& s.statement_type == StatementType::BalanceSheet
})
});
// IS-REV and BS-FA line items feed the revenue and gross PP&E notes.
let revenue_amount = entity_is
.and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
.map(|li| li.amount);
let ppe_gross = entity_bs
.and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
.map(|li| li.amount);
// IFRS-flavoured configs (incl. dual reporting) label notes "IFRS";
// everything else — including an unset framework — falls back to US GAAP.
let framework = match self
.config
.accounting_standards
.framework
.unwrap_or_default()
{
AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
"IFRS".to_string()
}
_ => "US GAAP".to_string(),
};
// Sum closing DTA/DTL across this entity's deferred tax rollforwards;
// only strictly positive balances are disclosed (zero/negative → None).
let (entity_dta, entity_dtl) = {
let mut dta = rust_decimal::Decimal::ZERO;
let mut dtl = rust_decimal::Decimal::ZERO;
for rf in &tax.deferred_tax.rollforwards {
if rf.entity_code == company.code {
dta += rf.closing_dta;
dtl += rf.closing_dtl;
}
}
(
if dta > rust_decimal::Decimal::ZERO {
Some(dta)
} else {
None
},
if dtl > rust_decimal::Decimal::ZERO {
Some(dtl)
} else {
None
},
)
};
// Provisions disclosed at their best-estimate amounts, entity-filtered.
let entity_provisions: Vec<_> = accounting_standards
.provisions
.iter()
.filter(|p| p.entity_code == company.code)
.collect();
let provision_count = entity_provisions.len();
let total_provisions = if provision_count > 0 {
Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
} else {
None
};
let entity_pension_plan_count = hr
.pension_plans
.iter()
.filter(|p| p.entity_code == company.code)
.count();
// Gross DBO approximated as net pension liability plus plan assets at
// fair value — TODO confirm this matches the disclosure model's intent.
let entity_total_dbo: Option<rust_decimal::Decimal> = {
let sum: rust_decimal::Decimal = hr
.pension_disclosures
.iter()
.filter(|d| {
hr.pension_plans
.iter()
.any(|p| p.id == d.plan_id && p.entity_code == company.code)
})
.map(|d| d.net_pension_liability)
.sum();
let plan_assets_sum: rust_decimal::Decimal = hr
.pension_plan_assets
.iter()
.filter(|a| {
hr.pension_plans
.iter()
.any(|p| p.id == a.plan_id && p.entity_code == company.code)
})
.map(|a| a.fair_value_closing)
.sum();
if entity_pension_plan_count > 0 {
Some(sum + plan_assets_sum)
} else {
None
}
};
// Plan assets disclosed separately at closing fair value.
let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
let sum: rust_decimal::Decimal = hr
.pension_plan_assets
.iter()
.filter(|a| {
hr.pension_plans
.iter()
.any(|p| p.id == a.plan_id && p.entity_code == company.code)
})
.map(|a| a.fair_value_closing)
.sum();
if entity_pension_plan_count > 0 {
Some(sum)
} else {
None
}
};
// NOTE(review): these audit counts are group-wide totals, not filtered
// to this entity (unlike the tax/HR metrics above) — confirm intended.
let rp_count = audit.related_party_transactions.len();
let se_count = audit.subsequent_events.len();
let adjusting_count = audit
.subsequent_events
.iter()
.filter(|e| {
matches!(
e.classification,
datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
)
})
.count();
let ctx = NotesGeneratorContext {
entity_code: company.code.clone(),
framework,
period: format!("FY{}", fiscal_year),
period_end: last_period_end,
currency: company.currency.clone(),
revenue_amount,
total_ppe_gross: ppe_gross,
// Decimal::new(21, 2) == 0.21, i.e. a 21% statutory tax rate.
statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
deferred_tax_asset: entity_dta,
deferred_tax_liability: entity_dtl,
provision_count,
total_provisions,
pension_plan_count: entity_pension_plan_count,
total_dbo: entity_total_dbo,
total_plan_assets: entity_total_plan_assets,
related_party_transaction_count: rp_count,
subsequent_event_count: se_count,
adjusting_event_count: adjusting_count,
// Remaining fields take their defaults.
..NotesGeneratorContext::default()
};
let entity_notes = notes_gen.generate(&ctx);
let standard_note_count = entity_notes.len() as u32;
info!(
"Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
);
financial_reporting
.notes_to_financial_statements
.extend(entity_notes);
// Entity-filtered debt instruments: (type, principal, maturity) triples.
let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
.debt_instruments
.iter()
.filter(|d| d.entity_id == company.code)
.map(|d| {
(
format!("{:?}", d.instrument_type),
d.principal,
d.maturity_date.to_string(),
)
})
.collect();
// NOTE(review): hedge metrics are group-wide (not filtered by entity),
// unlike debt_instruments above — confirm this asymmetry is intended.
let hedge_count = treasury.hedge_relationships.len();
let effective_hedges = treasury
.hedge_relationships
.iter()
.filter(|h| h.is_effective)
.count();
let total_notional: rust_decimal::Decimal = treasury
.hedging_instruments
.iter()
.map(|h| h.notional_amount)
.sum();
let total_fair_value: rust_decimal::Decimal = treasury
.hedging_instruments
.iter()
.map(|h| h.fair_value)
.sum();
// Provision movements are joined to this entity via its provision ids.
let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
.provisions
.iter()
.filter(|p| p.entity_code == company.code)
.map(|p| p.id.as_str())
.collect();
let provision_movements: Vec<(
String,
rust_decimal::Decimal,
rust_decimal::Decimal,
rust_decimal::Decimal,
)> = accounting_standards
.provision_movements
.iter()
.filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
.map(|m| {
let prov_type = accounting_standards
.provisions
.iter()
.find(|p| p.id == m.provision_id)
.map(|p| format!("{:?}", p.provision_type))
.unwrap_or_else(|| "Unknown".to_string());
(prov_type, m.opening, m.additions, m.closing)
})
.collect();
let enhanced_ctx = EnhancedNotesContext {
entity_code: company.code.clone(),
period: format!("FY{}", fiscal_year),
currency: company.currency.clone(),
// Inventory components are not tracked here; disclosed as zero.
finished_goods_value: rust_decimal::Decimal::ZERO,
wip_value: rust_decimal::Decimal::ZERO,
raw_materials_value: rust_decimal::Decimal::ZERO,
debt_instruments,
hedge_count,
effective_hedges,
total_notional,
total_fair_value,
provision_movements,
};
// Enhanced (supplementary) notes continue numbering after the standard notes.
let enhanced_notes =
notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
if !enhanced_notes.is_empty() {
info!(
"Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
company.code,
enhanced_notes.len(),
enhanced_ctx.debt_instruments.len(),
hedge_count,
enhanced_ctx.provision_movements.len(),
);
financial_reporting
.notes_to_financial_statements
.extend(enhanced_notes);
}
}
}
/// Builds a trial balance for one company and fiscal period by summing
/// journal-entry line debits and credits per G/L account.
///
/// Only entries matching `company_code`, `fiscal_year`, and `fiscal_period`
/// contribute. Accounts whose debit and credit totals are both zero are
/// omitted; the result is sorted by account code. Account names are resolved
/// via `coa`, falling back to "Account {code}".
fn build_trial_balance_from_entries(
    journal_entries: &[JournalEntry],
    coa: &ChartOfAccounts,
    company_code: &str,
    fiscal_year: u16,
    fiscal_period: u8,
) -> Vec<datasynth_generators::TrialBalanceEntry> {
    use rust_decimal::Decimal;
    // One map of (debit, credit) pairs; BTreeMap keeps accounts sorted by
    // code, so no separate dedup/sort pass is needed.
    let mut totals: std::collections::BTreeMap<String, (Decimal, Decimal)> =
        std::collections::BTreeMap::new();
    for je in journal_entries {
        let hdr = &je.header;
        let in_scope = hdr.company_code == company_code
            && hdr.fiscal_year == fiscal_year
            && hdr.fiscal_period == fiscal_period;
        if !in_scope {
            continue;
        }
        for line in &je.lines {
            let slot = totals.entry(line.gl_account.clone()).or_default();
            slot.0 += line.debit_amount;
            slot.1 += line.credit_amount;
        }
    }
    totals
        .into_iter()
        // Drop accounts with no activity on either side.
        .filter(|(_, (debit, credit))| !(debit.is_zero() && credit.is_zero()))
        .map(|(account_code, (debit, credit))| {
            // Fall back to a generic name when the account is not in the CoA.
            let account_name = coa
                .get_account(&account_code)
                .map(|gl| gl.short_description.clone())
                .unwrap_or_else(|| format!("Account {account_code}"));
            datasynth_generators::TrialBalanceEntry {
                category: Self::category_from_account_code(&account_code),
                account_name,
                account_code,
                debit_balance: debit,
                credit_balance: credit,
            }
        })
        .collect()
}
/// Builds a hybrid "cumulative" trial balance for `company_code`:
/// balance-sheet accounts accumulate every posting dated within
/// `[start_date, period_end]`, while income-statement (P&L) accounts include
/// only postings of the given `fiscal_year`/`fiscal_period`.
///
/// Accounts with zero debit and credit totals are dropped; the result is
/// sorted by account code. Account names are resolved via `coa`, falling
/// back to "Account {code}".
fn build_cumulative_trial_balance(
    journal_entries: &[JournalEntry],
    coa: &ChartOfAccounts,
    company_code: &str,
    start_date: NaiveDate,
    period_end: NaiveDate,
    fiscal_year: u16,
    fiscal_period: u8,
) -> Vec<datasynth_generators::TrialBalanceEntry> {
    use rust_decimal::Decimal;
    // Single source of truth for which categories belong on the balance
    // sheet; this list was previously duplicated in two `matches!`
    // expressions, which risked silent drift if one copy was edited.
    fn is_balance_sheet_category(category: &str) -> bool {
        matches!(
            category,
            "Cash"
                | "Receivables"
                | "Inventory"
                | "FixedAssets"
                | "Payables"
                | "AccruedLiabilities"
                | "LongTermDebt"
                | "Equity"
        )
    }
    let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
    let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
    let mut is_debits: HashMap<String, Decimal> = HashMap::new();
    let mut is_credits: HashMap<String, Decimal> = HashMap::new();
    for je in journal_entries {
        if je.header.company_code != company_code {
            continue;
        }
        for line in &je.lines {
            let acct = &line.gl_account;
            let category = Self::category_from_account_code(acct);
            if is_balance_sheet_category(&category) {
                // Balance-sheet accounts: cumulative postings inside the window.
                if je.header.document_date <= period_end
                    && je.header.document_date >= start_date
                {
                    *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
                        line.debit_amount;
                    *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
                        line.credit_amount;
                }
            } else if je.header.fiscal_year == fiscal_year
                && je.header.fiscal_period == fiscal_period
            {
                // P&L accounts: current fiscal period only (periodic, not cumulative).
                *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
                    line.debit_amount;
                *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
                    line.credit_amount;
            }
        }
    }
    // BTreeSet gives a sorted, de-duplicated account universe across all
    // four maps without an explicit sort pass.
    let mut accounts: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
    accounts.extend(bs_debits.keys().cloned());
    accounts.extend(bs_credits.keys().cloned());
    accounts.extend(is_debits.keys().cloned());
    accounts.extend(is_credits.keys().cloned());
    let mut entries = Vec::new();
    for acct_number in &accounts {
        let category = Self::category_from_account_code(acct_number);
        // Pick the cumulative (BS) or periodic (IS) totals for this account.
        let (debits, credits) = if is_balance_sheet_category(&category) {
            (&bs_debits, &bs_credits)
        } else {
            (&is_debits, &is_credits)
        };
        let debit = debits.get(acct_number).copied().unwrap_or(Decimal::ZERO);
        let credit = credits.get(acct_number).copied().unwrap_or(Decimal::ZERO);
        if debit.is_zero() && credit.is_zero() {
            continue;
        }
        let account_name = coa
            .get_account(acct_number)
            .map(|gl| gl.short_description.clone())
            .unwrap_or_else(|| format!("Account {acct_number}"));
        entries.push(datasynth_generators::TrialBalanceEntry {
            account_code: acct_number.clone(),
            account_name,
            category,
            debit_balance: debit,
            credit_balance: credit,
        });
    }
    entries
}
/// Derives an indirect-method cash flow statement from category-aggregated
/// trial balances.
///
/// Category balances are (debit - credit) nets; when `prior_tb` is `None`,
/// the full current balance counts as the period change. Liability
/// categories carry credit (negative) nets, so their changes are negated to
/// express increases as cash inflows. A net decrease in fixed assets is
/// treated as depreciation added back; a net increase as capex.
///
/// Fix: the operating total already subtracted the accrued-liabilities
/// movement, but no itemized line existed for it, so the statement's lines
/// did not sum to the CF-OP total. A CF-ACCR line is now emitted and
/// subsequent sort orders are shifted accordingly.
fn build_cash_flow_from_trial_balances(
    current_tb: &[datasynth_generators::TrialBalanceEntry],
    prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
    net_income: rust_decimal::Decimal,
) -> Vec<CashFlowItem> {
    use rust_decimal::Decimal;
    // Sum net (debit - credit) balances per category.
    let aggregate =
        |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
            let mut map: HashMap<String, Decimal> = HashMap::new();
            for entry in tb {
                let net = entry.debit_balance - entry.credit_balance;
                *map.entry(entry.category.clone()).or_default() += net;
            }
            map
        };
    let current = aggregate(current_tb);
    let prior = prior_tb.map(aggregate);
    let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
        *map.get(key).unwrap_or(&Decimal::ZERO)
    };
    // Period-over-period movement; with no prior TB the whole balance is the change.
    let change = |key: &str| -> Decimal {
        let curr = get(&current, key);
        match &prior {
            Some(p) => curr - get(p, key),
            None => curr,
        }
    };
    let fixed_asset_change = change("FixedAssets");
    let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
        -fixed_asset_change
    } else {
        Decimal::ZERO
    };
    let ar_change = change("Receivables");
    let inventory_change = change("Inventory");
    let ap_change = change("Payables");
    let accrued_change = change("AccruedLiabilities");
    // Equals the sum of the itemized operating lines below (incl. CF-ACCR).
    let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
        - ap_change
        - accrued_change;
    let capex = if fixed_asset_change > Decimal::ZERO {
        -fixed_asset_change
    } else {
        Decimal::ZERO
    };
    let investing_cf = capex;
    let debt_change = -change("LongTermDebt");
    let equity_change = -change("Equity");
    let financing_cf = debt_change + equity_change;
    let net_change = operating_cf + investing_cf + financing_cf;
    vec![
        CashFlowItem {
            item_code: "CF-NI".to_string(),
            label: "Net Income".to_string(),
            category: CashFlowCategory::Operating,
            amount: net_income,
            amount_prior: None,
            sort_order: 1,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-DEP".to_string(),
            label: "Depreciation & Amortization".to_string(),
            category: CashFlowCategory::Operating,
            amount: depreciation_addback,
            amount_prior: None,
            sort_order: 2,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-AR".to_string(),
            label: "Change in Accounts Receivable".to_string(),
            category: CashFlowCategory::Operating,
            amount: -ar_change,
            amount_prior: None,
            sort_order: 3,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-AP".to_string(),
            label: "Change in Accounts Payable".to_string(),
            category: CashFlowCategory::Operating,
            amount: -ap_change,
            amount_prior: None,
            sort_order: 4,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-INV".to_string(),
            label: "Change in Inventory".to_string(),
            category: CashFlowCategory::Operating,
            amount: -inventory_change,
            amount_prior: None,
            sort_order: 5,
            is_total: false,
        },
        // New line: previously missing, causing items to not foot to CF-OP.
        CashFlowItem {
            item_code: "CF-ACCR".to_string(),
            label: "Change in Accrued Liabilities".to_string(),
            category: CashFlowCategory::Operating,
            amount: -accrued_change,
            amount_prior: None,
            sort_order: 6,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-OP".to_string(),
            label: "Net Cash from Operating Activities".to_string(),
            category: CashFlowCategory::Operating,
            amount: operating_cf,
            amount_prior: None,
            sort_order: 7,
            is_total: true,
        },
        CashFlowItem {
            item_code: "CF-CAPEX".to_string(),
            label: "Capital Expenditures".to_string(),
            category: CashFlowCategory::Investing,
            amount: capex,
            amount_prior: None,
            sort_order: 8,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-INV-T".to_string(),
            label: "Net Cash from Investing Activities".to_string(),
            category: CashFlowCategory::Investing,
            amount: investing_cf,
            amount_prior: None,
            sort_order: 9,
            is_total: true,
        },
        CashFlowItem {
            item_code: "CF-DEBT".to_string(),
            label: "Net Borrowings / (Repayments)".to_string(),
            category: CashFlowCategory::Financing,
            amount: debt_change,
            amount_prior: None,
            sort_order: 10,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-EQ".to_string(),
            label: "Equity Changes".to_string(),
            category: CashFlowCategory::Financing,
            amount: equity_change,
            amount_prior: None,
            sort_order: 11,
            is_total: false,
        },
        CashFlowItem {
            item_code: "CF-FIN-T".to_string(),
            label: "Net Cash from Financing Activities".to_string(),
            category: CashFlowCategory::Financing,
            amount: financing_cf,
            amount_prior: None,
            sort_order: 12,
            is_total: true,
        },
        // NOTE(review): the grand total is tagged Operating, matching the
        // original behavior — a dedicated "Net" category may be preferable.
        CashFlowItem {
            item_code: "CF-NET".to_string(),
            label: "Net Change in Cash".to_string(),
            category: CashFlowCategory::Operating,
            amount: net_change,
            amount_prior: None,
            sort_order: 13,
            is_total: true,
        },
    ]
}
/// Approximates after-tax net income from a trial balance by netting the
/// P&L categories and applying a flat 25% tax on the operating result.
///
/// Category balances are (debit - credit) sums of the TB entries.
/// NOTE(review): revenue/other-income categories normally carry credit
/// balances, so their (debit - credit) nets are negative here, and the
/// `- OtherIncome` term subtracts a negative — confirm the intended sign
/// convention against this value's consumers. Tax is also applied
/// unconditionally, even when the operating result is a loss — TODO confirm.
fn calculate_net_income_from_tb(
    tb: &[datasynth_generators::TrialBalanceEntry],
) -> rust_decimal::Decimal {
    use rust_decimal::Decimal;
    // Net each TB entry into its category bucket.
    let mut by_category: HashMap<String, Decimal> = HashMap::new();
    for entry in tb {
        *by_category.entry(entry.category.clone()).or_default() +=
            entry.debit_balance - entry.credit_balance;
    }
    let net_of =
        |category: &str| -> Decimal { by_category.get(category).copied().unwrap_or(Decimal::ZERO) };
    let operating_income = net_of("Revenue")
        - net_of("CostOfSales")
        - net_of("OperatingExpenses")
        - net_of("OtherExpenses")
        - net_of("OtherIncome");
    // Decimal::new(25, 2) == 0.25: flat 25% effective tax rate.
    let tax = operating_income * Decimal::new(25, 2);
    operating_income - tax
}
/// Maps a G/L account code to a trial-balance category using its first two
/// characters (a conventional numbering scheme: 1x assets, 2x liabilities,
/// 3x equity, 4x revenue, 5x cost of sales, 6x opex, 7x other income,
/// 8x other expenses). Anything unrecognized — including codes shorter than
/// two characters — defaults to "OperatingExpenses".
fn category_from_account_code(code: &str) -> String {
    let mut chars = code.chars();
    let first = chars.next();
    let second = chars.next();
    let category = match (first, second) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unknown prefixes (and 45-49, 53-59, 75-79, 9x) fall through here.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
/// Phase 16: generates HR data — payroll runs and line items, time entries,
/// expense reports, benefit enrollments, pension plans, and stock
/// compensation — into an `HrSnapshot`, mirroring the counts into `stats`.
///
/// Returns a default (empty) snapshot when the phase is disabled or no
/// employees exist in master data. Company code and currency come from the
/// first configured company, defaulting to "1000"/"USD".
///
/// # Errors
/// Returns an error if `global.start_date` is malformed or the post-phase
/// resource check fails.
fn phase_hr_data(
&mut self,
stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<HrSnapshot> {
if !self.phase_config.generate_hr {
debug!("Phase 16: Skipped (HR generation disabled)");
return Ok(HrSnapshot::default());
}
info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
let seed = self.seed;
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
// All HR data is attributed to the first configured company.
let company_code = self
.config
.companies
.first()
.map(|c| c.code.as_str())
.unwrap_or("1000");
let currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let employee_ids: Vec<String> = self
.master_data
.employees
.iter()
.map(|e| e.employee_id.clone())
.collect();
if employee_ids.is_empty() {
debug!("Phase 16: Skipped (no employees available)");
return Ok(HrSnapshot::default());
}
// Distinct cost centers observed on employees, deduplicated via HashSet.
let cost_center_ids: Vec<String> = self
.master_data
.employees
.iter()
.filter_map(|e| e.cost_center.clone())
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();
let mut snapshot = HrSnapshot::default();
if self.config.hr.payroll.enabled {
// Seed offsets (+330, +31, +32, +33, +34, +35 below) keep each
// generator on an independent deterministic RNG stream.
let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
.with_pools(employee_ids.clone(), cost_center_ids.clone());
let payroll_pack = self.primary_pack();
payroll_gen.set_country_pack(payroll_pack.clone());
// (employee_id, annual salary, cost center, department) tuples.
let employees_with_salary: Vec<(
String,
rust_decimal::Decimal,
Option<String>,
Option<String>,
)> = self
.master_data
.employees
.iter()
.map(|e| {
// Fall back to a 60,000 annual salary when master data
// carries no positive base salary.
let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
e.base_salary
} else {
rust_decimal::Decimal::from(60_000)
};
(
e.employee_id.clone(),
annual, e.cost_center.clone(),
e.department_id.clone(),
)
})
.collect();
let change_history = &self.master_data.employee_change_history;
let has_changes = !change_history.is_empty();
if has_changes {
debug!(
"Payroll will incorporate {} employee change events",
change_history.len()
);
}
// One payroll run per calendar month of the simulation window.
for month in 0..self.config.global.period_months {
let period_start = start_date + chrono::Months::new(month);
let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
// Use the change-aware generator only when there is history to apply.
let (run, items) = if has_changes {
payroll_gen.generate_with_changes(
company_code,
&employees_with_salary,
period_start,
period_end,
currency,
change_history,
)
} else {
payroll_gen.generate(
company_code,
&employees_with_salary,
period_start,
period_end,
currency,
)
};
snapshot.payroll_runs.push(run);
snapshot.payroll_run_count += 1;
snapshot.payroll_line_item_count += items.len();
snapshot.payroll_line_items.extend(items);
}
}
if self.config.hr.time_attendance.enabled {
let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
.with_pools(employee_ids.clone(), cost_center_ids.clone());
let entries = time_gen.generate(
&employee_ids,
start_date,
end_date,
&self.config.hr.time_attendance,
);
snapshot.time_entry_count = entries.len();
snapshot.time_entries = entries;
}
if self.config.hr.expenses.enabled {
let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
.with_pools(employee_ids.clone(), cost_center_ids.clone());
expense_gen.set_country_pack(self.primary_pack().clone());
let company_currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let reports = expense_gen.generate_with_currency(
&employee_ids,
start_date,
end_date,
&self.config.hr.expenses,
company_currency,
);
snapshot.expense_report_count = reports.len();
snapshot.expense_reports = reports;
}
// Benefit enrollments are gated on the payroll flag as well.
if self.config.hr.payroll.enabled {
let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
let employee_pairs: Vec<(String, String)> = self
.master_data
.employees
.iter()
.map(|e| (e.employee_id.clone(), e.display_name.clone()))
.collect();
let enrollments =
benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
snapshot.benefit_enrollment_count = enrollments.len();
snapshot.benefit_enrollments = enrollments;
}
// NOTE(review): always true here — the disabled case returned early above.
if self.phase_config.generate_hr {
let entity_name = self
.config
.companies
.first()
.map(|c| c.name.as_str())
.unwrap_or("Entity");
let period_months = self.config.global.period_months;
// "FY{year}" for full-year windows, "YYYY-MM" for partial years.
let period_label = {
let y = start_date.year();
let m = start_date.month();
if period_months >= 12 {
format!("FY{y}")
} else {
format!("{y}-{m:02}")
}
};
let reporting_date =
start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
// Average annual salary derived from actual payroll gross; partial
// years are annualized (x12 / months) before dividing by headcount.
let avg_salary: Option<rust_decimal::Decimal> = {
let employee_count = employee_ids.len();
if self.config.hr.payroll.enabled
&& employee_count > 0
&& !snapshot.payroll_runs.is_empty()
{
let total_gross: rust_decimal::Decimal = snapshot
.payroll_runs
.iter()
.filter(|r| r.company_code == company_code)
.map(|r| r.total_gross)
.sum();
if total_gross > rust_decimal::Decimal::ZERO {
let annual_total = if period_months > 0 && period_months < 12 {
total_gross * rust_decimal::Decimal::from(12u32)
/ rust_decimal::Decimal::from(period_months)
} else {
total_gross
};
Some(
(annual_total / rust_decimal::Decimal::from(employee_count))
.round_dp(2),
)
} else {
None
}
} else {
None
}
};
let mut pension_gen =
datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
let pension_snap = pension_gen.generate(
company_code,
entity_name,
&period_label,
reporting_date,
employee_ids.len(),
currency,
avg_salary,
period_months,
);
snapshot.pension_plan_count = pension_snap.plans.len();
snapshot.pension_plans = pension_snap.plans;
snapshot.pension_obligations = pension_snap.obligations;
snapshot.pension_plan_assets = pension_snap.plan_assets;
snapshot.pension_disclosures = pension_snap.disclosures;
snapshot.pension_journal_entries = pension_snap.journal_entries;
}
if self.phase_config.generate_hr && !employee_ids.is_empty() {
let period_months = self.config.global.period_months;
let period_label = {
let y = start_date.year();
let m = start_date.month();
if period_months >= 12 {
format!("FY{y}")
} else {
format!("{y}-{m:02}")
}
};
let reporting_date =
start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
let mut stock_comp_gen =
datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
let stock_snap = stock_comp_gen.generate(
company_code,
&employee_ids,
start_date,
&period_label,
reporting_date,
currency,
);
snapshot.stock_grant_count = stock_snap.grants.len();
snapshot.stock_grants = stock_snap.grants;
snapshot.stock_comp_expenses = stock_snap.expenses;
snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
}
// Mirror the snapshot counters into the run statistics.
stats.payroll_run_count = snapshot.payroll_run_count;
stats.time_entry_count = snapshot.time_entry_count;
stats.expense_report_count = snapshot.expense_report_count;
stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
stats.pension_plan_count = snapshot.pension_plan_count;
stats.stock_grant_count = snapshot.stock_grant_count;
info!(
"HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
snapshot.payroll_run_count, snapshot.payroll_line_item_count,
snapshot.time_entry_count, snapshot.expense_report_count,
snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
snapshot.stock_grant_count
);
self.check_resources_with_log("post-hr")?;
Ok(snapshot)
}
fn phase_accounting_standards(
&mut self,
ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
journal_entries: &[JournalEntry],
stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<AccountingStandardsSnapshot> {
if !self.phase_config.generate_accounting_standards {
debug!("Phase 17: Skipped (accounting standards generation disabled)");
return Ok(AccountingStandardsSnapshot::default());
}
info!("Phase 17: Generating Accounting Standards Data");
let seed = self.seed;
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
let company_code = self
.config
.companies
.first()
.map(|c| c.code.as_str())
.unwrap_or("1000");
let currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let framework = match self.config.accounting_standards.framework {
Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
datasynth_standards::framework::AccountingFramework::UsGaap
}
Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
datasynth_standards::framework::AccountingFramework::Ifrs
}
Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
datasynth_standards::framework::AccountingFramework::DualReporting
}
Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
datasynth_standards::framework::AccountingFramework::FrenchGaap
}
Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
datasynth_standards::framework::AccountingFramework::GermanGaap
}
None => {
let pack = self.primary_pack();
let pack_fw = pack.accounting.framework.as_str();
match pack_fw {
"ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
"dual_reporting" => {
datasynth_standards::framework::AccountingFramework::DualReporting
}
"french_gaap" => {
datasynth_standards::framework::AccountingFramework::FrenchGaap
}
"german_gaap" | "hgb" => {
datasynth_standards::framework::AccountingFramework::GermanGaap
}
_ => datasynth_standards::framework::AccountingFramework::UsGaap,
}
}
};
let mut snapshot = AccountingStandardsSnapshot::default();
if self.config.accounting_standards.revenue_recognition.enabled {
let customer_ids: Vec<String> = self
.master_data
.customers
.iter()
.map(|c| c.customer_id.clone())
.collect();
if !customer_ids.is_empty() {
let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
let contracts = rev_gen.generate(
company_code,
&customer_ids,
start_date,
end_date,
currency,
&self.config.accounting_standards.revenue_recognition,
framework,
);
snapshot.revenue_contract_count = contracts.len();
snapshot.contracts = contracts;
}
}
if self.config.accounting_standards.impairment.enabled {
let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
.master_data
.assets
.iter()
.map(|a| {
(
a.asset_id.clone(),
a.description.clone(),
a.acquisition_cost,
)
})
.collect();
if !asset_data.is_empty() {
let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
let tests = imp_gen.generate(
company_code,
&asset_data,
end_date,
&self.config.accounting_standards.impairment,
framework,
);
snapshot.impairment_test_count = tests.len();
snapshot.impairment_tests = tests;
}
}
if self
.config
.accounting_standards
.business_combinations
.enabled
{
let bc_config = &self.config.accounting_standards.business_combinations;
let framework_str = match framework {
datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
_ => "US_GAAP",
};
let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
let bc_snap = bc_gen.generate(
company_code,
currency,
start_date,
end_date,
bc_config.acquisition_count,
framework_str,
);
snapshot.business_combination_count = bc_snap.combinations.len();
snapshot.business_combination_journal_entries = bc_snap.journal_entries;
snapshot.business_combinations = bc_snap.combinations;
}
if self
.config
.accounting_standards
.expected_credit_loss
.enabled
{
let ecl_config = &self.config.accounting_standards.expected_credit_loss;
let framework_str = match framework {
datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
_ => "ASC_326",
};
let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
let mut ecl_gen = EclGenerator::new(seed + 43);
let bucket_exposures: Vec<(
datasynth_core::models::subledger::ar::AgingBucket,
rust_decimal::Decimal,
)> = if ar_aging_reports.is_empty() {
use datasynth_core::models::subledger::ar::AgingBucket;
vec![
(
AgingBucket::Current,
rust_decimal::Decimal::from(500_000_u32),
),
(
AgingBucket::Days1To30,
rust_decimal::Decimal::from(120_000_u32),
),
(
AgingBucket::Days31To60,
rust_decimal::Decimal::from(45_000_u32),
),
(
AgingBucket::Days61To90,
rust_decimal::Decimal::from(15_000_u32),
),
(
AgingBucket::Over90Days,
rust_decimal::Decimal::from(8_000_u32),
),
]
} else {
use datasynth_core::models::subledger::ar::AgingBucket;
let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
std::collections::HashMap::new();
for report in ar_aging_reports {
for (bucket, amount) in &report.bucket_totals {
*totals.entry(*bucket).or_default() += amount;
}
}
AgingBucket::all()
.into_iter()
.map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
.collect()
};
let ecl_snap = ecl_gen.generate(
company_code,
end_date,
&bucket_exposures,
ecl_config,
&period_label,
framework_str,
);
snapshot.ecl_model_count = ecl_snap.ecl_models.len();
snapshot.ecl_models = ecl_snap.ecl_models;
snapshot.ecl_provision_movements = ecl_snap.provision_movements;
snapshot.ecl_journal_entries = ecl_snap.journal_entries;
}
{
let framework_str = match framework {
datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
_ => "US_GAAP",
};
let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
.max(rust_decimal::Decimal::from(100_000_u32));
let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
let mut prov_gen = ProvisionGenerator::new(seed + 44);
let prov_snap = prov_gen.generate(
company_code,
currency,
revenue_proxy,
end_date,
&period_label,
framework_str,
None, );
snapshot.provision_count = prov_snap.provisions.len();
snapshot.provisions = prov_snap.provisions;
snapshot.provision_movements = prov_snap.movements;
snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
snapshot.provision_journal_entries = prov_snap.journal_entries;
}
{
let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
let presentation_currency = self
.config
.global
.presentation_currency
.clone()
.unwrap_or_else(|| self.config.global.group_currency.clone());
let mut rate_table = FxRateTable::new(&presentation_currency);
let base_rates = base_rates_usd();
for (ccy, rate) in &base_rates {
rate_table.add_rate(FxRate::new(
ccy,
"USD",
RateType::Closing,
end_date,
*rate,
"SYNTHETIC",
));
let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
rate_table.add_rate(FxRate::new(
ccy,
"USD",
RateType::Average,
end_date,
avg,
"SYNTHETIC",
));
}
let mut translation_results = Vec::new();
for company in &self.config.companies {
let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
.max(rust_decimal::Decimal::from(100_000_u32));
let func_ccy = company
.functional_currency
.clone()
.unwrap_or_else(|| company.currency.clone());
let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
&company.code,
&func_ccy,
&presentation_currency,
&ias21_period_label,
end_date,
company_revenue,
&rate_table,
);
translation_results.push(result);
}
snapshot.currency_translation_count = translation_results.len();
snapshot.currency_translation_results = translation_results;
}
stats.revenue_contract_count = snapshot.revenue_contract_count;
stats.impairment_test_count = snapshot.impairment_test_count;
stats.business_combination_count = snapshot.business_combination_count;
stats.ecl_model_count = snapshot.ecl_model_count;
stats.provision_count = snapshot.provision_count;
info!(
"Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
snapshot.revenue_contract_count,
snapshot.impairment_test_count,
snapshot.business_combination_count,
snapshot.ecl_model_count,
snapshot.provision_count,
snapshot.currency_translation_count
);
self.check_resources_with_log("post-accounting-standards")?;
Ok(snapshot)
}
/// Phase 18: generate manufacturing data for the first configured company.
///
/// Produces production orders, quality inspections, monthly cycle counts,
/// BOM components, and inventory movements tied to the production orders.
/// Returns a default (empty) snapshot when the phase is disabled or when
/// master data contains no materials. Mirrors the resulting counts into
/// `stats` and runs a resource check before returning.
fn phase_manufacturing(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<ManufacturingSnapshot> {
    if !self.phase_config.generate_manufacturing {
        debug!("Phase 18: Skipped (manufacturing generation disabled)");
        return Ok(ManufacturingSnapshot::default());
    }
    info!("Phase 18: Generating Manufacturing Data");
    let seed = self.seed;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    // All manufacturing documents are attributed to the first company
    // ("1000" fallback when none is configured).
    let company_code = self
        .config
        .companies
        .first()
        .map(|c| c.code.as_str())
        .unwrap_or("1000");
    let material_data: Vec<(String, String)> = self
        .master_data
        .materials
        .iter()
        .map(|m| (m.material_id.clone(), m.description.clone()))
        .collect();
    if material_data.is_empty() {
        debug!("Phase 18: Skipped (no materials available)");
        return Ok(ManufacturingSnapshot::default());
    }
    let mut snapshot = ManufacturingSnapshot::default();
    // Production orders feed both quality inspections and inventory
    // movements below.
    let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
    let production_orders = prod_gen.generate(
        company_code,
        &material_data,
        start_date,
        end_date,
        &self.config.manufacturing.production_orders,
        &self.config.manufacturing.costing,
        &self.config.manufacturing.routing,
    );
    snapshot.production_order_count = production_orders.len();
    // Capture (order, material, description) triples before the orders are
    // moved into the snapshot.
    let inspection_data: Vec<(String, String, String)> = production_orders
        .iter()
        .map(|po| {
            (
                po.order_id.clone(),
                po.material_id.clone(),
                po.material_description.clone(),
            )
        })
        .collect();
    snapshot.production_orders = production_orders;
    if !inspection_data.is_empty() {
        let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
        let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
        snapshot.quality_inspection_count = inspections.len();
        snapshot.quality_inspections = inspections;
    }
    // Assign each material a synthetic storage location (SL-001..SL-010,
    // round-robin by index).
    let storage_locations: Vec<(String, String)> = material_data
        .iter()
        .enumerate()
        .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
        .collect();
    let employee_ids: Vec<String> = self
        .master_data
        .employees
        .iter()
        .map(|e| e.employee_id.clone())
        .collect();
    let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
        .with_employee_pool(employee_ids);
    // One cycle count per period month; each counts between 10 and 50
    // items, scaled to the number of storage locations.
    for month in 0..self.config.global.period_months {
        let count_date = start_date + chrono::Months::new(month);
        let items_per_count = storage_locations.len().clamp(10, 50);
        let cc = cc_gen.generate(
            company_code,
            &storage_locations,
            count_date,
            items_per_count,
        );
        snapshot.cycle_counts.push(cc);
    }
    // The collection length already is the count; no separate counter
    // needs to be maintained in the loop above.
    snapshot.cycle_count_count = snapshot.cycle_counts.len();
    let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
    let bom_components = bom_gen.generate(company_code, &material_data);
    snapshot.bom_component_count = bom_components.len();
    snapshot.bom_components = bom_components;
    let currency = self
        .config
        .companies
        .first()
        .map(|c| c.currency.as_str())
        .unwrap_or("USD");
    let production_order_ids: Vec<String> = snapshot
        .production_orders
        .iter()
        .map(|po| po.order_id.clone())
        .collect();
    let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
    let inventory_movements = inv_mov_gen.generate_with_production_orders(
        company_code,
        &material_data,
        start_date,
        end_date,
        2,
        currency,
        &production_order_ids,
    );
    snapshot.inventory_movement_count = inventory_movements.len();
    snapshot.inventory_movements = inventory_movements;
    stats.production_order_count = snapshot.production_order_count;
    stats.quality_inspection_count = snapshot.quality_inspection_count;
    stats.cycle_count_count = snapshot.cycle_count_count;
    stats.bom_component_count = snapshot.bom_component_count;
    stats.inventory_movement_count = snapshot.inventory_movement_count;
    info!(
        "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
        snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
        snapshot.bom_component_count, snapshot.inventory_movement_count
    );
    self.check_resources_with_log("post-manufacturing")?;
    Ok(snapshot)
}
/// Phase 19: generate sales quotes, management KPIs, and budgets.
///
/// KPIs are first generated synthetically; where financial statements for
/// the primary company are available, the margin and liquidity KPIs are
/// then recomputed from actual statement line items so they agree with
/// the reported financials. Budgets are built over the chart of accounts
/// for the fiscal year of `start_date`. Returns a default snapshot when
/// the phase is disabled; mirrors counts into `stats` before returning.
fn phase_sales_kpi_budgets(
    &mut self,
    coa: &Arc<ChartOfAccounts>,
    financial_reporting: &FinancialReportingSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<SalesKpiBudgetsSnapshot> {
    if !self.phase_config.generate_sales_kpi_budgets {
        debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
        return Ok(SalesKpiBudgetsSnapshot::default());
    }
    info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
    let seed = self.seed;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    // Primary company ("1000" fallback when none is configured).
    let company_code = self
        .config
        .companies
        .first()
        .map(|c| c.code.as_str())
        .unwrap_or("1000");
    let mut snapshot = SalesKpiBudgetsSnapshot::default();
    if self.config.sales_quotes.enabled {
        let customer_data: Vec<(String, String)> = self
            .master_data
            .customers
            .iter()
            .map(|c| (c.customer_id.clone(), c.name.clone()))
            .collect();
        let material_data: Vec<(String, String)> = self
            .master_data
            .materials
            .iter()
            .map(|m| (m.material_id.clone(), m.description.clone()))
            .collect();
        if !customer_data.is_empty() && !material_data.is_empty() {
            let employee_ids: Vec<String> = self
                .master_data
                .employees
                .iter()
                .map(|e| e.employee_id.clone())
                .collect();
            // The quote generator's pool only needs the ids; reuse the ids
            // already captured in `customer_data` instead of walking master
            // data a second time.
            let customer_ids: Vec<String> =
                customer_data.iter().map(|(id, _)| id.clone()).collect();
            let company_currency = self
                .config
                .companies
                .first()
                .map(|c| c.currency.as_str())
                .unwrap_or("USD");
            let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
                .with_pools(employee_ids, customer_ids);
            let quotes = quote_gen.generate_with_currency(
                company_code,
                &customer_data,
                &material_data,
                start_date,
                end_date,
                &self.config.sales_quotes,
                company_currency,
            );
            snapshot.sales_quote_count = quotes.len();
            snapshot.sales_quotes = quotes;
        }
    }
    if self.config.financial_reporting.management_kpis.enabled {
        let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
        let mut kpis = kpi_gen.generate(
            company_code,
            start_date,
            end_date,
            &self.config.financial_reporting.management_kpis,
        );
        // Recompute margin and liquidity KPIs from the generated financial
        // statements (when present) so the KPI values are consistent with
        // the statements for the primary company.
        {
            use rust_decimal::Decimal;
            if let Some(income_stmt) =
                financial_reporting.financial_statements.iter().find(|fs| {
                    fs.statement_type == StatementType::IncomeStatement
                        && fs.company_code == company_code
                })
            {
                // Line items are matched by section keyword; expense-side
                // totals use absolute values since their sign convention
                // differs from revenue. Only non-total lines are summed to
                // avoid double counting.
                let total_revenue: Decimal = income_stmt
                    .line_items
                    .iter()
                    .filter(|li| li.section.contains("Revenue") && !li.is_total)
                    .map(|li| li.amount)
                    .sum();
                let total_cogs: Decimal = income_stmt
                    .line_items
                    .iter()
                    .filter(|li| {
                        (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
                            && !li.is_total
                    })
                    .map(|li| li.amount.abs())
                    .sum();
                let total_opex: Decimal = income_stmt
                    .line_items
                    .iter()
                    .filter(|li| {
                        li.section.contains("Expense")
                            && !li.is_total
                            && !li.section.contains("Cost")
                    })
                    .map(|li| li.amount.abs())
                    .sum();
                if total_revenue > Decimal::ZERO {
                    let hundred = Decimal::from(100);
                    let gross_margin_pct =
                        ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
                    let operating_income = total_revenue - total_cogs - total_opex;
                    let op_margin_pct =
                        (operating_income * hundred / total_revenue).round_dp(2);
                    for kpi in &mut kpis {
                        if kpi.name == "Gross Margin" {
                            kpi.value = gross_margin_pct;
                        } else if kpi.name == "Operating Margin" {
                            kpi.value = op_margin_pct;
                        }
                    }
                }
            }
            if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
                fs.statement_type == StatementType::BalanceSheet
                    && fs.company_code == company_code
            }) {
                let current_assets: Decimal = bs
                    .line_items
                    .iter()
                    .filter(|li| li.section.contains("Current Assets") && !li.is_total)
                    .map(|li| li.amount)
                    .sum();
                let current_liabilities: Decimal = bs
                    .line_items
                    .iter()
                    .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
                    .map(|li| li.amount.abs())
                    .sum();
                if current_liabilities > Decimal::ZERO {
                    let current_ratio = (current_assets / current_liabilities).round_dp(2);
                    for kpi in &mut kpis {
                        if kpi.name == "Current Ratio" {
                            kpi.value = current_ratio;
                        }
                    }
                }
            }
        }
        snapshot.kpi_count = kpis.len();
        snapshot.kpis = kpis;
    }
    if self.config.financial_reporting.budgets.enabled {
        let account_data: Vec<(String, String)> = coa
            .accounts
            .iter()
            .map(|a| (a.account_number.clone(), a.short_description.clone()))
            .collect();
        if !account_data.is_empty() {
            let fiscal_year = start_date.year() as u32;
            let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
            let budget = budget_gen.generate(
                company_code,
                fiscal_year,
                &account_data,
                &self.config.financial_reporting.budgets,
            );
            snapshot.budget_line_count = budget.line_items.len();
            snapshot.budgets.push(budget);
        }
    }
    stats.sales_quote_count = snapshot.sales_quote_count;
    stats.kpi_count = snapshot.kpi_count;
    stats.budget_line_count = snapshot.budget_line_count;
    info!(
        "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
        snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
    );
    self.check_resources_with_log("post-sales-kpi-budgets")?;
    Ok(snapshot)
}
/// Derive a pre-tax income figure for `company_code` from posted journal
/// entries: revenue (credit-normal) minus COGS, operating, and other
/// income/expense lines (debit-normal), rounded to 2 decimal places.
///
/// Falls back to a fixed 1,000,000 proxy when the net result is exactly
/// zero (e.g. no matching entries), so downstream tax generators always
/// receive a non-zero base.
fn compute_pre_tax_income(
    company_code: &str,
    journal_entries: &[JournalEntry],
) -> rust_decimal::Decimal {
    use datasynth_core::accounts::AccountCategory;
    use rust_decimal::Decimal;
    // Accumulate (revenue, expenses) over every line of every journal
    // entry belonging to this company.
    let (revenue, expenses) = journal_entries
        .iter()
        .filter(|je| je.header.company_code == company_code)
        .flat_map(|je| je.lines.iter())
        .fold((Decimal::ZERO, Decimal::ZERO), |(rev, exp), line| {
            match AccountCategory::from_account(&line.gl_account) {
                AccountCategory::Revenue => {
                    (rev + line.credit_amount - line.debit_amount, exp)
                }
                AccountCategory::Cogs
                | AccountCategory::OperatingExpense
                | AccountCategory::OtherIncomeExpense => {
                    (rev, exp + line.debit_amount - line.credit_amount)
                }
                _ => (rev, exp),
            }
        });
    let pre_tax = (revenue - expenses).round_dp(2);
    if pre_tax == Decimal::ZERO {
        Decimal::from(1_000_000u32)
    } else {
        pre_tax
    }
}
/// Phase 20: generate tax data.
///
/// Builds tax jurisdictions/codes from the primary country pack, then —
/// when enabled — per-company tax provisions (from a pre-tax income proxy
/// computed over posted journal entries), per-document tax lines for
/// vendor and customer invoices, deferred tax, and tax posting journal
/// entries dated from the originating documents. Returns a default
/// snapshot when the phase is disabled; mirrors counts into `stats`.
fn phase_tax_generation(
    &mut self,
    document_flows: &DocumentFlowSnapshot,
    journal_entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<TaxSnapshot> {
    if !self.phase_config.generate_tax {
        debug!("Phase 20: Skipped (tax generation disabled)");
        return Ok(TaxSnapshot::default());
    }
    info!("Phase 20: Generating Tax Data");
    let seed = self.seed;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let fiscal_year = start_date.year();
    let company_code = self
        .config
        .companies
        .first()
        .map(|c| c.code.as_str())
        .unwrap_or("1000");
    let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
        seed + 370,
        self.config.tax.clone(),
    );
    let pack = self.primary_pack().clone();
    let (jurisdictions, codes) =
        gen.generate_from_country_pack(&pack, company_code, fiscal_year);
    let mut provisions = Vec::new();
    if self.config.tax.provisions.enabled {
        let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
        // The statutory rate is loop-invariant, so convert it once.
        // Convert at 4 decimal places with rounding so sub-percent rates
        // (e.g. 0.2125) and float representation noise survive the
        // conversion; a plain `as i64` cast truncates and would turn
        // 0.2125 into 0.21.
        let statutory_rate = rust_decimal::Decimal::new(
            (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 10_000.0).round()
                as i64,
            4,
        );
        for company in &self.config.companies {
            let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
            let provision = provision_gen.generate(
                &company.code,
                start_date,
                pre_tax_income,
                statutory_rate,
            );
            provisions.push(provision);
        }
    }
    let mut tax_lines = Vec::new();
    if !codes.is_empty() {
        let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
            datasynth_generators::TaxLineGeneratorConfig::default(),
            codes.clone(),
            seed + 372,
        );
        // Domestic scenario: buyer and seller share the first company's
        // country ("US" fallback).
        let buyer_country = self
            .config
            .companies
            .first()
            .map(|c| c.country.as_str())
            .unwrap_or("US");
        for vi in &document_flows.vendor_invoices {
            let lines = tax_line_gen.generate_for_document(
                datasynth_core::models::TaxableDocumentType::VendorInvoice,
                &vi.header.document_id,
                buyer_country, buyer_country,
                vi.payable_amount,
                vi.header.document_date,
                None,
            );
            tax_lines.extend(lines);
        }
        for ci in &document_flows.customer_invoices {
            let lines = tax_line_gen.generate_for_document(
                datasynth_core::models::TaxableDocumentType::CustomerInvoice,
                &ci.header.document_id,
                buyer_country, buyer_country,
                ci.total_gross_amount,
                ci.header.document_date,
                None,
            );
            tax_lines.extend(lines);
        }
    }
    let deferred_tax = {
        let companies: Vec<(&str, &str)> = self
            .config
            .companies
            .iter()
            .map(|c| (c.code.as_str(), c.country.as_str()))
            .collect();
        let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
        deferred_gen.generate(&companies, start_date, journal_entries)
    };
    // Map document ids to their dates so tax posting JEs can be dated
    // consistently with the source invoices.
    let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
        std::collections::HashMap::new();
    for vi in &document_flows.vendor_invoices {
        doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
    }
    for ci in &document_flows.customer_invoices {
        doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
    }
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    let tax_posting_journal_entries = if !tax_lines.is_empty() {
        let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
            &tax_lines,
            company_code,
            &doc_dates,
            end_date,
        );
        debug!("Generated {} tax posting JEs", jes.len());
        jes
    } else {
        Vec::new()
    };
    let snapshot = TaxSnapshot {
        jurisdiction_count: jurisdictions.len(),
        code_count: codes.len(),
        jurisdictions,
        codes,
        tax_provisions: provisions,
        tax_lines,
        tax_returns: Vec::new(),
        withholding_records: Vec::new(),
        tax_anomaly_labels: Vec::new(),
        deferred_tax,
        tax_posting_journal_entries,
    };
    stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
    stats.tax_code_count = snapshot.code_count;
    stats.tax_provision_count = snapshot.tax_provisions.len();
    stats.tax_line_count = snapshot.tax_lines.len();
    info!(
        "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
        snapshot.jurisdiction_count,
        snapshot.code_count,
        snapshot.tax_provisions.len(),
        snapshot.deferred_tax.temporary_differences.len(),
        snapshot.deferred_tax.journal_entries.len(),
        snapshot.tax_posting_journal_entries.len(),
    );
    self.check_resources_with_log("post-tax")?;
    Ok(snapshot)
}
/// Phase 21: generate ESG data (environmental, social, governance).
///
/// Produces energy/water/waste records, Scope 1/2/3 emissions, workforce
/// diversity and safety metrics, a governance profile, supplier ESG
/// assessments, materiality/disclosure records, and climate scenarios for
/// the first configured company. Optionally injects ESG anomalies when an
/// anomaly rate is configured. Skipped entirely when the phase is
/// disabled or when the resource guard reports `Reduced` (or worse)
/// degradation.
fn phase_esg_generation(
    &mut self,
    document_flows: &DocumentFlowSnapshot,
    manufacturing: &ManufacturingSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<EsgSnapshot> {
    if !self.phase_config.generate_esg {
        debug!("Phase 21: Skipped (ESG generation disabled)");
        return Ok(EsgSnapshot::default());
    }
    // ESG is a lower-priority phase: bail out under resource pressure
    // rather than jeopardizing the remainder of the run.
    let degradation = self.check_resources()?;
    if degradation >= DegradationLevel::Reduced {
        debug!(
            "Phase skipped due to resource pressure (degradation: {:?})",
            degradation
        );
        return Ok(EsgSnapshot::default());
    }
    info!("Phase 21: Generating ESG Data");
    let seed = self.seed;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    // All ESG records are attributed to the first company ("1000"
    // fallback when none is configured).
    let entity_id = self
        .config
        .companies
        .first()
        .map(|c| c.code.as_str())
        .unwrap_or("1000");
    let esg_cfg = &self.config.esg;
    let mut snapshot = EsgSnapshot::default();
    // Environmental: per-facility energy, water, and waste records.
    let mut energy_gen = datasynth_generators::EnergyGenerator::new(
        esg_cfg.environmental.energy.clone(),
        seed + 80,
    );
    let energy_records = energy_gen.generate(entity_id, start_date, end_date);
    let facility_count = esg_cfg.environmental.energy.facility_count;
    let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
    snapshot.water = water_gen.generate(entity_id, start_date, end_date);
    let mut waste_gen = datasynth_generators::WasteGenerator::new(
        seed + 82,
        esg_cfg.environmental.waste.diversion_target,
        facility_count,
    );
    snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
    let mut emission_gen =
        datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
    // Translate energy records into emission-generator inputs; any source
    // other than gas/diesel/coal is treated as electricity.
    let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
        .iter()
        .map(|e| datasynth_generators::EnergyInput {
            facility_id: e.facility_id.clone(),
            energy_type: match e.energy_source {
                EnergySourceType::NaturalGas => {
                    datasynth_generators::EnergyInputType::NaturalGas
                }
                EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
                EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
                _ => datasynth_generators::EnergyInputType::Electricity,
            },
            consumption_kwh: e.consumption_kwh,
            period: e.period,
        })
        .collect();
    if !manufacturing.production_orders.is_empty() {
        // Derive additional energy inputs from production orders so that
        // manufacturing activity is reflected in the emissions.
        // NOTE(review): the two decimal literals (50 and 2) appear to be
        // per-order energy factors — confirm their units against
        // `energy_from_production`'s signature.
        let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
            &manufacturing.production_orders,
            rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
        if !mfg_energy.is_empty() {
            info!(
                "ESG: {} energy inputs derived from {} production orders",
                mfg_energy.len(),
                manufacturing.production_orders.len(),
            );
            energy_inputs.extend(mfg_energy);
        }
    }
    let mut emissions = Vec::new();
    emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
    emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
    // Scope 3 (purchased goods): per-vendor spend, taken from actual
    // vendor payments in the document flows where available.
    let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
        let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
        for payment in &document_flows.payments {
            if payment.is_vendor {
                *totals
                    .entry(payment.business_partner_id.clone())
                    .or_default() += payment.amount;
            }
        }
        totals
    };
    let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
        .master_data
        .vendors
        .iter()
        .map(|v| {
            // Vendors with no recorded payments get a fixed 10,000 spend
            // so they still contribute to Scope 3.
            let spend = vendor_payment_totals
                .get(&v.vendor_id)
                .copied()
                .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
            datasynth_generators::VendorSpendInput {
                vendor_id: v.vendor_id.clone(),
                category: format!("{:?}", v.vendor_type).to_lowercase(),
                spend,
                country: v.country.clone(),
            }
        })
        .collect();
    if !vendor_spend.is_empty() {
        emissions.extend(emission_gen.generate_scope3_purchased_goods(
            entity_id,
            &vendor_spend,
            start_date,
            end_date,
        ));
    }
    // Scope 3 (business travel and commuting), scaled by headcount;
    // travel spend is assumed at 2,000 per employee.
    let headcount = self.master_data.employees.len() as u32;
    if headcount > 0 {
        let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
        emissions.extend(emission_gen.generate_scope3_business_travel(
            entity_id,
            travel_spend,
            start_date,
        ));
        emissions
            .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
    }
    snapshot.emission_count = emissions.len();
    snapshot.emissions = emissions;
    snapshot.energy = energy_records;
    // Social: diversity, pay equity, and safety. Headcount is floored at
    // 100 for the synthetic workforce metrics.
    let mut workforce_gen =
        datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
    let total_headcount = headcount.max(100);
    snapshot.diversity =
        workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
    snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
    if !self.master_data.employees.is_empty() {
        // Supplement the synthetic diversity metrics with ones derived
        // from the actual HR employee records.
        let hr_diversity = workforce_gen.generate_diversity_from_employees(
            entity_id,
            &self.master_data.employees,
            end_date,
        );
        if !hr_diversity.is_empty() {
            info!(
                "ESG: {} diversity metrics derived from {} actual employees",
                hr_diversity.len(),
                self.master_data.employees.len(),
            );
            snapshot.diversity.extend(hr_diversity);
        }
    }
    snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
        entity_id,
        facility_count,
        start_date,
        end_date,
    );
    // ~2,000 working hours per employee (standard FTE assumption).
    let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
        entity_id,
        &snapshot.safety_incidents,
        total_hours,
        start_date,
    );
    snapshot.safety_metrics = vec![safety_metric];
    // Governance: board size and independence profile.
    let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
        seed + 85,
        esg_cfg.governance.board_size,
        esg_cfg.governance.independence_target,
    );
    snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
    // Supply-chain ESG assessments, one input per vendor in master data.
    let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
        esg_cfg.supply_chain_esg.clone(),
        seed + 86,
    );
    let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
        .master_data
        .vendors
        .iter()
        .map(|v| datasynth_generators::VendorInput {
            vendor_id: v.vendor_id.clone(),
            country: v.country.clone(),
            industry: format!("{:?}", v.vendor_type).to_lowercase(),
            quality_score: None,
        })
        .collect();
    snapshot.supplier_assessments =
        supplier_gen.generate(entity_id, &vendor_inputs, start_date);
    // Reporting: materiality assessment first (disclosures depend on it),
    // then disclosures and climate scenarios.
    let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
        seed + 87,
        esg_cfg.reporting.clone(),
        esg_cfg.climate_scenarios.clone(),
    );
    snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
    snapshot.disclosures = disclosure_gen.generate_disclosures(
        entity_id,
        &snapshot.materiality,
        start_date,
        end_date,
    );
    snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
    snapshot.disclosure_count = snapshot.disclosures.len();
    // Optional anomaly injection across all ESG record families; labels
    // are collected so downstream consumers can supervise on them.
    if esg_cfg.anomaly_rate > 0.0 {
        let mut anomaly_injector =
            datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
        let mut labels = Vec::new();
        labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
        labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
        labels.extend(
            anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
        );
        labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
        labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
        snapshot.anomaly_labels = labels;
    }
    stats.esg_emission_count = snapshot.emission_count;
    stats.esg_disclosure_count = snapshot.disclosure_count;
    info!(
        "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
        snapshot.emission_count,
        snapshot.disclosure_count,
        snapshot.supplier_assessments.len()
    );
    self.check_resources_with_log("post-esg")?;
    Ok(snapshot)
}
fn phase_treasury_data(
&mut self,
document_flows: &DocumentFlowSnapshot,
subledger: &SubledgerSnapshot,
intercompany: &IntercompanySnapshot,
stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<TreasurySnapshot> {
if !self.phase_config.generate_treasury {
debug!("Phase 22: Skipped (treasury generation disabled)");
return Ok(TreasurySnapshot::default());
}
let degradation = self.check_resources()?;
if degradation >= DegradationLevel::Reduced {
debug!(
"Phase skipped due to resource pressure (degradation: {:?})",
degradation
);
return Ok(TreasurySnapshot::default());
}
info!("Phase 22: Generating Treasury Data");
let seed = self.seed;
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
let currency = self
.config
.companies
.first()
.map(|c| c.currency.as_str())
.unwrap_or("USD");
let entity_id = self
.config
.companies
.first()
.map(|c| c.code.as_str())
.unwrap_or("1000");
let mut snapshot = TreasurySnapshot::default();
let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
self.config.treasury.debt.clone(),
seed + 90,
);
snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
self.config.treasury.hedging.clone(),
seed + 91,
);
for debt in &snapshot.debt_instruments {
if debt.rate_type == InterestRateType::Variable {
let swap = hedge_gen.generate_ir_swap(
currency,
debt.principal,
debt.origination_date,
debt.maturity_date,
);
snapshot.hedging_instruments.push(swap);
}
}
{
let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
for payment in &document_flows.payments {
if payment.currency != currency {
let entry = fx_map
.entry(payment.currency.clone())
.or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
entry.0 += payment.amount;
if payment.header.document_date > entry.1 {
entry.1 = payment.header.document_date;
}
}
}
if !fx_map.is_empty() {
let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
.into_iter()
.map(|(foreign_ccy, (net_amount, settlement_date))| {
datasynth_generators::treasury::FxExposure {
currency_pair: format!("{foreign_ccy}/{currency}"),
foreign_currency: foreign_ccy,
net_amount,
settlement_date,
description: "AP payment FX exposure".to_string(),
}
})
.collect();
let (fx_instruments, fx_relationships) =
hedge_gen.generate(start_date, &fx_exposures);
snapshot.hedging_instruments.extend(fx_instruments);
snapshot.hedge_relationships.extend(fx_relationships);
}
}
if self.config.treasury.anomaly_rate > 0.0 {
let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
seed + 92,
self.config.treasury.anomaly_rate,
);
let mut labels = Vec::new();
labels.extend(
anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
);
snapshot.treasury_anomaly_labels = labels;
}
if self.config.treasury.cash_positioning.enabled {
let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
for payment in &document_flows.payments {
cash_flows.push(datasynth_generators::treasury::CashFlow {
date: payment.header.document_date,
account_id: format!("{entity_id}-MAIN"),
amount: payment.amount,
direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
});
}
for chain in &document_flows.o2c_chains {
if let Some(ref receipt) = chain.customer_receipt {
cash_flows.push(datasynth_generators::treasury::CashFlow {
date: receipt.header.document_date,
account_id: format!("{entity_id}-MAIN"),
amount: receipt.amount,
direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
});
}
for receipt in &chain.remainder_receipts {
cash_flows.push(datasynth_generators::treasury::CashFlow {
date: receipt.header.document_date,
account_id: format!("{entity_id}-MAIN"),
amount: receipt.amount,
direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
});
}
}
if !cash_flows.is_empty() {
let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
self.config.treasury.cash_positioning.clone(),
seed + 93,
);
let account_id = format!("{entity_id}-MAIN");
snapshot.cash_positions = cash_gen.generate(
entity_id,
&account_id,
currency,
&cash_flows,
start_date,
start_date + chrono::Months::new(self.config.global.period_months),
rust_decimal::Decimal::new(1_000_000, 0), );
}
}
if self.config.treasury.cash_forecasting.enabled {
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
.ar_invoices
.iter()
.filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
.map(|inv| {
let days_past_due = if inv.due_date < end_date {
(end_date - inv.due_date).num_days().max(0) as u32
} else {
0
};
datasynth_generators::treasury::ArAgingItem {
expected_date: inv.due_date,
amount: inv.amount_remaining,
days_past_due,
document_id: inv.invoice_number.clone(),
}
})
.collect();
let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
.ap_invoices
.iter()
.filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
.map(|inv| datasynth_generators::treasury::ApAgingItem {
payment_date: inv.due_date,
amount: inv.amount_remaining,
document_id: inv.invoice_number.clone(),
})
.collect();
let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
self.config.treasury.cash_forecasting.clone(),
seed + 94,
);
let forecast = forecast_gen.generate(
entity_id,
currency,
end_date,
&ar_items,
&ap_items,
&[], );
snapshot.cash_forecasts.push(forecast);
}
if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
self.config.treasury.cash_pooling.clone(),
seed + 95,
);
let account_ids: Vec<String> = snapshot
.cash_positions
.iter()
.map(|cp| cp.bank_account_id.clone())
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();
if let Some(pool) =
pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
{
let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
for cp in &snapshot.cash_positions {
latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
}
let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
latest_balances
.into_iter()
.filter(|(id, _)| pool.participant_accounts.contains(id))
.map(
|(id, balance)| datasynth_generators::treasury::AccountBalance {
account_id: id,
balance,
},
)
.collect();
let sweeps =
pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
snapshot.cash_pool_sweeps = sweeps;
snapshot.cash_pools.push(pool);
}
}
if self.config.treasury.bank_guarantees.enabled {
let vendor_names: Vec<String> = self
.master_data
.vendors
.iter()
.map(|v| v.name.clone())
.collect();
if !vendor_names.is_empty() {
let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
self.config.treasury.bank_guarantees.clone(),
seed + 96,
);
snapshot.bank_guarantees =
bg_gen.generate(entity_id, currency, start_date, &vendor_names);
}
}
if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
let entity_ids: Vec<String> = self
.config
.companies
.iter()
.map(|c| c.code.clone())
.collect();
let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
.matched_pairs
.iter()
.map(|mp| {
(
mp.seller_company.clone(),
mp.buyer_company.clone(),
mp.amount,
)
})
.collect();
if entity_ids.len() >= 2 {
let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
self.config.treasury.netting.clone(),
seed + 97,
);
snapshot.netting_runs = netting_gen.generate(
&entity_ids,
currency,
start_date,
self.config.global.period_months,
&ic_amounts,
);
}
}
{
use datasynth_generators::treasury::TreasuryAccounting;
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
let mut treasury_jes = Vec::new();
if !snapshot.debt_instruments.is_empty() {
let debt_jes =
TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
debug!("Generated {} debt interest accrual JEs", debt_jes.len());
treasury_jes.extend(debt_jes);
}
if !snapshot.hedging_instruments.is_empty() {
let hedge_jes = TreasuryAccounting::generate_hedge_jes(
&snapshot.hedging_instruments,
&snapshot.hedge_relationships,
end_date,
entity_id,
);
debug!("Generated {} hedge MTM JEs", hedge_jes.len());
treasury_jes.extend(hedge_jes);
}
if !snapshot.cash_pool_sweeps.is_empty() {
let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
&snapshot.cash_pool_sweeps,
entity_id,
);
debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
treasury_jes.extend(sweep_jes);
}
if !treasury_jes.is_empty() {
debug!("Total treasury journal entries: {}", treasury_jes.len());
}
snapshot.journal_entries = treasury_jes;
}
stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
stats.cash_position_count = snapshot.cash_positions.len();
stats.cash_forecast_count = snapshot.cash_forecasts.len();
stats.cash_pool_count = snapshot.cash_pools.len();
info!(
"Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
snapshot.debt_instruments.len(),
snapshot.hedging_instruments.len(),
snapshot.cash_positions.len(),
snapshot.cash_forecasts.len(),
snapshot.cash_pools.len(),
snapshot.bank_guarantees.len(),
snapshot.netting_runs.len(),
snapshot.journal_entries.len(),
);
self.check_resources_with_log("post-treasury")?;
Ok(snapshot)
}
/// Phase 23: generates project-accounting data (projects, cost lines,
/// change orders, milestones, earned-value metrics, revenue recognition).
///
/// Source documents for cost allocation are assembled from HR time entries
/// and expense reports plus P2P purchase orders and vendor invoices, all
/// attributed to the first configured company. The phase is skipped when
/// disabled or when the resource guard reports degraded capacity.
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures.
fn phase_project_accounting(
&mut self,
document_flows: &DocumentFlowSnapshot,
hr: &HrSnapshot,
stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<ProjectAccountingSnapshot> {
if !self.phase_config.generate_project_accounting {
debug!("Phase 23: Skipped (project accounting disabled)");
return Ok(ProjectAccountingSnapshot::default());
}
// Back off entirely under memory/CPU pressure rather than produce a
// partial snapshot.
let degradation = self.check_resources()?;
if degradation >= DegradationLevel::Reduced {
debug!(
"Phase skipped due to resource pressure (degradation: {:?})",
degradation
);
return Ok(ProjectAccountingSnapshot::default());
}
info!("Phase 23: Generating Project Accounting Data");
// NOTE(review): seed offsets +95..+99 are also used by other phases, and
// `seed + 99` is used twice within this phase (cost generator and revenue
// generator below) — presumably acceptable because the generator types
// differ, but worth confirming if stream independence matters.
let seed = self.seed;
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
let end_date = start_date + chrono::Months::new(self.config.global.period_months);
// All project data is attributed to the first company (fallback "1000").
let company_code = self
.config
.companies
.first()
.map(|c| c.code.as_str())
.unwrap_or("1000");
let mut snapshot = ProjectAccountingSnapshot::default();
let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
self.config.project_accounting.clone(),
seed + 95,
);
let pool = project_gen.generate(company_code, start_date, end_date);
snapshot.projects = pool.projects.clone();
{
// Build the source-document universe used for cost allocation.
let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
Vec::new();
for te in &hr.time_entries {
let total_hours = te.hours_regular + te.hours_overtime;
if total_hours > 0.0 {
source_docs.push(datasynth_generators::project_accounting::SourceDocument {
id: te.entry_id.clone(),
entity_id: company_code.to_string(),
date: te.date,
// Time is costed at a flat 75.0 per hour — hard-coded rate;
// TODO confirm this should not come from config.
amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
.unwrap_or(rust_decimal::Decimal::ZERO),
source_type: CostSourceType::TimeEntry,
hours: Some(
rust_decimal::Decimal::from_f64_retain(total_hours)
.unwrap_or(rust_decimal::Decimal::ZERO),
),
});
}
}
for er in &hr.expense_reports {
source_docs.push(datasynth_generators::project_accounting::SourceDocument {
id: er.report_id.clone(),
entity_id: company_code.to_string(),
date: er.submission_date,
amount: er.total_amount,
source_type: CostSourceType::ExpenseReport,
hours: None,
});
}
for po in &document_flows.purchase_orders {
source_docs.push(datasynth_generators::project_accounting::SourceDocument {
id: po.header.document_id.clone(),
entity_id: company_code.to_string(),
date: po.header.document_date,
amount: po.total_net_amount,
source_type: CostSourceType::PurchaseOrder,
hours: None,
});
}
for vi in &document_flows.vendor_invoices {
source_docs.push(datasynth_generators::project_accounting::SourceDocument {
id: vi.header.document_id.clone(),
entity_id: company_code.to_string(),
date: vi.header.document_date,
amount: vi.payable_amount,
source_type: CostSourceType::VendorInvoice,
hours: None,
});
}
// Allocate the collected documents to projects as cost lines.
if !source_docs.is_empty() && !pool.projects.is_empty() {
let mut cost_gen =
datasynth_generators::project_accounting::ProjectCostGenerator::new(
self.config.project_accounting.cost_allocation.clone(),
seed + 99,
);
snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
}
}
if self.config.project_accounting.change_orders.enabled {
let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
self.config.project_accounting.change_orders.clone(),
seed + 96,
);
snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
}
if self.config.project_accounting.milestones.enabled {
let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
self.config.project_accounting.milestones.clone(),
seed + 97,
);
// `end_date` is passed twice — presumably period end and as-of date
// coincide here; TODO confirm against the generator's signature.
snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
}
if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
self.config.project_accounting.earned_value.clone(),
seed + 98,
);
snapshot.earned_value_metrics =
evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
}
// Revenue recognition applies only to customer-type projects with costs.
if self.config.project_accounting.revenue_recognition.enabled
&& !snapshot.projects.is_empty()
&& !snapshot.cost_lines.is_empty()
{
use datasynth_generators::project_accounting::RevenueGenerator;
let rev_config = self.config.project_accounting.revenue_recognition.clone();
let avg_contract_value =
rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
.unwrap_or(rust_decimal::Decimal::new(500_000, 0));
// (project_id, contract_value, estimate-to-complete) triples:
// contract value = budget * 1.25 when a budget exists, otherwise the
// configured average; ETC = 80% of contract value.
let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
snapshot
.projects
.iter()
.filter(|p| {
matches!(
p.project_type,
datasynth_core::models::ProjectType::Customer
)
})
.map(|p| {
let cv = if p.budget > rust_decimal::Decimal::ZERO {
(p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
} else {
avg_contract_value
};
let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
})
.collect();
if !contract_values.is_empty() {
let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
snapshot.revenue_records = rev_gen.generate(
&snapshot.projects,
&snapshot.cost_lines,
&contract_values,
start_date,
end_date,
);
debug!(
"Generated {} revenue recognition records for {} customer projects",
snapshot.revenue_records.len(),
contract_values.len()
);
}
}
stats.project_count = snapshot.projects.len();
stats.project_change_order_count = snapshot.change_orders.len();
stats.project_cost_line_count = snapshot.cost_lines.len();
info!(
"Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
snapshot.projects.len(),
snapshot.change_orders.len(),
snapshot.milestones.len(),
snapshot.earned_value_metrics.len()
);
self.check_resources_with_log("post-project-accounting")?;
Ok(snapshot)
}
/// Phase 24: derives process-evolution and organizational change events for
/// the configured generation window.
///
/// Returns an empty pair when the phase is disabled. Seeds `self.seed + 100`
/// and `self.seed + 101` keep the two event streams deterministic.
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_evolution_events(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
    if !self.phase_config.generate_evolution_events {
        debug!("Phase 24: Skipped (evolution events disabled)");
        return Ok((Vec::new(), Vec::new()));
    }
    info!("Phase 24: Generating Process Evolution + Organizational Events");
    let window_start = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let window_end = window_start + chrono::Months::new(self.config.global.period_months);
    let process_events =
        datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
            self.seed + 100,
        )
        .generate_events(window_start, window_end);
    let codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|company| company.code.clone())
        .collect();
    let org_events =
        datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
            self.seed + 101,
        )
        .generate_events(window_start, window_end, &codes);
    stats.process_evolution_event_count = process_events.len();
    stats.organizational_event_count = org_events.len();
    info!(
        "Evolution events generated: {} process evolution, {} organizational",
        process_events.len(),
        org_events.len()
    );
    self.check_resources_with_log("post-evolution-events")?;
    Ok((process_events, org_events))
}
/// Phase 24b: emits disruption events over the generation window; gated on
/// the organizational-events feature flag.
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_disruption_events(
    &self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
    if !self.config.organizational_events.enabled {
        debug!("Phase 24b: Skipped (organizational events disabled)");
        return Ok(Vec::new());
    }
    info!("Phase 24b: Generating Disruption Events");
    let from = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let until = from + chrono::Months::new(self.config.global.period_months);
    let codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|company| company.code.clone())
        .collect();
    let mut generator =
        datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
    let events = generator.generate(from, until, &codes);
    stats.disruption_event_count = events.len();
    info!("Disruption events generated: {} events", events.len());
    self.check_resources_with_log("post-disruption-events")?;
    Ok(events)
}
/// Phase 25: pairs every journal entry with a perturbed counterpart for ML
/// training data.
///
/// The four perturbation specs (scale, date shift, self-approval, split) are
/// applied round-robin across the entries via a cycling iterator.
///
/// # Errors
/// Propagates resource-guard failures from the post-phase check.
fn phase_counterfactuals(
    &self,
    journal_entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
    if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
        debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
        return Ok(Vec::new());
    }
    info!("Phase 25: Generating Counterfactual Pairs for ML Training");
    use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
    let specs = [
        CounterfactualSpec::ScaleAmount { factor: 2.5 },
        CounterfactualSpec::ShiftDate { days: -14 },
        CounterfactualSpec::SelfApprove,
        CounterfactualSpec::SplitTransaction { split_count: 3 },
    ];
    let mut generator = CounterfactualGenerator::new(self.seed + 110);
    // `cycle()` repeats the spec slice, giving the same assignment as
    // indexing with `i % specs.len()`.
    let pairs: Vec<_> = journal_entries
        .iter()
        .zip(specs.iter().cycle())
        .map(|(entry, spec)| generator.generate(entry, spec))
        .collect();
    stats.counterfactual_pair_count = pairs.len();
    info!(
        "Counterfactual pairs generated: {} pairs from {} journal entries",
        pairs.len(),
        journal_entries.len()
    );
    self.check_resources_with_log("post-counterfactuals")?;
    Ok(pairs)
}
/// Phase 26: injects fraud red-flag indicators onto P2P and O2C document
/// chains.
///
/// Documents whose anomaly label is marked intentional are treated as fraud
/// documents; the flag generator decides per document (via a seeded RNG that
/// is shared across both chain loops, so iteration order is significant).
///
/// # Errors
/// Propagates resource-guard failures from the post-phase check.
fn phase_red_flags(
    &self,
    anomaly_labels: &AnomalyLabels,
    document_flows: &DocumentFlowSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
    if !self.config.fraud.enabled {
        debug!("Phase 26: Skipped (fraud generation disabled)");
        return Ok(Vec::new());
    }
    info!("Phase 26: Generating Fraud Red-Flag Indicators");
    use datasynth_generators::fraud::RedFlagGenerator;
    let generator = RedFlagGenerator::new();
    let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
    // Ids of documents carrying an intentional (fraud) anomaly label.
    let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
        .labels
        .iter()
        .filter_map(|label| {
            label
                .anomaly_type
                .is_intentional()
                .then(|| label.document_id.as_str())
        })
        .collect();
    let mut flags = Vec::new();
    // P2P first, then O2C — keep this order: the shared RNG makes the
    // output depend on it.
    for chain in &document_flows.p2p_chains {
        let doc_id = &chain.purchase_order.header.document_id;
        let flagged = fraud_doc_ids.contains(doc_id.as_str());
        flags.extend(generator.inject_flags(doc_id, flagged, &mut rng));
    }
    for chain in &document_flows.o2c_chains {
        let doc_id = &chain.sales_order.header.document_id;
        let flagged = fraud_doc_ids.contains(doc_id.as_str());
        flags.extend(generator.inject_flags(doc_id, flagged, &mut rng));
    }
    stats.red_flag_count = flags.len();
    info!(
        "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
        flags.len(),
        document_flows.p2p_chains.len(),
        document_flows.o2c_chains.len(),
        fraud_doc_ids.len()
    );
    self.check_resources_with_log("post-red-flags")?;
    Ok(flags)
}
/// Phase 26b: fabricates collusion rings linking employees and vendors.
///
/// Requires both the fraud feature and fraud clustering to be enabled.
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_collusion_rings(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
    if !self.config.fraud.enabled || !self.config.fraud.clustering_enabled {
        debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
        return Ok(Vec::new());
    }
    info!("Phase 26b: Generating Collusion Rings");
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let months = self.config.global.period_months;
    let employee_ids: Vec<String> = self
        .master_data
        .employees
        .iter()
        .map(|emp| emp.employee_id.clone())
        .collect();
    let vendor_ids: Vec<String> = self
        .master_data
        .vendors
        .iter()
        .map(|vend| vend.vendor_id.clone())
        .collect();
    let mut generator =
        datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
    let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
    stats.collusion_ring_count = rings.len();
    let member_total: usize = rings.iter().map(|ring| ring.size()).sum();
    info!(
        "Collusion rings generated: {} rings, total members: {}",
        rings.len(),
        member_total
    );
    self.check_resources_with_log("post-collusion-rings")?;
    Ok(rings)
}
/// Phase 27: generates bi-temporal version chains for vendor master data.
///
/// Skipped entirely when `temporal_attributes.enabled` is false. Every
/// vendor receives a deterministic UUID and a version chain; the chain
/// length target comes from `avg_versions_per_entity` only when
/// `generate_version_chains` is set, otherwise a single version (1.0).
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_temporal_attributes(
    &mut self,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<
    Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
> {
    if !self.config.temporal_attributes.enabled {
        debug!("Phase 27: Skipped (temporal attributes disabled)");
        return Ok(Vec::new());
    }
    info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    // Respect the dedicated config flag. The previous expression
    // (`generate_version_chains || enabled`) was unconditionally true here
    // — we only reach this point when `enabled` is set — which made the
    // `generate_version_chains` flag and the 1.0 fallback dead.
    let generate_version_chains = self.config.temporal_attributes.generate_version_chains;
    let mut temporal_config = {
        let ta = &self.config.temporal_attributes;
        datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
            .enabled(ta.enabled)
            .closed_probability(ta.valid_time.closed_probability)
            .avg_validity_days(ta.valid_time.avg_validity_days)
            .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
            .with_version_chains(if generate_version_chains {
                ta.avg_versions_per_entity
            } else {
                1.0
            })
            .build()
    };
    // Overlay the backdating knobs in place instead of rebuilding the
    // whole config when backdating is allowed.
    let tt = &self.config.temporal_attributes.transaction_time;
    if tt.allow_backdating {
        temporal_config.transaction_time.allow_backdating = true;
        temporal_config.transaction_time.backdating_probability = tt.backdating_probability;
        temporal_config.transaction_time.max_backdate_days = tt.max_backdate_days;
    }
    let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
        temporal_config,
        self.seed + 130,
        start_date,
    );
    // NOTE(review): the UUID factory shares the `seed + 130` offset with the
    // generator above — confirm that overlap is intended.
    let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
        self.seed + 130,
        datasynth_core::GeneratorType::Vendor,
    );
    let chains: Vec<_> = self
        .master_data
        .vendors
        .iter()
        .map(|vendor| {
            let id = uuid_factory.next();
            gen.generate_version_chain(vendor.clone(), id)
        })
        .collect();
    stats.temporal_version_chain_count = chains.len();
    info!("Temporal version chains generated: {} chains", chains.len());
    self.check_resources_with_log("post-temporal-attributes")?;
    Ok(chains)
}
/// Phase 28: builds the entity relationship graph (vendor/customer nodes
/// with strength-weighted edges) and material-based cross-process links
/// between P2P goods receipts and O2C deliveries.
///
/// The graph is gated on `relationship_strength.enabled`; cross-process
/// links are generated when configured OR whenever both P2P and O2C chains
/// exist (presumably a deliberate default-on heuristic — confirm).
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_entity_relationships(
&self,
journal_entries: &[JournalEntry],
document_flows: &DocumentFlowSnapshot,
stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<(
Option<datasynth_core::models::EntityGraph>,
Vec<datasynth_core::models::CrossProcessLink>,
)> {
use datasynth_generators::relationships::{
DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
TransactionSummary,
};
let rs_enabled = self.config.relationship_strength.enabled;
// Cross-process links activate either by config or implicitly when both
// flow types are present.
let cpl_enabled = self.config.cross_process_links.enabled
|| (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
if !rs_enabled && !cpl_enabled {
debug!(
"Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
);
return Ok((None, Vec::new()));
}
info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
let company_code = self
.config
.companies
.first()
.map(|c| c.code.as_str())
.unwrap_or("1000");
let gen_config = EntityGraphConfig {
enabled: rs_enabled,
cross_process: datasynth_generators::relationships::CrossProcessConfig {
enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
enable_return_flows: false,
enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
// Small datasets link every chain (rate 1.0) so tests/examples get
// deterministic coverage; larger runs sample 30%.
inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
1.0
} else {
0.30
},
..Default::default()
},
// Edge-strength weights are copied straight from configuration.
strength_config: datasynth_generators::relationships::StrengthConfig {
transaction_volume_weight: self
.config
.relationship_strength
.calculation
.transaction_volume_weight,
transaction_count_weight: self
.config
.relationship_strength
.calculation
.transaction_count_weight,
duration_weight: self
.config
.relationship_strength
.calculation
.relationship_duration_weight,
recency_weight: self.config.relationship_strength.calculation.recency_weight,
mutual_connections_weight: self
.config
.relationship_strength
.calculation
.mutual_connections_weight,
recency_half_life_days: self
.config
.relationship_strength
.calculation
.recency_half_life_days,
},
..Default::default()
};
let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
let entity_graph = if rs_enabled {
// Node summaries: one per vendor and customer in master data.
let vendor_summaries: Vec<EntitySummary> = self
.master_data
.vendors
.iter()
.map(|v| {
EntitySummary::new(
&v.vendor_id,
&v.name,
datasynth_core::models::GraphEntityType::Vendor,
start_date,
)
})
.collect();
let customer_summaries: Vec<EntitySummary> = self
.master_data
.customers
.iter()
.map(|c| {
EntitySummary::new(
&c.customer_id,
&c.name,
datasynth_core::models::GraphEntityType::Customer,
start_date,
)
})
.collect();
// Accumulate per-(company, partner) transaction aggregates from three
// sources: JE lines with a trading partner, P2P purchase orders, and
// O2C sales orders. Each update tracks volume, count, and the
// first/last transaction dates.
let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
std::collections::HashMap::new();
for je in journal_entries {
let cc = je.header.company_code.clone();
let posting_date = je.header.posting_date;
for line in &je.lines {
if let Some(ref tp) = line.trading_partner {
// Use whichever side of the line carries the value.
let amount = if line.debit_amount > line.credit_amount {
line.debit_amount
} else {
line.credit_amount
};
let entry = txn_summaries
.entry((cc.clone(), tp.clone()))
.or_insert_with(|| TransactionSummary {
total_volume: rust_decimal::Decimal::ZERO,
transaction_count: 0,
first_transaction_date: posting_date,
last_transaction_date: posting_date,
related_entities: std::collections::HashSet::new(),
});
entry.total_volume += amount;
entry.transaction_count += 1;
if posting_date < entry.first_transaction_date {
entry.first_transaction_date = posting_date;
}
if posting_date > entry.last_transaction_date {
entry.last_transaction_date = posting_date;
}
entry.related_entities.insert(cc.clone());
}
}
}
for chain in &document_flows.p2p_chains {
let cc = chain.purchase_order.header.company_code.clone();
let vendor_id = chain.purchase_order.vendor_id.clone();
let po_date = chain.purchase_order.header.document_date;
let amount = chain.purchase_order.total_net_amount;
let entry = txn_summaries
.entry((cc.clone(), vendor_id))
.or_insert_with(|| TransactionSummary {
total_volume: rust_decimal::Decimal::ZERO,
transaction_count: 0,
first_transaction_date: po_date,
last_transaction_date: po_date,
related_entities: std::collections::HashSet::new(),
});
entry.total_volume += amount;
entry.transaction_count += 1;
if po_date < entry.first_transaction_date {
entry.first_transaction_date = po_date;
}
if po_date > entry.last_transaction_date {
entry.last_transaction_date = po_date;
}
entry.related_entities.insert(cc);
}
for chain in &document_flows.o2c_chains {
let cc = chain.sales_order.header.company_code.clone();
let customer_id = chain.sales_order.customer_id.clone();
let so_date = chain.sales_order.header.document_date;
let amount = chain.sales_order.total_net_amount;
let entry = txn_summaries
.entry((cc.clone(), customer_id))
.or_insert_with(|| TransactionSummary {
total_volume: rust_decimal::Decimal::ZERO,
transaction_count: 0,
first_transaction_date: so_date,
last_transaction_date: so_date,
related_entities: std::collections::HashSet::new(),
});
entry.total_volume += amount;
entry.transaction_count += 1;
if so_date < entry.first_transaction_date {
entry.first_transaction_date = so_date;
}
if so_date > entry.last_transaction_date {
entry.last_transaction_date = so_date;
}
entry.related_entities.insert(cc);
}
// Graph snapshot date: last JE's posting date, falling back to the
// period start when there are no entries. Assumes journal_entries is
// ordered by posting date — TODO confirm.
let as_of_date = journal_entries
.last()
.map(|je| je.header.posting_date)
.unwrap_or(start_date);
let graph = gen.generate_entity_graph(
company_code,
as_of_date,
&vendor_summaries,
&customer_summaries,
&txn_summaries,
);
info!(
"Entity relationship graph: {} nodes, {} edges",
graph.nodes.len(),
graph.edges.len()
);
stats.entity_relationship_node_count = graph.nodes.len();
stats.entity_relationship_edge_count = graph.edges.len();
Some(graph)
} else {
None
};
let cross_process_links = if cpl_enabled {
// Flatten each P2P chain's goods-receipt items (with a material id)
// into flat references for link matching.
let gr_refs: Vec<GoodsReceiptRef> = document_flows
.p2p_chains
.iter()
.flat_map(|chain| {
let vendor_id = chain.purchase_order.vendor_id.clone();
let cc = chain.purchase_order.header.company_code.clone();
chain.goods_receipts.iter().flat_map(move |gr| {
gr.items.iter().filter_map({
let doc_id = gr.header.document_id.clone();
let v_id = vendor_id.clone();
let company = cc.clone();
let receipt_date = gr.header.document_date;
move |item| {
item.base
.material_id
.as_ref()
.map(|mat_id| GoodsReceiptRef {
document_id: doc_id.clone(),
material_id: mat_id.clone(),
quantity: item.base.quantity,
receipt_date,
vendor_id: v_id.clone(),
company_code: company.clone(),
})
}
})
})
})
.collect();
// Same flattening for O2C delivery items; the delivery date prefers
// the actual goods-issue date over the planned one.
let del_refs: Vec<DeliveryRef> = document_flows
.o2c_chains
.iter()
.flat_map(|chain| {
let customer_id = chain.sales_order.customer_id.clone();
let cc = chain.sales_order.header.company_code.clone();
chain.deliveries.iter().flat_map(move |del| {
let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
del.items.iter().filter_map({
let doc_id = del.header.document_id.clone();
let c_id = customer_id.clone();
let company = cc.clone();
move |item| {
item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
document_id: doc_id.clone(),
material_id: mat_id.clone(),
quantity: item.base.quantity,
delivery_date,
customer_id: c_id.clone(),
company_code: company.clone(),
})
}
})
})
})
.collect();
let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
info!("Cross-process links generated: {} links", links.len());
stats.cross_process_link_count = links.len();
links
} else {
Vec::new()
};
self.check_resources_with_log("post-entity-relationships")?;
Ok((entity_graph, cross_process_links))
}
/// Phase 29: generates industry-specific reference data (e.g. GL accounts)
/// for the configured sector; `None` when the feature is disabled.
fn phase_industry_data(
    &self,
    stats: &mut EnhancedGenerationStatistics,
) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
    if !self.config.industry_specific.enabled {
        return None;
    }
    info!("Phase 29: Generating industry-specific data");
    let sector = self.config.global.industry;
    let output = datasynth_generators::industry::factory::generate_industry_output(sector);
    let account_count = output.gl_accounts.len();
    stats.industry_gl_account_count = account_count;
    info!(
        "Industry data generated: {} GL accounts for {:?}",
        account_count, sector
    );
    Some(output)
}
/// Phase 3b: produces one opening-balance set per configured company.
///
/// The industry profile is mapped from the configured sector (defaulting to
/// manufacturing) and each company targets a fixed 10,000,000 total in its
/// own currency; generation is deterministic from `self.seed + 200`.
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_opening_balances(
    &mut self,
    coa: &Arc<ChartOfAccounts>,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<GeneratedOpeningBalance>> {
    if !self.config.balance.generate_opening_balances {
        debug!("Phase 3b: Skipped (opening balance generation disabled)");
        return Ok(Vec::new());
    }
    info!("Phase 3b: Generating Opening Balances");
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let fiscal_year = start_date.year();
    // Map the sector enum onto the opening-balance industry profile;
    // anything unmapped falls back to manufacturing.
    let industry = match self.config.global.industry {
        IndustrySector::Manufacturing => IndustryType::Manufacturing,
        IndustrySector::Retail => IndustryType::Retail,
        IndustrySector::FinancialServices => IndustryType::Financial,
        IndustrySector::Healthcare => IndustryType::Healthcare,
        IndustrySector::Technology => IndustryType::Technology,
        _ => IndustryType::Manufacturing,
    };
    let mut generator = datasynth_generators::OpeningBalanceGenerator::with_seed(
        datasynth_generators::OpeningBalanceConfig {
            industry,
            ..Default::default()
        },
        self.seed + 200,
    );
    let balances: Vec<GeneratedOpeningBalance> = self
        .config
        .companies
        .iter()
        .map(|company| {
            let spec = OpeningBalanceSpec::new(
                company.code.clone(),
                start_date,
                fiscal_year,
                company.currency.clone(),
                rust_decimal::Decimal::new(10_000_000, 0),
                industry,
            );
            generator.generate(&spec, coa, start_date, &company.code)
        })
        .collect();
    stats.opening_balance_count = balances.len();
    info!("Opening balances generated: {} companies", balances.len());
    self.check_resources_with_log("post-opening-balances")?;
    Ok(balances)
}
/// Phase 9b: reconciles GL control-account balances against the AR, AP,
/// fixed-asset, and inventory subledgers.
///
/// Replays all journal entries through a `RunningBalanceTracker` (per-entry
/// validation disabled so one bad entry cannot abort the phase), then
/// compares each control account's closing balance with the corresponding
/// subledger detail. Returns one `ReconciliationResult` per check performed
/// (two for fixed assets: asset cost and accumulated depreciation).
///
/// # Errors
/// Returns a config error when `global.start_date` cannot be parsed, or
/// propagates resource-guard failures from the post-phase check.
fn phase_subledger_reconciliation(
    &mut self,
    subledger: &SubledgerSnapshot,
    entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
    if !self.config.balance.reconcile_subledgers {
        debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
        return Ok(Vec::new());
    }
    info!("Phase 9b: Reconciling GL to subledger balances");
    let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map(|d| d + chrono::Months::new(self.config.global.period_months))
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    // Lenient tracker settings: only closing balances are needed here, so
    // per-entry validation and history tracking are switched off.
    let tracker_config = BalanceTrackerConfig {
        validate_on_each_entry: false,
        track_history: false,
        fail_on_validation_error: false,
        ..Default::default()
    };
    let recon_currency = self
        .config
        .companies
        .first()
        .map(|c| c.currency.clone())
        .unwrap_or_else(|| "USD".to_string());
    let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
    let validation_errors = tracker.apply_entries(entries);
    if !validation_errors.is_empty() {
        warn!(
            error_count = validation_errors.len(),
            "Balance tracker encountered validation errors during subledger reconciliation"
        );
        for err in &validation_errors {
            debug!("Balance validation error: {:?}", err);
        }
    }
    let mut engine = datasynth_generators::ReconciliationEngine::new(
        datasynth_generators::ReconciliationConfig::default(),
    );
    let mut results = Vec::new();
    // NOTE(review): reconciliation covers only the first company, mirroring
    // the tracker-currency choice above — confirm multi-company runs are
    // reconciled elsewhere.
    let company_code = self
        .config
        .companies
        .first()
        .map(|c| c.code.as_str())
        .unwrap_or("1000");
    // Closing GL balance for one control account; zero when never posted.
    // Shared helper replacing five copies of the same lookup chain.
    let closing_balance = |account| {
        tracker
            .get_account_balance(company_code, account)
            .map(|b| b.closing_balance)
            .unwrap_or_default()
    };
    if !subledger.ar_invoices.is_empty() {
        let gl_balance =
            closing_balance(datasynth_core::accounts::control_accounts::AR_CONTROL);
        let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
        results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
    }
    if !subledger.ap_invoices.is_empty() {
        let gl_balance =
            closing_balance(datasynth_core::accounts::control_accounts::AP_CONTROL);
        let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
        results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
    }
    if !subledger.fa_records.is_empty() {
        // Fixed assets reconcile two accounts: gross cost and the
        // accumulated-depreciation contra account.
        let gl_asset_balance =
            closing_balance(datasynth_core::accounts::control_accounts::FIXED_ASSETS);
        let gl_accum_depr_balance = closing_balance(
            datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
        );
        let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
            subledger.fa_records.iter().collect();
        let (asset_recon, depr_recon) = engine.reconcile_fa(
            company_code,
            end_date,
            gl_asset_balance,
            gl_accum_depr_balance,
            &fa_refs,
        );
        results.push(asset_recon);
        results.push(depr_recon);
    }
    if !subledger.inventory_positions.is_empty() {
        let gl_balance =
            closing_balance(datasynth_core::accounts::control_accounts::INVENTORY);
        let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
            subledger.inventory_positions.iter().collect();
        results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
    }
    stats.subledger_reconciliation_count = results.len();
    let passed = results.iter().filter(|r| r.is_balanced()).count();
    let failed = results.len() - passed;
    info!(
        "Subledger reconciliation: {} checks, {} passed, {} failed",
        results.len(),
        passed,
        failed
    );
    self.check_resources_with_log("post-subledger-reconciliation")?;
    Ok(results)
}
/// Builds the chart of accounts for the configured complexity, industry and
/// CoA framework, caches it on `self.coa`, and returns a shared handle.
fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
    let progress = self.create_progress_bar(1, "Generating Chart of Accounts");
    let framework = self.resolve_coa_framework();
    let mut generator = ChartOfAccountsGenerator::new(
        self.config.chart_of_accounts.complexity,
        self.config.global.industry,
        self.seed,
    )
    .with_coa_framework(framework);
    let chart = Arc::new(generator.generate());
    // Keep a shared copy for later phases that need the CoA.
    self.coa = Some(Arc::clone(&chart));
    if let Some(bar) = progress {
        bar.finish_with_message("Chart of Accounts complete");
    }
    Ok(chart)
}
/// Generates per-company master data — vendors, customers, materials, assets,
/// employees (with change history) and cost centers — in parallel across
/// companies, then merges everything into `self.master_data`.
///
/// # Errors
/// Returns a config error when `global.start_date` is not `YYYY-MM-DD`.
fn generate_master_data(&mut self) -> SynthResult<()> {
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    // Five master-data categories per company drive the progress-bar total.
    let total = self.config.companies.len() as u64 * 5;
    let pb = self.create_progress_bar(total, "Generating Master Data");
    let pack = self.primary_pack().clone();
    let vendors_per_company = self.phase_config.vendors_per_company;
    let customers_per_company = self.phase_config.customers_per_company;
    let materials_per_company = self.phase_config.materials_per_company;
    let assets_per_company = self.phase_config.assets_per_company;
    let coa_framework = self.resolve_coa_framework();
    // Each company is processed independently on the rayon pool; the seed is
    // offset per company index so parallel runs stay deterministic.
    let per_company_results: Vec<_> = self
        .config
        .companies
        .par_iter()
        .enumerate()
        .map(|(i, company)| {
            let company_seed = self.seed.wrapping_add(i as u64 * 1000);
            let pack = pack.clone();
            let mut vendor_gen = VendorGenerator::new(company_seed);
            vendor_gen.set_country_pack(pack.clone());
            vendor_gen.set_coa_framework(coa_framework);
            // Counter offsets keep generated ids unique across companies.
            vendor_gen.set_counter_offset(i * vendors_per_company);
            if self.config.vendor_network.enabled {
                // Map the user-facing vendor-network config onto the
                // generator's tiered network parameters.
                let vn = &self.config.vendor_network;
                vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
                    enabled: true,
                    depth: vn.depth,
                    tier1_count: datasynth_generators::TierCountConfig::new(
                        vn.tier1.min,
                        vn.tier1.max,
                    ),
                    tier2_per_parent: datasynth_generators::TierCountConfig::new(
                        vn.tier2_per_parent.min,
                        vn.tier2_per_parent.max,
                    ),
                    tier3_per_parent: datasynth_generators::TierCountConfig::new(
                        vn.tier3_per_parent.min,
                        vn.tier3_per_parent.max,
                    ),
                    cluster_distribution: datasynth_generators::ClusterDistribution {
                        reliable_strategic: vn.clusters.reliable_strategic,
                        standard_operational: vn.clusters.standard_operational,
                        transactional: vn.clusters.transactional,
                        problematic: vn.clusters.problematic,
                    },
                    concentration_limits: datasynth_generators::ConcentrationLimits {
                        max_single_vendor: vn.dependencies.max_single_vendor_concentration,
                        max_top5: vn.dependencies.top_5_concentration,
                    },
                    ..datasynth_generators::VendorNetworkConfig::default()
                });
            }
            let vendor_pool =
                vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
            // Customer generation gets its own seed offset so streams don't
            // correlate with the vendor stream.
            let mut customer_gen = CustomerGenerator::new(company_seed + 100);
            customer_gen.set_country_pack(pack.clone());
            customer_gen.set_coa_framework(coa_framework);
            customer_gen.set_counter_offset(i * customers_per_company);
            if self.config.customer_segmentation.enabled {
                let cs = &self.config.customer_segmentation;
                let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
                    enabled: true,
                    segment_distribution: datasynth_generators::SegmentDistribution {
                        enterprise: cs.value_segments.enterprise.customer_share,
                        mid_market: cs.value_segments.mid_market.customer_share,
                        smb: cs.value_segments.smb.customer_share,
                        consumer: cs.value_segments.consumer.customer_share,
                    },
                    referral_config: datasynth_generators::ReferralConfig {
                        enabled: cs.networks.referrals.enabled,
                        referral_rate: cs.networks.referrals.referral_rate,
                        ..Default::default()
                    },
                    hierarchy_config: datasynth_generators::HierarchyConfig {
                        enabled: cs.networks.corporate_hierarchies.enabled,
                        hierarchy_rate: cs.networks.corporate_hierarchies.probability,
                        ..Default::default()
                    },
                    ..Default::default()
                };
                customer_gen.set_segmentation_config(seg_cfg);
            }
            let customer_pool = customer_gen.generate_customer_pool(
                customers_per_company,
                &company.code,
                start_date,
            );
            let mut material_gen = MaterialGenerator::new(company_seed + 200);
            material_gen.set_country_pack(pack.clone());
            material_gen.set_counter_offset(i * materials_per_company);
            let material_pool = material_gen.generate_material_pool(
                materials_per_company,
                &company.code,
                start_date,
            );
            let mut asset_gen = AssetGenerator::new(company_seed + 300);
            let asset_pool = asset_gen.generate_asset_pool(
                assets_per_company,
                &company.code,
                (start_date, end_date),
            );
            let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
            employee_gen.set_country_pack(pack);
            let employee_pool =
                employee_gen.generate_company_pool(&company.code, (start_date, end_date));
            // Change history (hires, transfers, etc.) is generated up to the
            // end of the simulated period.
            let employee_change_history =
                employee_gen.generate_all_change_history(&employee_pool, end_date);
            let employee_ids: Vec<String> = employee_pool
                .employees
                .iter()
                .map(|e| e.employee_id.clone())
                .collect();
            // Cost centers are assigned owners from the employee pool.
            let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
            let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
            (
                vendor_pool.vendors,
                customer_pool.customers,
                material_pool.materials,
                asset_pool.assets,
                employee_pool.employees,
                employee_change_history,
                cost_centers,
            )
        })
        .collect();
    // Merge the per-company tuples into the shared master-data store.
    for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
        per_company_results
    {
        self.master_data.vendors.extend(vendors);
        self.master_data.customers.extend(customers);
        self.master_data.materials.extend(materials);
        self.master_data.assets.extend(assets);
        self.master_data.employees.extend(employees);
        self.master_data.cost_centers.extend(cost_centers);
        self.master_data
            .employee_change_history
            .extend(change_history);
    }
    if let Some(pb) = &pb {
        pb.inc(total);
    }
    if let Some(pb) = pb {
        pb.finish_with_message("Master data generation complete");
    }
    Ok(())
}
/// Generates P2P (purchase-to-pay) and O2C (order-to-cash) document chains
/// into `flows`, then collects all cross-document references from the
/// generated document headers.
///
/// Chain counts are capped by available master data (2 chains per
/// vendor/customer per month) so sparse master data cannot be oversampled.
///
/// # Errors
/// Returns a config error when `global.start_date` is not `YYYY-MM-DD`.
fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let months = (self.config.global.period_months as usize).max(1);
    let p2p_count = self
        .phase_config
        .p2p_chains
        .min(self.master_data.vendors.len() * 2 * months);
    let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
    let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
    let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
    p2p_gen.set_country_pack(self.primary_pack().clone());
    for i in 0..p2p_count {
        // Round-robin over vendors/materials/companies for even coverage.
        let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
        let materials: Vec<&Material> = self
            .master_data
            .materials
            .iter()
            .skip(i % self.master_data.materials.len().max(1))
            .take(2.min(self.master_data.materials.len()))
            .collect();
        if materials.is_empty() {
            continue;
        }
        let company = &self.config.companies[i % self.config.companies.len()];
        // Spread PO dates across the year, 3 days apart per chain.
        let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
        let fiscal_period = po_date.month() as u8;
        let created_by = if self.master_data.employees.is_empty() {
            "SYSTEM"
        } else {
            self.master_data.employees[i % self.master_data.employees.len()]
                .user_id
                .as_str()
        };
        let chain = p2p_gen.generate_chain(
            &company.code,
            vendor,
            &materials,
            po_date,
            start_date.year() as u16,
            fiscal_period,
            created_by,
        );
        // Flatten the chain's documents into the snapshot's per-type lists
        // while also keeping the full chain.
        flows.purchase_orders.push(chain.purchase_order.clone());
        flows.goods_receipts.extend(chain.goods_receipts.clone());
        if let Some(vi) = &chain.vendor_invoice {
            flows.vendor_invoices.push(vi.clone());
        }
        if let Some(payment) = &chain.payment {
            flows.payments.push(payment.clone());
        }
        for remainder in &chain.remainder_payments {
            flows.payments.push(remainder.clone());
        }
        flows.p2p_chains.push(chain);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    if let Some(pb) = pb {
        pb.finish_with_message("P2P document flows complete");
    }
    let o2c_count = self
        .phase_config
        .o2c_chains
        .min(self.master_data.customers.len() * 2 * months);
    let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
    let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
    let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
    o2c_gen.set_country_pack(self.primary_pack().clone());
    for i in 0..o2c_count {
        let customer = &self.master_data.customers[i % self.master_data.customers.len()];
        let materials: Vec<&Material> = self
            .master_data
            .materials
            .iter()
            .skip(i % self.master_data.materials.len().max(1))
            .take(2.min(self.master_data.materials.len()))
            .collect();
        if materials.is_empty() {
            continue;
        }
        let company = &self.config.companies[i % self.config.companies.len()];
        // Sales orders are spaced 2 days apart (denser than P2P).
        let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
        let fiscal_period = so_date.month() as u8;
        let created_by = if self.master_data.employees.is_empty() {
            "SYSTEM"
        } else {
            self.master_data.employees[i % self.master_data.employees.len()]
                .user_id
                .as_str()
        };
        let chain = o2c_gen.generate_chain(
            &company.code,
            customer,
            &materials,
            so_date,
            start_date.year() as u16,
            fiscal_period,
            created_by,
        );
        flows.sales_orders.push(chain.sales_order.clone());
        flows.deliveries.extend(chain.deliveries.clone());
        if let Some(ci) = &chain.customer_invoice {
            flows.customer_invoices.push(ci.clone());
        }
        if let Some(receipt) = &chain.customer_receipt {
            flows.payments.push(receipt.clone());
        }
        for receipt in &chain.remainder_receipts {
            flows.payments.push(receipt.clone());
        }
        flows.o2c_chains.push(chain);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    if let Some(pb) = pb {
        pb.finish_with_message("O2C document flows complete");
    }
    // Gather every header-level document reference across all document types
    // into a single flat list for downstream consumers.
    {
        let mut refs = Vec::new();
        for doc in &flows.purchase_orders {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        for doc in &flows.goods_receipts {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        for doc in &flows.vendor_invoices {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        for doc in &flows.sales_orders {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        for doc in &flows.deliveries {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        for doc in &flows.customer_invoices {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        for doc in &flows.payments {
            refs.extend(doc.header.document_references.iter().cloned());
        }
        debug!(
            "Collected {} document cross-references from document headers",
            refs.len()
        );
        flows.document_references = refs;
    }
    Ok(())
}
/// Generates the configured number of journal entries against the chart of
/// accounts, splitting the work across threads for large batches.
///
/// Large batches (>= 10k entries on a multi-core host) are split into
/// per-thread sub-generators and merged afterwards; smaller batches run
/// sequentially so the progress bar can tick per entry.
///
/// # Errors
/// Returns a config error when `global.start_date` is not `YYYY-MM-DD`, or
/// propagates a resource error from the memory-limit check.
fn generate_journal_entries(
    &mut self,
    coa: &Arc<ChartOfAccounts>,
) -> SynthResult<Vec<JournalEntry>> {
    use datasynth_core::traits::ParallelGenerator;
    let total = self.calculate_total_transactions();
    let pb = self.create_progress_bar(total, "Generating Journal Entries");
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let end_date = start_date + chrono::Months::new(self.config.global.period_months);
    let company_codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|c| c.code.clone())
        .collect();
    let mut generator = JournalEntryGenerator::new_with_params(
        self.config.transactions.clone(),
        Arc::clone(coa),
        company_codes,
        start_date,
        end_date,
        self.seed,
    );
    let bp = &self.config.business_processes;
    generator.set_business_process_weights(
        bp.o2c_weight,
        bp.p2p_weight,
        bp.r2r_weight,
        bp.h2r_weight,
        bp.a2r_weight,
    );
    let je_pack = self.primary_pack();
    // Builder-style configuration: master data, country-pack naming/temporal
    // patterns, persona errors and fraud settings.
    let mut generator = generator
        .with_master_data(
            &self.master_data.vendors,
            &self.master_data.customers,
            &self.master_data.materials,
        )
        .with_country_pack_names(je_pack)
        .with_country_pack_temporal(
            self.config.temporal_patterns.clone(),
            self.seed + 200,
            je_pack,
        )
        .with_persona_errors(true)
        .with_fraud_config(self.config.fraud.clone());
    if self.config.temporal.enabled {
        let drift_config = self.config.temporal.to_core_config();
        generator = generator.with_drift_config(drift_config, self.seed + 100);
    }
    self.check_memory_limit()?;
    // Never spawn more threads than entries; the trailing max(1) guards the
    // total == 0 case.
    let num_threads = num_cpus::get().min(total as usize).max(1);
    let entries = if total >= 10_000 && num_threads > 1 {
        // Parallel path: split the generator into deterministic sub-streams
        // and distribute the remainder across the first few threads.
        let sub_generators = generator.split(num_threads);
        let entries_per_thread = total as usize / num_threads;
        let remainder = total as usize % num_threads;
        let batches: Vec<Vec<JournalEntry>> = sub_generators
            .into_par_iter()
            .enumerate()
            .map(|(i, mut gen)| {
                let count = entries_per_thread + if i < remainder { 1 } else { 0 };
                gen.generate_batch(count)
            })
            .collect();
        let entries = JournalEntryGenerator::merge_results(batches);
        if let Some(pb) = &pb {
            pb.inc(total);
        }
        entries
    } else {
        // Sequential path with per-entry progress.
        let mut entries = Vec::with_capacity(total as usize);
        for _ in 0..total {
            let entry = generator.generate();
            entries.push(entry);
            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }
        entries
    };
    if let Some(pb) = pb {
        pb.finish_with_message("Journal entries complete");
    }
    Ok(entries)
}
fn generate_jes_from_document_flows(
&mut self,
flows: &DocumentFlowSnapshot,
) -> SynthResult<Vec<JournalEntry>> {
let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
let je_config = match self.resolve_coa_framework() {
CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
CoAFramework::GermanSkr04 => {
let fa = datasynth_core::FrameworkAccounts::german_gaap();
DocumentFlowJeConfig::from(&fa)
}
CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
};
let populate_fec = je_config.populate_fec_fields;
let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
if populate_fec {
let mut aux_lookup = std::collections::HashMap::new();
for vendor in &self.master_data.vendors {
if let Some(ref aux) = vendor.auxiliary_gl_account {
aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
}
}
for customer in &self.master_data.customers {
if let Some(ref aux) = customer.auxiliary_gl_account {
aux_lookup.insert(customer.customer_id.clone(), aux.clone());
}
}
if !aux_lookup.is_empty() {
generator.set_auxiliary_account_lookup(aux_lookup);
}
}
let mut entries = Vec::new();
for chain in &flows.p2p_chains {
let chain_entries = generator.generate_from_p2p_chain(chain);
entries.extend(chain_entries);
if let Some(pb) = &pb {
pb.inc(1);
}
}
for chain in &flows.o2c_chains {
let chain_entries = generator.generate_from_o2c_chain(chain);
entries.extend(chain_entries);
if let Some(pb) = &pb {
pb.inc(1);
}
}
if let Some(pb) = pb {
pb.finish_with_message(format!(
"Generated {} JEs from document flows",
entries.len()
));
}
Ok(entries)
}
/// Converts payroll runs into simple two-line journal entries: the gross
/// amount is debited to salary expense and credited to the payroll clearing
/// (suspense) account.
fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
    use datasynth_core::accounts::{expense_accounts, suspense_accounts};
    payroll_runs
        .iter()
        .map(|run| {
            let mut entry = JournalEntry::new_simple(
                format!("JE-PAYROLL-{}", run.payroll_id),
                run.company_code.clone(),
                run.run_date,
                format!("Payroll {}", run.payroll_id),
            );
            // Debit side: gross payroll expense.
            entry.add_line(JournalEntryLine {
                line_number: 1,
                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
                debit_amount: run.total_gross,
                reference: Some(run.payroll_id.clone()),
                text: Some(format!(
                    "Payroll {} ({} employees)",
                    run.payroll_id, run.employee_count
                )),
                ..Default::default()
            });
            // Credit side: offset to the payroll clearing account.
            entry.add_line(JournalEntryLine {
                line_number: 2,
                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
                credit_amount: run.total_gross,
                reference: Some(run.payroll_id.clone()),
                ..Default::default()
            });
            entry
        })
        .collect()
}
/// Projects vendor and customer invoices from the document flows into AP/AR
/// subledger invoices, resolving partner display names from master data.
/// Other subledger areas are left empty here and filled by later phases.
fn link_document_flows_to_subledgers(
    &mut self,
    flows: &DocumentFlowSnapshot,
) -> SynthResult<SubledgerSnapshot> {
    let work_items = flows.vendor_invoices.len() + flows.customer_invoices.len();
    let pb = self.create_progress_bar(work_items as u64, "Linking Subledgers");
    // Name lookup tables give the linker human-readable partner names.
    let vendor_names = self
        .master_data
        .vendors
        .iter()
        .map(|v| (v.vendor_id.clone(), v.name.clone()))
        .collect::<std::collections::HashMap<String, String>>();
    let customer_names = self
        .master_data
        .customers
        .iter()
        .map(|c| (c.customer_id.clone(), c.name.clone()))
        .collect::<std::collections::HashMap<String, String>>();
    let mut linker = DocumentFlowLinker::new()
        .with_vendor_names(vendor_names)
        .with_customer_names(customer_names);
    let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
    if let Some(bar) = &pb {
        bar.inc(flows.vendor_invoices.len() as u64);
    }
    let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
    if let Some(bar) = &pb {
        bar.inc(flows.customer_invoices.len() as u64);
    }
    if let Some(bar) = pb {
        bar.finish_with_message(format!(
            "Linked {} AP and {} AR invoices",
            ap_invoices.len(),
            ar_invoices.len()
        ));
    }
    Ok(SubledgerSnapshot {
        ap_invoices,
        ar_invoices,
        fa_records: Vec::new(),
        inventory_positions: Vec::new(),
        inventory_movements: Vec::new(),
        ar_aging_reports: Vec::new(),
        ap_aging_reports: Vec::new(),
        depreciation_runs: Vec::new(),
        inventory_valuations: Vec::new(),
        dunning_runs: Vec::new(),
        dunning_letters: Vec::new(),
    })
}
/// Builds an object-centric process-mining (OCPM) event log covering every
/// generated case type: P2P, O2C, source-to-contract, hire-to-retire,
/// manufacturing, banking, audit and bank reconciliation.
///
/// Each loop wires the relevant documents into a case-specific document set,
/// generates a case (events, objects, relationships, correlations, trace),
/// and folds it into one shared event log.
#[allow(clippy::too_many_arguments)]
fn generate_ocpm_events(
    &mut self,
    flows: &DocumentFlowSnapshot,
    sourcing: &SourcingSnapshot,
    hr: &HrSnapshot,
    manufacturing: &ManufacturingSnapshot,
    banking: &BankingSnapshot,
    audit: &AuditSnapshot,
    financial_reporting: &FinancialReportingSnapshot,
) -> SynthResult<OcpmSnapshot> {
    // One progress tick per case across all case types.
    let total_chains = flows.p2p_chains.len()
        + flows.o2c_chains.len()
        + sourcing.sourcing_projects.len()
        + hr.payroll_runs.len()
        + manufacturing.production_orders.len()
        + banking.customers.len()
        + audit.engagements.len()
        + financial_reporting.bank_reconciliations.len();
    let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
    let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
    let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
    // Only enable case types that actually have source data.
    let ocpm_config = OcpmGeneratorConfig {
        generate_p2p: true,
        generate_o2c: true,
        generate_s2c: !sourcing.sourcing_projects.is_empty(),
        generate_h2r: !hr.payroll_runs.is_empty(),
        generate_mfg: !manufacturing.production_orders.is_empty(),
        generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
        generate_bank: !banking.customers.is_empty(),
        generate_audit: !audit.engagements.is_empty(),
        happy_path_rate: 0.75,
        exception_path_rate: 0.20,
        error_path_rate: 0.05,
        add_duration_variability: true,
        duration_std_dev_factor: 0.3,
    };
    let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
    let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
    // A small pool of user ids for event attribution.
    let available_users: Vec<String> = self
        .master_data
        .employees
        .iter()
        .take(20)
        .map(|e| e.user_id.clone())
        .collect();
    // Base timestamp: configured start date at midnight UTC, with a fixed
    // fallback if the config date fails to parse.
    let fallback_date =
        NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
    let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .unwrap_or(fallback_date);
    let base_midnight = base_date
        .and_hms_opt(0, 0, 0)
        .expect("midnight is always valid");
    let base_datetime =
        chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
    // Helper: merge one generated case result into the shared event log.
    let add_result = |event_log: &mut OcpmEventLog,
                      result: datasynth_ocpm::CaseGenerationResult| {
        for event in result.events {
            event_log.add_event(event);
        }
        for object in result.objects {
            event_log.add_object(object);
        }
        for relationship in result.relationships {
            event_log.add_relationship(relationship);
        }
        for corr in result.correlation_events {
            event_log.add_correlation_event(corr);
        }
        event_log.add_case(result.case_trace);
    };
    // P2P cases: PO plus first goods receipt, invoice and payment (empty ids
    // when a chain stage is missing).
    for chain in &flows.p2p_chains {
        let po = &chain.purchase_order;
        let documents = P2pDocuments::new(
            &po.header.document_id,
            &po.vendor_id,
            &po.header.company_code,
            po.total_net_amount,
            &po.header.currency,
            &ocpm_uuid_factory,
        )
        .with_goods_receipt(
            chain
                .goods_receipts
                .first()
                .map(|gr| gr.header.document_id.as_str())
                .unwrap_or(""),
            &ocpm_uuid_factory,
        )
        .with_invoice(
            chain
                .vendor_invoice
                .as_ref()
                .map(|vi| vi.header.document_id.as_str())
                .unwrap_or(""),
            &ocpm_uuid_factory,
        )
        .with_payment(
            chain
                .payment
                .as_ref()
                .map(|p| p.header.document_id.as_str())
                .unwrap_or(""),
            &ocpm_uuid_factory,
        );
        let start_time =
            chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
        let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // O2C cases: sales order plus first delivery, invoice and receipt.
    for chain in &flows.o2c_chains {
        let so = &chain.sales_order;
        let documents = O2cDocuments::new(
            &so.header.document_id,
            &so.customer_id,
            &so.header.company_code,
            so.total_net_amount,
            &so.header.currency,
            &ocpm_uuid_factory,
        )
        .with_delivery(
            chain
                .deliveries
                .first()
                .map(|d| d.header.document_id.as_str())
                .unwrap_or(""),
            &ocpm_uuid_factory,
        )
        .with_invoice(
            chain
                .customer_invoice
                .as_ref()
                .map(|ci| ci.header.document_id.as_str())
                .unwrap_or(""),
            &ocpm_uuid_factory,
        )
        .with_receipt(
            chain
                .customer_receipt
                .as_ref()
                .map(|r| r.header.document_id.as_str())
                .unwrap_or(""),
            &ocpm_uuid_factory,
        );
        let start_time =
            chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
        let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // S2C cases: vendor resolved from the awarded contract, then the first
    // qualification, then any vendor, with a static fallback id.
    for project in &sourcing.sourcing_projects {
        let vendor_id = sourcing
            .contracts
            .iter()
            .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
            .map(|c| c.vendor_id.clone())
            .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
            .or_else(|| {
                self.master_data
                    .vendors
                    .first()
                    .map(|v| v.vendor_id.clone())
            })
            .unwrap_or_else(|| "V000".to_string());
        let mut docs = S2cDocuments::new(
            &project.project_id,
            &vendor_id,
            &project.company_code,
            project.estimated_annual_spend,
            &ocpm_uuid_factory,
        );
        if let Some(rfx) = sourcing
            .rfx_events
            .iter()
            .find(|r| r.sourcing_project_id == project.project_id)
        {
            docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
            // Only the accepted bid is linked as the winning bid.
            if let Some(bid) = sourcing.bids.iter().find(|b| {
                b.rfx_id == rfx.rfx_id
                    && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
            }) {
                docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
            }
        }
        if let Some(contract) = sourcing
            .contracts
            .iter()
            .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
        {
            docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
        }
        // Sourcing starts 90 days before the simulated period.
        let start_time = base_datetime - chrono::Duration::days(90);
        let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // H2R cases: one per payroll run, linked to up to 5 in-period time entries.
    for run in &hr.payroll_runs {
        let employee_id = hr
            .payroll_line_items
            .iter()
            .find(|li| li.payroll_id == run.payroll_id)
            .map(|li| li.employee_id.as_str())
            .unwrap_or("EMP000");
        let docs = H2rDocuments::new(
            &run.payroll_id,
            employee_id,
            &run.company_code,
            run.total_gross,
            &ocpm_uuid_factory,
        )
        .with_time_entries(
            hr.time_entries
                .iter()
                .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
                .take(5)
                .map(|t| t.entry_id.as_str())
                .collect(),
        );
        let start_time = base_datetime - chrono::Duration::days(30);
        let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // Manufacturing cases: production order plus operations, and optional
    // quality inspection / cycle count links.
    for order in &manufacturing.production_orders {
        let mut docs = MfgDocuments::new(
            &order.order_id,
            &order.material_id,
            &order.company_code,
            order.planned_quantity,
            &ocpm_uuid_factory,
        )
        .with_operations(
            order
                .operations
                .iter()
                .map(|o| format!("OP-{:04}", o.operation_number))
                .collect::<Vec<_>>()
                .iter()
                .map(std::string::String::as_str)
                .collect(),
        );
        if let Some(insp) = manufacturing
            .quality_inspections
            .iter()
            .find(|i| i.reference_id == order.order_id)
        {
            docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
        }
        if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
            cc.items
                .iter()
                .any(|item| item.material_id == order.material_id)
        }) {
            docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
        }
        let start_time = base_datetime - chrono::Duration::days(60);
        let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // Banking cases: customer with their primary account and up to 10
    // transactions (ids and amounts collected in matching order).
    for customer in &banking.customers {
        let customer_id_str = customer.customer_id.to_string();
        let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
        if let Some(account) = banking
            .accounts
            .iter()
            .find(|a| a.primary_owner_id == customer.customer_id)
        {
            let account_id_str = account.account_id.to_string();
            docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
            let txn_strs: Vec<String> = banking
                .transactions
                .iter()
                .filter(|t| t.account_id == account.account_id)
                .take(10)
                .map(|t| t.transaction_id.to_string())
                .collect();
            let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
            let txn_amounts: Vec<rust_decimal::Decimal> = banking
                .transactions
                .iter()
                .filter(|t| t.account_id == account.account_id)
                .take(10)
                .map(|t| t.amount)
                .collect();
            if !txn_ids.is_empty() {
                docs = docs.with_transactions(txn_ids, txn_amounts);
            }
        }
        let start_time = base_datetime - chrono::Duration::days(180);
        let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // Audit cases: engagement with capped samples of workpapers, evidence,
    // risks, findings and judgments.
    for engagement in &audit.engagements {
        let engagement_id_str = engagement.engagement_id.to_string();
        let docs = AuditDocuments::new(
            &engagement_id_str,
            &engagement.client_entity_id,
            &ocpm_uuid_factory,
        )
        .with_workpapers(
            audit
                .workpapers
                .iter()
                .filter(|w| w.engagement_id == engagement.engagement_id)
                .take(10)
                .map(|w| w.workpaper_id.to_string())
                .collect::<Vec<_>>()
                .iter()
                .map(std::string::String::as_str)
                .collect(),
        )
        .with_evidence(
            audit
                .evidence
                .iter()
                .filter(|e| e.engagement_id == engagement.engagement_id)
                .take(10)
                .map(|e| e.evidence_id.to_string())
                .collect::<Vec<_>>()
                .iter()
                .map(std::string::String::as_str)
                .collect(),
        )
        .with_risks(
            audit
                .risk_assessments
                .iter()
                .filter(|r| r.engagement_id == engagement.engagement_id)
                .take(5)
                .map(|r| r.risk_id.to_string())
                .collect::<Vec<_>>()
                .iter()
                .map(std::string::String::as_str)
                .collect(),
        )
        .with_findings(
            audit
                .findings
                .iter()
                .filter(|f| f.engagement_id == engagement.engagement_id)
                .take(5)
                .map(|f| f.finding_id.to_string())
                .collect::<Vec<_>>()
                .iter()
                .map(std::string::String::as_str)
                .collect(),
        )
        .with_judgments(
            audit
                .judgments
                .iter()
                .filter(|j| j.engagement_id == engagement.engagement_id)
                .take(5)
                .map(|j| j.judgment_id.to_string())
                .collect::<Vec<_>>()
                .iter()
                .map(std::string::String::as_str)
                .collect(),
        );
        let start_time = base_datetime - chrono::Duration::days(120);
        let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // Bank-reconciliation cases: reconciliation with capped statement lines
    // and reconciling items.
    for recon in &financial_reporting.bank_reconciliations {
        let docs = BankReconDocuments::new(
            &recon.reconciliation_id,
            &recon.bank_account_id,
            &recon.company_code,
            recon.bank_ending_balance,
            &ocpm_uuid_factory,
        )
        .with_statement_lines(
            recon
                .statement_lines
                .iter()
                .take(20)
                .map(|l| l.line_id.as_str())
                .collect(),
        )
        .with_reconciling_items(
            recon
                .reconciling_items
                .iter()
                .take(10)
                .map(|i| i.item_id.as_str())
                .collect(),
        );
        let start_time = base_datetime - chrono::Duration::days(30);
        let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
        add_result(&mut event_log, result);
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    // Finalize: compute process variants, then summarize for the snapshot.
    event_log.compute_variants();
    let summary = event_log.summary();
    if let Some(pb) = pb {
        pb.finish_with_message(format!(
            "Generated {} OCPM events, {} objects",
            summary.event_count, summary.object_count
        ));
    }
    Ok(OcpmSnapshot {
        event_count: summary.event_count,
        object_count: summary.object_count,
        case_count: summary.case_count,
        event_log: Some(event_log),
    })
}
/// Injects labeled anomalies (fraud, errors, process issues) into the
/// generated journal entries and returns the labels plus a per-type tally.
fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
    let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
    let injection_enabled = self.config.anomaly_injection.enabled;
    let defaults = AnomalyRateConfig::default();
    // Overall rate precedence: explicit anomaly config, then the fraud
    // config's rate, then a 2% fallback.
    let total_rate = if injection_enabled {
        self.config.anomaly_injection.rates.total_rate
    } else if self.config.fraud.enabled {
        self.config.fraud.fraud_rate
    } else {
        0.02
    };
    let fraud_rate = if injection_enabled {
        self.config.anomaly_injection.rates.fraud_rate
    } else {
        defaults.fraud_rate
    };
    let error_rate = if injection_enabled {
        self.config.anomaly_injection.rates.error_rate
    } else {
        defaults.error_rate
    };
    let process_issue_rate = if injection_enabled {
        self.config.anomaly_injection.rates.process_rate
    } else {
        defaults.process_issue_rate
    };
    let injector_config = AnomalyInjectorConfig {
        rates: AnomalyRateConfig {
            total_rate,
            fraud_rate,
            error_rate,
            process_issue_rate,
            ..Default::default()
        },
        seed: self.seed + 5000,
        ..Default::default()
    };
    let mut injector = AnomalyInjector::new(injector_config);
    let result = injector.process_entries(entries);
    if let Some(bar) = &pb {
        bar.inc(entries.len() as u64);
        bar.finish_with_message("Anomaly injection complete");
    }
    // Tally label counts per anomaly type for the summary.
    let mut by_type = HashMap::new();
    for label in &result.labels {
        *by_type
            .entry(format!("{:?}", label.anomaly_type))
            .or_default() += 1;
    }
    Ok(AnomalyLabels {
        labels: result.labels,
        summary: Some(result.summary),
        by_type,
    })
}
/// Validates debit/credit balance across the generated journal entries.
///
/// Entries tagged with `[HUMAN_ERROR:` in their header text are deliberately
/// unbalanced and excluded from validation; the remaining entries are run
/// through a balance tracker, then each company's balance sheet is validated
/// at the end of the configured period.
///
/// # Errors
/// Returns a config error when `global.start_date` is not `YYYY-MM-DD`.
fn validate_journal_entries(
    &mut self,
    entries: &[JournalEntry],
) -> SynthResult<BalanceValidationResult> {
    // Skip entries intentionally broken by the human-error injector.
    let clean_entries: Vec<&JournalEntry> = entries
        .iter()
        .filter(|e| {
            e.header
                .header_text
                .as_ref()
                .map(|t| !t.contains("[HUMAN_ERROR:"))
                .unwrap_or(true)
        })
        .collect();
    let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
    // Batch mode: no per-entry validation, no history, collect errors
    // instead of failing fast.
    let config = BalanceTrackerConfig {
        validate_on_each_entry: false,
        track_history: false,
        fail_on_validation_error: false,
        ..Default::default()
    };
    let validation_currency = self
        .config
        .companies
        .first()
        .map(|c| c.currency.clone())
        .unwrap_or_else(|| "USD".to_string());
    let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
    let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
    let errors = tracker.apply_entries(&clean_refs);
    if let Some(pb) = &pb {
        // NOTE(review): increments by the full entry count even though the
        // bar length was set from the clean (filtered) count — confirm
        // whether the bar should use clean_refs.len() here.
        pb.inc(entries.len() as u64);
    }
    let has_unbalanced = tracker
        .get_validation_errors()
        .iter()
        .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
    // Merge apply-time errors with the tracker's accumulated errors.
    let mut all_errors = errors;
    all_errors.extend(tracker.get_validation_errors().iter().cloned());
    let company_codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|c| c.code.clone())
        .collect();
    let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map(|d| d + chrono::Months::new(self.config.global.period_months))
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    // Per-company balance-sheet check at period end.
    for company_code in &company_codes {
        if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
            all_errors.push(e);
        }
    }
    let stats = tracker.get_statistics();
    let is_balanced = all_errors.is_empty();
    if let Some(pb) = pb {
        let msg = if is_balanced {
            "Balance validation passed"
        } else {
            "Balance validation completed with errors"
        };
        pb.finish_with_message(msg);
    }
    Ok(BalanceValidationResult {
        validated: true,
        is_balanced,
        entries_processed: stats.entries_processed,
        total_debits: stats.total_debits,
        total_credits: stats.total_credits,
        accounts_tracked: stats.accounts_tracked,
        companies_tracked: stats.companies_tracked,
        validation_errors: all_errors,
        has_unbalanced_entries: has_unbalanced,
    })
}
/// Injects configured data-quality issues (missing values, format variations,
/// duplicates, typos, encoding problems) into the free-text fields of the
/// journal entries, mutating them in place.
///
/// Returns the injector's statistics together with the list of recorded
/// quality issues. When data-quality injection is disabled in the config, a
/// minimal injector configuration is used instead.
///
/// # Errors
/// Currently never returns `Err`; the `SynthResult` return type keeps the
/// signature consistent with the other pipeline phases.
fn inject_data_quality(
    &mut self,
    entries: &mut [JournalEntry],
) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
    let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
    // Map the user-facing data-quality config onto the generator's injector config.
    let config = if self.config.data_quality.enabled {
        let dq = &self.config.data_quality;
        DataQualityConfig {
            enable_missing_values: dq.missing_values.enabled,
            missing_values: datasynth_generators::MissingValueConfig {
                global_rate: dq.effective_missing_rate(),
                ..Default::default()
            },
            enable_format_variations: dq.format_variations.enabled,
            format_variations: datasynth_generators::FormatVariationConfig {
                date_variation_rate: dq.format_variations.dates.rate,
                amount_variation_rate: dq.format_variations.amounts.rate,
                identifier_variation_rate: dq.format_variations.identifiers.rate,
                ..Default::default()
            },
            enable_duplicates: dq.duplicates.enabled,
            duplicates: datasynth_generators::DuplicateConfig {
                duplicate_rate: dq.effective_duplicate_rate(),
                ..Default::default()
            },
            enable_typos: dq.typos.enabled,
            typos: datasynth_generators::TypoConfig {
                char_error_rate: dq.effective_typo_rate(),
                ..Default::default()
            },
            enable_encoding_issues: dq.encoding_issues.enabled,
            encoding_issue_rate: dq.encoding_issues.rate,
            // Offset the seed so this phase's RNG stream is decorrelated from
            // the other generation phases while staying deterministic.
            seed: self.seed.wrapping_add(77),
            track_statistics: true,
        }
    } else {
        DataQualityConfig::minimal()
    };
    let mut injector = DataQualityInjector::new(config);
    injector.set_country_pack(self.primary_pack().clone());
    let context = HashMap::new();
    // Shared helper: run one optional text field through the injector.
    // `Some(new)` differing from the current value replaces it, `None` blanks
    // the field (simulated missing value), and an unchanged result is a no-op.
    // This replaces four copy-pasted match blocks in the previous version.
    let mut process_optional_field = |field: &str, doc_id: &str, slot: &mut Option<String>| {
        if let Some(text) = slot.as_ref() {
            let processed = injector.process_text_field(field, text, doc_id, &context);
            match processed {
                Some(new_text) if new_text != *text => *slot = Some(new_text),
                None => *slot = None,
                _ => {}
            }
        }
    };
    for entry in entries.iter_mut() {
        // The document id is the per-entry correlation key; compute it once.
        let doc_id = entry.header.document_id.to_string();
        process_optional_field("header_text", &doc_id, &mut entry.header.header_text);
        process_optional_field("reference", &doc_id, &mut entry.header.reference);
        // `user_persona` is a mandatory field, so a `None` (missing-value)
        // outcome is ignored and the original value is retained.
        let mut persona = Some(entry.header.user_persona.clone());
        process_optional_field("user_persona", &doc_id, &mut persona);
        if let Some(processed) = persona {
            entry.header.user_persona = processed;
        }
        for line in &mut entry.lines {
            process_optional_field("line_text", &doc_id, &mut line.line_text);
            process_optional_field("cost_center", &doc_id, &mut line.cost_center);
        }
        if let Some(pb) = &pb {
            pb.inc(1);
        }
    }
    if let Some(pb) = pb {
        pb.finish_with_message("Data quality injection complete");
    }
    let quality_issues = injector.issues().to_vec();
    Ok((injector.stats().clone(), quality_issues))
}
/// Builds the complete audit data snapshot from the generated journal
/// `entries`: engagements, workpapers, evidence, risk assessments, findings,
/// judgments, confirmations, plus the ISA/SOX artifact families (ISA 600,
/// 210, 560, 402, 570, 540, 700, 530, 520, 315, SOX 302/404, materiality,
/// CRA, SCOTS) appended in fixed order below.
///
/// Delegates to `generate_audit_data_with_fsm` when the audit FSM is enabled
/// in configuration; otherwise runs this classic generator pipeline.
///
/// # Errors
/// Returns an error if `global.start_date` cannot be parsed as `%Y-%m-%d`.
fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
    // Route to the FSM-based pipeline when explicitly enabled.
    let use_fsm = self
        .config
        .audit
        .fsm
        .as_ref()
        .map(|f| f.enabled)
        .unwrap_or(false);
    if use_fsm {
        return self.generate_audit_data_with_fsm(entries);
    }
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let fiscal_year = start_date.year() as u16;
    let period_end = start_date + chrono::Months::new(self.config.global.period_months);
    // Sum of all credit amounts across every entry line is used as a crude
    // total-revenue proxy for materiality/engagement sizing below.
    let total_revenue: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
        .map(|l| l.credit_amount)
        .sum();
    // Rough progress estimate: ~50 generated items per engagement.
    let total_items = (self.phase_config.audit_engagements * 50) as u64;
    let pb = self.create_progress_bar(total_items, "Generating Audit Data");
    let mut snapshot = AuditSnapshot::default();
    // Each generator gets its own fixed seed offset so individual artifact
    // streams are deterministic and independent of one another.
    let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
    let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
    let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
    let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
    let mut finding_gen = FindingGenerator::new(self.seed + 7400);
    let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
    let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
    let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
    let mut sample_gen = SampleGenerator::new(self.seed + 7800);
    let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
    let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
    let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
    // Postable account codes from the chart of accounts (empty when no CoA
    // has been generated yet).
    let accounts: Vec<String> = self
        .coa
        .as_ref()
        .map(|coa| {
            coa.get_postable_accounts()
                .iter()
                .map(|acc| acc.account_code().to_string())
                .collect()
        })
        .unwrap_or_default();
    // Per-company engagement loop: engagements are distributed evenly across
    // companies, with the remainder going to the first companies in order.
    for (i, company) in self.config.companies.iter().enumerate() {
        let company_revenue = total_revenue
            * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
        let engagements_for_company =
            self.phase_config.audit_engagements / self.config.companies.len().max(1);
        let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
            1
        } else {
            0
        };
        for _eng_idx in 0..(engagements_for_company + extra) {
            let mut engagement = engagement_gen.generate_engagement(
                &company.code,
                &company.name,
                fiscal_year,
                period_end,
                company_revenue,
                None,
            );
            // Replace the generator's placeholder team with real employee ids
            // from master data, assigned deterministically by rotating index.
            if !self.master_data.employees.is_empty() {
                let emp_count = self.master_data.employees.len();
                let base = (i * 10 + _eng_idx) % emp_count;
                engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
                    .employee_id
                    .clone();
                engagement.engagement_manager_id = self.master_data.employees
                    [(base + 1) % emp_count]
                    .employee_id
                    .clone();
                let real_team: Vec<String> = engagement
                    .team_member_ids
                    .iter()
                    .enumerate()
                    .map(|(j, _)| {
                        self.master_data.employees[(base + 2 + j) % emp_count]
                            .employee_id
                            .clone()
                    })
                    .collect();
                engagement.team_member_ids = real_team;
            }
            if let Some(pb) = &pb {
                pb.inc(1);
            }
            let team_members: Vec<String> = engagement.team_member_ids.clone();
            // Workpapers plus per-workpaper evidence.
            let workpapers =
                workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
            for wp in &workpapers {
                if let Some(pb) = &pb {
                    pb.inc(1);
                }
                let evidence = evidence_gen.generate_evidence_for_workpaper(
                    wp,
                    &team_members,
                    wp.preparer_date,
                );
                for _ in &evidence {
                    if let Some(pb) = &pb {
                        pb.inc(1);
                    }
                }
                snapshot.evidence.extend(evidence);
            }
            let risks =
                risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
            for _ in &risks {
                if let Some(pb) = &pb {
                    pb.inc(1);
                }
            }
            snapshot.risk_assessments.extend(risks);
            let findings = finding_gen.generate_findings_for_engagement(
                &engagement,
                &workpapers,
                &team_members,
            );
            for _ in &findings {
                if let Some(pb) = &pb {
                    pb.inc(1);
                }
            }
            snapshot.findings.extend(findings);
            let judgments =
                judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
            for _ in &judgments {
                if let Some(pb) = &pb {
                    pb.inc(1);
                }
            }
            snapshot.judgments.extend(judgments);
            let (confs, resps) =
                confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
            snapshot.confirmations.extend(confs);
            snapshot.confirmation_responses.extend(resps);
            // (id, display name) pairs for procedure-step sign-offs; falls
            // back to a truncated-id placeholder name when the employee id is
            // not found in master data.
            let team_pairs: Vec<(String, String)> = team_members
                .iter()
                .map(|id| {
                    let name = self
                        .master_data
                        .employees
                        .iter()
                        .find(|e| e.employee_id == *id)
                        .map(|e| e.display_name.clone())
                        .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
                    (id.clone(), name)
                })
                .collect();
            for wp in &workpapers {
                let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
                snapshot.procedure_steps.extend(steps);
            }
            for wp in &workpapers {
                if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
                    snapshot.samples.push(sample);
                }
            }
            let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
            snapshot.analytical_results.extend(analytical);
            let (ia_func, ia_reports) = ia_gen.generate(&engagement);
            snapshot.ia_functions.push(ia_func);
            snapshot.ia_reports.extend(ia_reports);
            let vendor_names: Vec<String> = self
                .master_data
                .vendors
                .iter()
                .map(|v| v.name.clone())
                .collect();
            let customer_names: Vec<String> = self
                .master_data
                .customers
                .iter()
                .map(|c| c.name.clone())
                .collect();
            let (parties, rp_txns) =
                related_party_gen.generate(&engagement, &vendor_names, &customer_names);
            snapshot.related_parties.extend(parties);
            snapshot.related_party_transactions.extend(rp_txns);
            snapshot.workpapers.extend(workpapers);
            // Attach an audit scope to the engagement, then move both into
            // the snapshot (engagement is consumed here).
            {
                let scope_id = format!(
                    "SCOPE-{}-{}",
                    engagement.engagement_id.simple(),
                    &engagement.client_entity_id
                );
                let scope = datasynth_core::models::audit::AuditScope::new(
                    scope_id.clone(),
                    engagement.engagement_id.to_string(),
                    engagement.client_entity_id.clone(),
                    engagement.materiality,
                );
                let mut eng = engagement;
                eng.scope_id = Some(scope_id);
                snapshot.audit_scopes.push(scope);
                snapshot.engagements.push(eng);
            }
        }
    }
    // ISA 600: group audit artifacts, only meaningful with multiple entities.
    if self.config.companies.len() > 1 {
        // Group materiality: first engagement's materiality, or 0.5% of total
        // revenue when no engagements were generated.
        let group_materiality = snapshot
            .engagements
            .first()
            .map(|e| e.materiality)
            .unwrap_or_else(|| {
                let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
                total_revenue * pct
            });
        let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
        let group_engagement_id = snapshot
            .engagements
            .first()
            .map(|e| e.engagement_id.to_string())
            .unwrap_or_else(|| "GROUP-ENG".to_string());
        let component_snapshot = component_gen.generate(
            &self.config.companies,
            group_materiality,
            &group_engagement_id,
            period_end,
        );
        snapshot.component_auditors = component_snapshot.component_auditors;
        snapshot.group_audit_plan = component_snapshot.group_audit_plan;
        snapshot.component_instructions = component_snapshot.component_instructions;
        snapshot.component_reports = component_snapshot.component_reports;
        info!(
            "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
            snapshot.component_auditors.len(),
            snapshot.component_instructions.len(),
            snapshot.component_reports.len(),
        );
    }
    // ISA 210: one engagement letter per engagement.
    {
        let applicable_framework = self
            .config
            .accounting_standards
            .framework
            .as_ref()
            .map(|f| format!("{f:?}"))
            .unwrap_or_else(|| "IFRS".to_string());
        let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
        let entity_count = self.config.companies.len();
        for engagement in &snapshot.engagements {
            let company = self
                .config
                .companies
                .iter()
                .find(|c| c.code == engagement.client_entity_id);
            let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
            let letter_date = engagement.planning_start;
            let letter = letter_gen.generate(
                &engagement.engagement_id.to_string(),
                &engagement.client_name,
                entity_count,
                engagement.period_end_date,
                currency,
                &applicable_framework,
                letter_date,
            );
            snapshot.engagement_letters.push(letter);
        }
        info!(
            "ISA 210 engagement letters: {} generated",
            snapshot.engagement_letters.len()
        );
    }
    // ISA 560: subsequent events per entity.
    {
        let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
        let entity_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();
        let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
        info!(
            "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
            subsequent.len(),
            subsequent
                .iter()
                .filter(|e| matches!(
                    e.classification,
                    datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
                ))
                .count(),
            subsequent
                .iter()
                .filter(|e| matches!(
                    e.classification,
                    datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
                ))
                .count(),
        );
        snapshot.subsequent_events = subsequent;
    }
    // ISA 402: service organizations, SOC reports and user entity controls.
    {
        let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
        let entity_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();
        let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
        info!(
            "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
            soc_snapshot.service_organizations.len(),
            soc_snapshot.soc_reports.len(),
            soc_snapshot.user_entity_controls.len(),
        );
        snapshot.service_organizations = soc_snapshot.service_organizations;
        snapshot.soc_reports = soc_snapshot.soc_reports;
        snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
    }
    // ISA 570: going concern assessments, fed with simple financial inputs
    // derived from the journal entries per company.
    {
        use datasynth_generators::audit::going_concern_generator::{
            GoingConcernGenerator, GoingConcernInput,
        };
        let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
        let entity_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();
        // Assessment performed 75 days after period end.
        let assessment_date = period_end + chrono::Duration::days(75);
        let period_label = format!("FY{}", period_end.year());
        let gc_inputs: Vec<GoingConcernInput> = self
            .config
            .companies
            .iter()
            .map(|company| {
                let code = &company.code;
                let mut revenue = rust_decimal::Decimal::ZERO;
                let mut expenses = rust_decimal::Decimal::ZERO;
                let mut current_assets = rust_decimal::Decimal::ZERO;
                let mut current_liabs = rust_decimal::Decimal::ZERO;
                let mut total_debt = rust_decimal::Decimal::ZERO;
                // Classification by account-code prefix: '4' revenue (credit
                // balance, hence sign flip), '6' expense, 1000-1499 current
                // assets, 2000-2499 current liabilities, 2500-2999 debt.
                // NOTE(review): assumes this numbering convention holds for
                // every generated chart of accounts — confirm.
                for je in entries.iter().filter(|je| &je.header.company_code == code) {
                    for line in &je.lines {
                        let acct = line.gl_account.as_str();
                        let net = line.debit_amount - line.credit_amount;
                        if acct.starts_with('4') {
                            revenue -= net;
                        } else if acct.starts_with('6') {
                            expenses += net;
                        }
                        if acct.starts_with('1') {
                            if let Ok(n) = acct.parse::<u32>() {
                                if (1000..=1499).contains(&n) {
                                    current_assets += net;
                                }
                            }
                        } else if acct.starts_with('2') {
                            if let Ok(n) = acct.parse::<u32>() {
                                if (2000..=2499).contains(&n) {
                                    current_liabs -= net;
                                } else if (2500..=2999).contains(&n) {
                                    total_debt -= net;
                                }
                            }
                        }
                    }
                }
                let net_income = revenue - expenses;
                let working_capital = current_assets - current_liabs;
                // Simplification: operating cash flow approximated by net income.
                let operating_cash_flow = net_income;
                GoingConcernInput {
                    entity_code: code.clone(),
                    net_income,
                    working_capital,
                    operating_cash_flow,
                    total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
                    assessment_date,
                }
            })
            .collect();
        let assessments = if gc_inputs.is_empty() {
            gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
        } else {
            gc_gen.generate_for_entities_with_inputs(
                &entity_codes,
                &gc_inputs,
                assessment_date,
                &period_label,
            )
        };
        info!(
            "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
            assessments.len(),
            assessments.iter().filter(|a| matches!(
                a.auditor_conclusion,
                datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
            )).count(),
            assessments.iter().filter(|a| matches!(
                a.auditor_conclusion,
                datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
            )).count(),
            assessments.iter().filter(|a| matches!(
                a.auditor_conclusion,
                datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
            )).count(),
        );
        snapshot.going_concern_assessments = assessments;
    }
    // ISA 540: accounting estimates per entity.
    {
        use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
        let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
        let entity_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();
        let estimates = est_gen.generate_for_entities(&entity_codes);
        info!(
            "ISA 540 accounting estimates: {} estimates across {} entities \
             ({} with retrospective reviews, {} with auditor point estimates)",
            estimates.len(),
            entity_codes.len(),
            estimates
                .iter()
                .filter(|e| e.retrospective_review.is_some())
                .count(),
            estimates
                .iter()
                .filter(|e| e.auditor_point_estimate.is_some())
                .count(),
        );
        snapshot.accounting_estimates = estimates;
    }
    // ISA 700: audit opinions and key audit matters per engagement.
    // NOTE(review): a second opinion-generation pass runs near the end of
    // this function (see below) using a different module path; both push into
    // `snapshot.audit_opinions` — confirm the duplication is intentional.
    {
        use datasynth_generators::audit::audit_opinion_generator::{
            AuditOpinionGenerator, AuditOpinionInput,
        };
        let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
        let opinion_inputs: Vec<AuditOpinionInput> = snapshot
            .engagements
            .iter()
            .map(|eng| {
                let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
                    .findings
                    .iter()
                    .filter(|f| f.engagement_id == eng.engagement_id)
                    .cloned()
                    .collect();
                let gc = snapshot
                    .going_concern_assessments
                    .iter()
                    .find(|g| g.entity_code == eng.client_entity_id)
                    .cloned();
                // All component reports are attached to every engagement here
                // (not filtered by entity, unlike the later pass).
                let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
                    snapshot.component_reports.clone();
                let auditor = self
                    .master_data
                    .employees
                    .first()
                    .map(|e| e.display_name.clone())
                    .unwrap_or_else(|| "Global Audit LLP".into());
                let partner = self
                    .master_data
                    .employees
                    .get(1)
                    .map(|e| e.display_name.clone())
                    .unwrap_or_else(|| eng.engagement_partner_id.clone());
                AuditOpinionInput {
                    entity_code: eng.client_entity_id.clone(),
                    entity_name: eng.client_name.clone(),
                    engagement_id: eng.engagement_id,
                    period_end: eng.period_end_date,
                    findings: eng_findings,
                    going_concern: gc,
                    component_reports: comp_reports,
                    // US-listed when the configured compliance framework is
                    // PCAOB or dual ISA/PCAOB.
                    is_us_listed: {
                        let fw = &self.config.audit_standards.isa_compliance.framework;
                        fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
                    },
                    auditor_name: auditor,
                    engagement_partner: partner,
                }
            })
            .collect();
        let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
        for go in &generated_opinions {
            snapshot
                .key_audit_matters
                .extend(go.key_audit_matters.clone());
        }
        snapshot.audit_opinions = generated_opinions
            .into_iter()
            .map(|go| go.opinion)
            .collect();
        info!(
            "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
            snapshot.audit_opinions.len(),
            snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
            snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
            snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
            snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
        );
    }
    // SOX 302 certifications and 404 assessments per company.
    {
        use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
        let mut sox_gen = SoxGenerator::new(self.seed + 8302);
        for (i, company) in self.config.companies.iter().enumerate() {
            let company_engagement_ids: Vec<uuid::Uuid> = snapshot
                .engagements
                .iter()
                .filter(|e| e.client_entity_id == company.code)
                .map(|e| e.engagement_id)
                .collect();
            let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
                .findings
                .iter()
                .filter(|f| company_engagement_ids.contains(&f.engagement_id))
                .cloned()
                .collect();
            // CEO/CFO names rotate through the employee list; synthetic
            // placeholders when no employees exist.
            let emp_count = self.master_data.employees.len();
            let ceo_name = if emp_count > 0 {
                self.master_data.employees[i % emp_count]
                    .display_name
                    .clone()
            } else {
                format!("CEO of {}", company.name)
            };
            let cfo_name = if emp_count > 1 {
                self.master_data.employees[(i + 1) % emp_count]
                    .display_name
                    .clone()
            } else {
                format!("CFO of {}", company.name)
            };
            let materiality = snapshot
                .engagements
                .iter()
                .find(|e| e.client_entity_id == company.code)
                .map(|e| e.materiality)
                .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
            let input = SoxGeneratorInput {
                company_code: company.code.clone(),
                company_name: company.name.clone(),
                fiscal_year,
                period_end,
                findings: company_findings,
                ceo_name,
                cfo_name,
                materiality_threshold: materiality,
                revenue_percent: rust_decimal::Decimal::from(100),
                assets_percent: rust_decimal::Decimal::from(100),
                significant_accounts: vec![
                    "Revenue".into(),
                    "Accounts Receivable".into(),
                    "Inventory".into(),
                    "Fixed Assets".into(),
                    "Accounts Payable".into(),
                ],
            };
            let (certs, assessment) = sox_gen.generate(&input);
            snapshot.sox_302_certifications.extend(certs);
            snapshot.sox_404_assessments.push(assessment);
        }
        info!(
            "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
            snapshot.sox_302_certifications.len(),
            snapshot.sox_404_assessments.len(),
            snapshot
                .sox_404_assessments
                .iter()
                .filter(|a| a.icfr_effective)
                .count(),
            snapshot
                .sox_404_assessments
                .iter()
                .filter(|a| !a.icfr_effective)
                .count(),
        );
    }
    // Materiality calculations per company, from benchmark figures summed
    // out of the journal entries (prefixes: '4' revenue, '1' assets,
    // '5'/'6' expenses, '3' equity).
    {
        use datasynth_generators::audit::materiality_generator::{
            MaterialityGenerator, MaterialityInput,
        };
        let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
        let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
        for company in &self.config.companies {
            let company_code = company.code.clone();
            let company_revenue: rust_decimal::Decimal = entries
                .iter()
                .filter(|e| e.company_code() == company_code)
                .flat_map(|e| e.lines.iter())
                .filter(|l| l.account_code.starts_with('4'))
                .map(|l| l.credit_amount)
                .sum();
            let total_assets: rust_decimal::Decimal = entries
                .iter()
                .filter(|e| e.company_code() == company_code)
                .flat_map(|e| e.lines.iter())
                .filter(|l| l.account_code.starts_with('1'))
                .map(|l| l.debit_amount)
                .sum();
            let total_expenses: rust_decimal::Decimal = entries
                .iter()
                .filter(|e| e.company_code() == company_code)
                .flat_map(|e| e.lines.iter())
                .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
                .map(|l| l.debit_amount)
                .sum();
            let equity: rust_decimal::Decimal = entries
                .iter()
                .filter(|e| e.company_code() == company_code)
                .flat_map(|e| e.lines.iter())
                .filter(|l| l.account_code.starts_with('3'))
                .map(|l| l.credit_amount)
                .sum();
            let pretax_income = company_revenue - total_expenses;
            // Fallback when this company had no revenue postings: scale the
            // global totals by the company's volume weight, with fixed ratios
            // for assets (3x), pre-tax income (0.1x) and equity (2x).
            let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
                let w = rust_decimal::Decimal::try_from(company.volume_weight)
                    .unwrap_or(rust_decimal::Decimal::ONE);
                (
                    total_revenue * w,
                    total_revenue * w * rust_decimal::Decimal::from(3),
                    total_revenue * w * rust_decimal::Decimal::new(1, 1),
                    total_revenue * w * rust_decimal::Decimal::from(2),
                )
            } else {
                (company_revenue, total_assets, pretax_income, equity)
            };
            // Assumed 35% gross margin.
            let gross_profit = rev * rust_decimal::Decimal::new(35, 2);
            materiality_inputs.push(MaterialityInput {
                entity_code: company_code,
                period: format!("FY{}", fiscal_year),
                revenue: rev,
                pretax_income: pti,
                total_assets: assets,
                equity: eq,
                gross_profit,
            });
        }
        snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
        info!(
            "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
             {} total assets, {} equity benchmarks)",
            snapshot.materiality_calculations.len(),
            snapshot
                .materiality_calculations
                .iter()
                .filter(|m| matches!(
                    m.benchmark,
                    datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
                ))
                .count(),
            snapshot
                .materiality_calculations
                .iter()
                .filter(|m| matches!(
                    m.benchmark,
                    datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
                ))
                .count(),
            snapshot
                .materiality_calculations
                .iter()
                .filter(|m| matches!(
                    m.benchmark,
                    datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
                ))
                .count(),
            snapshot
                .materiality_calculations
                .iter()
                .filter(|m| matches!(
                    m.benchmark,
                    datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
                ))
                .count(),
        );
    }
    // Combined risk assessments (CRA), linked back to each entity's audit scope.
    {
        use datasynth_generators::audit::cra_generator::CraGenerator;
        let mut cra_gen = CraGenerator::new(self.seed + 8315);
        let entity_scope_map: std::collections::HashMap<String, String> = snapshot
            .audit_scopes
            .iter()
            .map(|s| (s.entity_code.clone(), s.id.clone()))
            .collect();
        for company in &self.config.companies {
            let cras = cra_gen.generate_for_entity(&company.code, None);
            let scope_id = entity_scope_map.get(&company.code).cloned();
            let cras_with_scope: Vec<_> = cras
                .into_iter()
                .map(|mut cra| {
                    cra.scope_id = scope_id.clone();
                    cra
                })
                .collect();
            snapshot.combined_risk_assessments.extend(cras_with_scope);
        }
        let significant_count = snapshot
            .combined_risk_assessments
            .iter()
            .filter(|c| c.significant_risk)
            .count();
        let high_cra_count = snapshot
            .combined_risk_assessments
            .iter()
            .filter(|c| {
                matches!(
                    c.combined_risk,
                    datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
                )
            })
            .count();
        info!(
            "CRA: {} combined risk assessments ({} significant, {} high CRA)",
            snapshot.combined_risk_assessments.len(),
            significant_count,
            high_cra_count,
        );
    }
    // ISA 530: sampling plans and sampled items, driven by each entity's CRAs
    // and its materiality-derived tolerable error.
    {
        use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
        let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
        for company in &self.config.companies {
            let entity_code = company.code.clone();
            let tolerable_error = snapshot
                .materiality_calculations
                .iter()
                .find(|m| m.entity_code == entity_code)
                .map(|m| m.tolerable_error);
            let entity_cras: Vec<_> = snapshot
                .combined_risk_assessments
                .iter()
                .filter(|c| c.entity_code == entity_code)
                .cloned()
                .collect();
            if !entity_cras.is_empty() {
                let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
                snapshot.sampling_plans.extend(plans);
                snapshot.sampled_items.extend(items);
            }
        }
        let misstatement_count = snapshot
            .sampled_items
            .iter()
            .filter(|i| i.misstatement_found)
            .count();
        info!(
            "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
            snapshot.sampling_plans.len(),
            snapshot.sampled_items.len(),
            misstatement_count,
        );
    }
    // ISA 315: significant classes of transactions (SCOTS) per entity.
    {
        use datasynth_generators::audit::scots_generator::{
            ScotsGenerator, ScotsGeneratorConfig,
        };
        let ic_enabled = self.config.intercompany.enabled;
        let config = ScotsGeneratorConfig {
            intercompany_enabled: ic_enabled,
            ..ScotsGeneratorConfig::default()
        };
        let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
        for company in &self.config.companies {
            let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
            snapshot
                .significant_transaction_classes
                .extend(entity_scots);
        }
        let estimation_count = snapshot
            .significant_transaction_classes
            .iter()
            .filter(|s| {
                matches!(
                    s.transaction_type,
                    datasynth_core::models::audit::scots::ScotTransactionType::Estimation
                )
            })
            .count();
        info!(
            "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
            snapshot.significant_transaction_classes.len(),
            estimation_count,
        );
    }
    // ISA 520: unusual-item flags derived from the journal entries.
    {
        use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
        let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
        let entity_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();
        let unusual_flags =
            unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
        info!(
            "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
            unusual_flags.len(),
            unusual_flags
                .iter()
                .filter(|f| matches!(
                    f.severity,
                    datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
                ))
                .count(),
            unusual_flags
                .iter()
                .filter(|f| matches!(
                    f.severity,
                    datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
                ))
                .count(),
            unusual_flags
                .iter()
                .filter(|f| matches!(
                    f.severity,
                    datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
                ))
                .count(),
        );
        snapshot.unusual_items = unusual_flags;
    }
    // ISA 520: analytical relationships, current vs prior fiscal year.
    {
        use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
        let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
        let entity_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();
        let current_period_label = format!("FY{fiscal_year}");
        let prior_period_label = format!("FY{}", fiscal_year - 1);
        let analytical_rels = ar_gen.generate_for_entities(
            &entity_codes,
            entries,
            &current_period_label,
            &prior_period_label,
        );
        let out_of_range = analytical_rels
            .iter()
            .filter(|r| !r.within_expected_range)
            .count();
        info!(
            "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
            analytical_rels.len(),
            out_of_range,
        );
        snapshot.analytical_relationships = analytical_rels;
    }
    // Final progress-bar summary of every artifact family generated above.
    if let Some(pb) = pb {
        pb.finish_with_message(format!(
            "Audit data: {} engagements, {} workpapers, {} evidence, \
             {} confirmations, {} procedure steps, {} samples, \
             {} analytical, {} IA funcs, {} related parties, \
             {} component auditors, {} letters, {} subsequent events, \
             {} service orgs, {} going concern, {} accounting estimates, \
             {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
             {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
             {} unusual items, {} analytical relationships",
            snapshot.engagements.len(),
            snapshot.workpapers.len(),
            snapshot.evidence.len(),
            snapshot.confirmations.len(),
            snapshot.procedure_steps.len(),
            snapshot.samples.len(),
            snapshot.analytical_results.len(),
            snapshot.ia_functions.len(),
            snapshot.related_parties.len(),
            snapshot.component_auditors.len(),
            snapshot.engagement_letters.len(),
            snapshot.subsequent_events.len(),
            snapshot.service_organizations.len(),
            snapshot.going_concern_assessments.len(),
            snapshot.accounting_estimates.len(),
            snapshot.audit_opinions.len(),
            snapshot.key_audit_matters.len(),
            snapshot.sox_302_certifications.len(),
            snapshot.sox_404_assessments.len(),
            snapshot.materiality_calculations.len(),
            snapshot.combined_risk_assessments.len(),
            snapshot.sampling_plans.len(),
            snapshot.significant_transaction_classes.len(),
            snapshot.unusual_items.len(),
            snapshot.analytical_relationships.len(),
        ));
    }
    // Static PCAOB<->ISA standard mappings.
    {
        use datasynth_standards::audit::pcaob::PcaobIsaMapping;
        snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
        debug!(
            "PCAOB-ISA mappings generated: {} mappings",
            snapshot.isa_pcaob_mappings.len()
        );
    }
    // Static ISA standard reference entries.
    {
        use datasynth_standards::audit::isa_reference::IsaStandard;
        snapshot.isa_mappings = IsaStandard::standard_entries();
        debug!(
            "ISA standard entries generated: {} standards",
            snapshot.isa_mappings.len()
        );
    }
    // Link each related-party transaction to the journal entry (of the same
    // entity) whose posting date is closest to the transaction date.
    {
        let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
            .engagements
            .iter()
            .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
            .collect();
        for rpt in &mut snapshot.related_party_transactions {
            if rpt.journal_entry_id.is_some() {
                continue;
            }
            let entity = engagement_by_id
                .get(&rpt.engagement_id.to_string())
                .copied()
                .unwrap_or("");
            let best_je = entries
                .iter()
                .filter(|je| je.header.company_code == entity)
                .min_by_key(|je| {
                    (je.header.posting_date - rpt.transaction_date)
                        .num_days()
                        .abs()
                });
            if let Some(je) = best_je {
                rpt.journal_entry_id = Some(je.header.document_id.to_string());
            }
        }
        let linked = snapshot
            .related_party_transactions
            .iter()
            .filter(|t| t.journal_entry_id.is_some())
            .count();
        debug!(
            "Linked {}/{} related party transactions to journal entries",
            linked,
            snapshot.related_party_transactions.len()
        );
    }
    // Second opinion-generation pass (different module path and different
    // `is_us_listed` derivation than the ISA 700 block above); opinions and
    // KAMs are pushed in addition to those already in the snapshot.
    // NOTE(review): looks like a duplicate of the earlier pass — confirm.
    if !snapshot.engagements.is_empty() {
        use datasynth_generators::audit_opinion_generator::{
            AuditOpinionGenerator, AuditOpinionInput,
        };
        let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
        let inputs: Vec<AuditOpinionInput> = snapshot
            .engagements
            .iter()
            .map(|eng| {
                let findings = snapshot
                    .findings
                    .iter()
                    .filter(|f| f.engagement_id == eng.engagement_id)
                    .cloned()
                    .collect();
                let going_concern = snapshot
                    .going_concern_assessments
                    .iter()
                    .find(|gc| gc.entity_code == eng.client_entity_id)
                    .cloned();
                // Unlike the earlier pass, component reports are filtered to
                // this engagement's entity.
                let component_reports = snapshot
                    .component_reports
                    .iter()
                    .filter(|r| r.entity_code == eng.client_entity_id)
                    .cloned()
                    .collect();
                AuditOpinionInput {
                    entity_code: eng.client_entity_id.clone(),
                    entity_name: eng.client_name.clone(),
                    engagement_id: eng.engagement_id,
                    period_end: eng.period_end_date,
                    findings,
                    going_concern,
                    component_reports,
                    is_us_listed: matches!(
                        eng.engagement_type,
                        datasynth_core::audit::EngagementType::IntegratedAudit
                            | datasynth_core::audit::EngagementType::Sox404
                    ),
                    auditor_name: "DataSynth Audit LLP".to_string(),
                    engagement_partner: "Engagement Partner".to_string(),
                }
            })
            .collect();
        let generated = opinion_gen.generate_batch(&inputs);
        for g in generated {
            snapshot.key_audit_matters.extend(g.key_audit_matters);
            snapshot.audit_opinions.push(g.opinion);
        }
        debug!(
            "Generated {} audit opinions with {} key audit matters",
            snapshot.audit_opinions.len(),
            snapshot.key_audit_matters.len()
        );
    }
    Ok(snapshot)
}
/// Generates audit engagement data by driving the audit FSM engine.
///
/// Builds an `EngagementContext` from the first configured company and the
/// supplied journal entries (financial aggregates, team/vendor/customer
/// rosters, per-account balances), runs the configured blueprint + overlay
/// through `AuditFsmEngine`, and maps the resulting artifact bag into an
/// `AuditSnapshot` enriched with ISA/PCAOB standard mappings.
///
/// # Errors
/// Returns a generation error when the blueprint/overlay fails to load or the
/// engine run fails, and a config error for an unparseable `start_date`.
///
/// # Panics
/// Panics if `config.audit.fsm` is `None`; callers must only reach this path
/// when FSM-based audit generation is enabled.
fn generate_audit_data_with_fsm(
    &mut self,
    entries: &[JournalEntry],
) -> SynthResult<AuditSnapshot> {
    use datasynth_audit_fsm::{
        context::EngagementContext,
        engine::AuditFsmEngine,
        loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
    };
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    info!("Audit FSM: generating audit data via FSM engine");
    let fsm_config = self
        .config
        .audit
        .fsm
        .as_ref()
        .expect("FSM config must be present when FSM is enabled");
    // Unknown blueprint names degrade gracefully to the builtin FSA blueprint
    // (with a warning) rather than failing the whole run.
    let bwp = match fsm_config.blueprint.as_str() {
        "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
        "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
        _ => {
            warn!(
                "Unknown FSM blueprint '{}', falling back to builtin:fsa",
                fsm_config.blueprint
            );
            BlueprintWithPreconditions::load_builtin_fsa()
        }
    }
    .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
    // Same graceful fallback for the overlay selection.
    let overlay = match fsm_config.overlay.as_str() {
        "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
        "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
        "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
        _ => {
            warn!(
                "Unknown FSM overlay '{}', falling back to builtin:default",
                fsm_config.overlay
            );
            load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
        }
    }
    .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
    let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
        .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
    let period_end = start_date + chrono::Months::new(self.config.global.period_months);
    // The engagement is modelled on the first configured company; placeholder
    // identifiers are used when no companies are configured.
    let company = self.config.companies.first();
    let company_code = company
        .map(|c| c.code.clone())
        .unwrap_or_else(|| "UNKNOWN".to_string());
    let company_name = company
        .map(|c| c.name.clone())
        .unwrap_or_else(|| "Unknown Company".to_string());
    let currency = company
        .map(|c| c.currency.clone())
        .unwrap_or_else(|| "USD".to_string());
    // Restrict to the engagement entity's entries; the "UNKNOWN" placeholder
    // keeps everything so the aggregates below are still populated.
    let entity_entries: Vec<_> = entries
        .iter()
        .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
        .cloned()
        .collect();
    let entries = &entity_entries;
    // Financial aggregates derived from account-code class prefixes.
    // NOTE(review): assumes the generated chart of accounts uses numeric class
    // prefixes (1=assets, 2=liabilities, 3=equity, 4=revenue, 5=COGS,
    // 5/6=expenses, 60=depreciation) — confirm against the COA generator.
    let total_revenue: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with('4'))
        .map(|l| l.credit_amount - l.debit_amount)
        .sum();
    let total_assets: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with('1'))
        .map(|l| l.debit_amount - l.credit_amount)
        .sum();
    let total_expenses: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
        .map(|l| l.debit_amount)
        .sum();
    let equity: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with('3'))
        .map(|l| l.credit_amount - l.debit_amount)
        .sum();
    let total_debt: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with('2'))
        .map(|l| l.credit_amount - l.debit_amount)
        .sum();
    let pretax_income = total_revenue - total_expenses;
    let cogs: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with('5'))
        .map(|l| l.debit_amount)
        .sum();
    let gross_profit = total_revenue - cogs;
    let current_assets: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| {
            l.account_code.starts_with("10")
                || l.account_code.starts_with("11")
                || l.account_code.starts_with("12")
                || l.account_code.starts_with("13")
        })
        .map(|l| l.debit_amount - l.credit_amount)
        .sum();
    let current_liabilities: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| {
            l.account_code.starts_with("20")
                || l.account_code.starts_with("21")
                || l.account_code.starts_with("22")
        })
        .map(|l| l.credit_amount - l.debit_amount)
        .sum();
    let working_capital = current_assets - current_liabilities;
    let depreciation: rust_decimal::Decimal = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|l| l.account_code.starts_with("60"))
        .map(|l| l.debit_amount)
        .sum();
    // Simplified indirect-method proxy: pretax income plus depreciation.
    let operating_cash_flow = pretax_income + depreciation;
    // Postable account codes for the engine's account universe.
    let accounts: Vec<String> = self
        .coa
        .as_ref()
        .map(|coa| {
            coa.get_postable_accounts()
                .iter()
                .map(|acc| acc.account_code().to_string())
                .collect()
        })
        .unwrap_or_default();
    // First 8 employees act as the audit team roster.
    let team_member_ids: Vec<String> = self
        .master_data
        .employees
        .iter()
        .take(8)
        .map(|e| e.employee_id.clone())
        .collect();
    let team_member_pairs: Vec<(String, String)> = self
        .master_data
        .employees
        .iter()
        .take(8)
        .map(|e| (e.employee_id.clone(), e.display_name.clone()))
        .collect();
    let vendor_names: Vec<String> = self
        .master_data
        .vendors
        .iter()
        .map(|v| v.name.clone())
        .collect();
    let customer_names: Vec<String> = self
        .master_data
        .customers
        .iter()
        .map(|c| c.name.clone())
        .collect();
    let entity_codes: Vec<String> = self
        .config
        .companies
        .iter()
        .map(|c| c.code.clone())
        .collect();
    // Only a sample of JE ids is exposed for the engine to reference.
    let journal_entry_ids: Vec<String> = entries
        .iter()
        .take(50)
        .map(|e| e.header.document_id.to_string())
        .collect();
    // Net balance (debits minus credits) per account as f64, converted via
    // string round-trip from Decimal; parse failures degrade to 0.0.
    let mut account_balances = std::collections::HashMap::<String, f64>::new();
    for entry in entries {
        for line in &entry.lines {
            let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
            let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
            *account_balances
                .entry(line.account_code.clone())
                .or_insert(0.0) += debit_f64 - credit_f64;
        }
    }
    // No control/anomaly references are wired into the FSM context here.
    let control_ids: Vec<String> = Vec::new();
    let anomaly_refs: Vec<String> = Vec::new();
    let mut context = EngagementContext {
        company_code,
        company_name,
        fiscal_year: start_date.year(),
        currency,
        total_revenue,
        total_assets,
        engagement_start: start_date,
        report_date: period_end,
        pretax_income,
        equity,
        gross_profit,
        working_capital,
        operating_cash_flow,
        total_debt,
        team_member_ids,
        team_member_pairs,
        accounts,
        vendor_names,
        customer_names,
        journal_entry_ids,
        account_balances,
        control_ids,
        anomaly_refs,
        journal_entries: entries.to_vec(),
        is_us_listed: false,
        entity_codes,
        auditor_firm_name: "DataSynth Audit LLP".into(),
        accounting_framework: self
            .config
            .accounting_standards
            .framework
            .map(|f| match f {
                datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
                datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
                datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
                    "French GAAP"
                }
                datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
                    "German GAAP"
                }
                datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
                    "Dual Reporting"
                }
            })
            .unwrap_or("IFRS")
            .into(),
    };
    // NOTE(review): unchecked add — other derived seeds in this file use
    // `wrapping_add` (e.g. seed.wrapping_add(0x700)); this panics in debug
    // builds if `self.seed > u64::MAX - 8000`. Consider aligning.
    let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
    let rng = ChaCha8Rng::seed_from_u64(seed);
    let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
    let mut result = engine
        .run_engagement(&context)
        .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
    info!(
        "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
         {} phases completed, duration {:.1}h",
        result.event_log.len(),
        result.artifacts.total_artifacts(),
        result.anomalies.len(),
        result.phases_completed.len(),
        result.total_duration_hours,
    );
    let tb_entity = context.company_code.clone();
    let tb_fy = context.fiscal_year;
    // Reclaim the entry clones from the context without another copy.
    result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
    result.artifacts.trial_balance_entries = compute_trial_balance_entries(
        entries,
        &tb_entity,
        tb_fy,
        self.coa.as_ref().map(|c| c.as_ref()),
    );
    // Map the engine's artifact bag onto the snapshot; unlisted fields keep
    // their defaults.
    let bag = result.artifacts;
    let mut snapshot = AuditSnapshot {
        engagements: bag.engagements,
        engagement_letters: bag.engagement_letters,
        materiality_calculations: bag.materiality_calculations,
        risk_assessments: bag.risk_assessments,
        combined_risk_assessments: bag.combined_risk_assessments,
        workpapers: bag.workpapers,
        evidence: bag.evidence,
        findings: bag.findings,
        judgments: bag.judgments,
        sampling_plans: bag.sampling_plans,
        sampled_items: bag.sampled_items,
        analytical_results: bag.analytical_results,
        going_concern_assessments: bag.going_concern_assessments,
        subsequent_events: bag.subsequent_events,
        audit_opinions: bag.audit_opinions,
        key_audit_matters: bag.key_audit_matters,
        procedure_steps: bag.procedure_steps,
        samples: bag.samples,
        confirmations: bag.confirmations,
        confirmation_responses: bag.confirmation_responses,
        fsm_event_trail: Some(result.event_log),
        ..Default::default()
    };
    // Attach the static ISA<->PCAOB and ISA reference tables.
    {
        use datasynth_standards::audit::pcaob::PcaobIsaMapping;
        snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
    }
    {
        use datasynth_standards::audit::isa_reference::IsaStandard;
        snapshot.isa_mappings = IsaStandard::standard_entries();
    }
    info!(
        "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
         {} risk assessments, {} findings, {} materiality calcs",
        snapshot.engagements.len(),
        snapshot.workpapers.len(),
        snapshot.evidence.len(),
        snapshot.risk_assessments.len(),
        snapshot.findings.len(),
        snapshot.materiality_calculations.len(),
    );
    Ok(snapshot)
}
/// Phase 10: builds transaction graphs from the journal entries and exports
/// them in every configured format under the graph output directory.
///
/// One graph is built per configured `graph_types` entry, then written for
/// every format in `graph_export.formats`. Export failures are logged and
/// skipped so a single bad format/directory does not abort the phase.
/// Node/edge counts are accumulated into `stats`; successful exports are
/// summarized in the returned snapshot.
fn export_graphs(
    &mut self,
    entries: &[JournalEntry],
    _coa: &Arc<ChartOfAccounts>,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<GraphExportSnapshot> {
    let pb = self.create_progress_bar(100, "Exporting Graphs");
    let mut snapshot = GraphExportSnapshot::default();
    // Explicit output path wins over the configured output directory.
    let output_dir = self
        .output_path
        .clone()
        .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
    let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
    // NOTE(review): the bar has len 100 but each graph type increments by 90
    // overall, so configs with multiple graph types overshoot the length.
    for graph_type in &self.config.graph_export.graph_types {
        if let Some(pb) = &pb {
            pb.inc(10);
        }
        let graph_config = TransactionGraphConfig {
            include_vendors: false,
            include_customers: false,
            create_debit_credit_edges: true,
            include_document_nodes: graph_type.include_document_nodes,
            min_edge_weight: graph_type.min_edge_weight,
            aggregate_parallel_edges: graph_type.aggregate_edges,
            framework: None,
        };
        let mut builder = TransactionGraphBuilder::new(graph_config);
        builder.add_journal_entries(entries);
        let graph = builder.build();
        stats.graph_node_count += graph.node_count();
        stats.graph_edge_count += graph.edge_count();
        if let Some(pb) = &pb {
            pb.inc(40);
        }
        // Write this graph once per configured export format, each under
        // <graph_dir>/<type>/<format>/.
        for format in &self.config.graph_export.formats {
            let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
            if let Err(e) = std::fs::create_dir_all(&format_dir) {
                warn!("Failed to create graph output directory: {}", e);
                continue;
            }
            match format {
                datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
                    let pyg_config = PyGExportConfig {
                        common: datasynth_graph::CommonExportConfig {
                            export_node_features: true,
                            export_edge_features: true,
                            export_node_labels: true,
                            export_edge_labels: true,
                            export_masks: true,
                            train_ratio: self.config.graph_export.train_ratio,
                            val_ratio: self.config.graph_export.validation_ratio,
                            // Deterministic splits: fall back to the run seed.
                            seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
                        },
                        one_hot_categoricals: false,
                    };
                    let exporter = PyGExporter::new(pyg_config);
                    match exporter.export(&graph, &format_dir) {
                        Ok(metadata) => {
                            snapshot.exports.insert(
                                format!("{}_{}", graph_type.name, "pytorch_geometric"),
                                GraphExportInfo {
                                    name: graph_type.name.clone(),
                                    format: "pytorch_geometric".to_string(),
                                    output_path: format_dir.clone(),
                                    node_count: metadata.num_nodes,
                                    edge_count: metadata.num_edges,
                                },
                            );
                            snapshot.graph_count += 1;
                        }
                        Err(e) => {
                            warn!("Failed to export PyTorch Geometric graph: {}", e);
                        }
                    }
                }
                datasynth_config::schema::GraphExportFormat::Neo4j => {
                    use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
                    let neo4j_config = Neo4jExportConfig {
                        export_node_properties: true,
                        export_edge_properties: true,
                        export_features: true,
                        generate_cypher: true,
                        generate_admin_import: true,
                        database_name: "synth".to_string(),
                        cypher_batch_size: 1000,
                    };
                    let exporter = Neo4jExporter::new(neo4j_config);
                    match exporter.export(&graph, &format_dir) {
                        Ok(metadata) => {
                            snapshot.exports.insert(
                                format!("{}_{}", graph_type.name, "neo4j"),
                                GraphExportInfo {
                                    name: graph_type.name.clone(),
                                    format: "neo4j".to_string(),
                                    output_path: format_dir.clone(),
                                    node_count: metadata.num_nodes,
                                    edge_count: metadata.num_edges,
                                },
                            );
                            snapshot.graph_count += 1;
                        }
                        Err(e) => {
                            warn!("Failed to export Neo4j graph: {}", e);
                        }
                    }
                }
                datasynth_config::schema::GraphExportFormat::Dgl => {
                    use datasynth_graph::{DGLExportConfig, DGLExporter};
                    let dgl_config = DGLExportConfig {
                        common: datasynth_graph::CommonExportConfig {
                            export_node_features: true,
                            export_edge_features: true,
                            export_node_labels: true,
                            export_edge_labels: true,
                            export_masks: true,
                            train_ratio: self.config.graph_export.train_ratio,
                            val_ratio: self.config.graph_export.validation_ratio,
                            seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
                        },
                        heterogeneous: self.config.graph_export.dgl.heterogeneous,
                        include_pickle_script: true,
                    };
                    let exporter = DGLExporter::new(dgl_config);
                    match exporter.export(&graph, &format_dir) {
                        Ok(metadata) => {
                            snapshot.exports.insert(
                                format!("{}_{}", graph_type.name, "dgl"),
                                GraphExportInfo {
                                    name: graph_type.name.clone(),
                                    format: "dgl".to_string(),
                                    output_path: format_dir.clone(),
                                    // DGL metadata nests the counts under `common`.
                                    node_count: metadata.common.num_nodes,
                                    edge_count: metadata.common.num_edges,
                                },
                            );
                            snapshot.graph_count += 1;
                        }
                        Err(e) => {
                            warn!("Failed to export DGL graph: {}", e);
                        }
                    }
                }
                datasynth_config::schema::GraphExportFormat::RustGraph => {
                    use datasynth_graph::{
                        RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
                    };
                    let rustgraph_config = RustGraphExportConfig {
                        include_features: true,
                        include_temporal: true,
                        include_labels: true,
                        source_name: "datasynth".to_string(),
                        batch_id: None,
                        output_format: RustGraphOutputFormat::JsonLines,
                        export_node_properties: true,
                        export_edge_properties: true,
                        pretty_print: false,
                    };
                    let exporter = RustGraphExporter::new(rustgraph_config);
                    match exporter.export(&graph, &format_dir) {
                        Ok(metadata) => {
                            snapshot.exports.insert(
                                format!("{}_{}", graph_type.name, "rustgraph"),
                                GraphExportInfo {
                                    name: graph_type.name.clone(),
                                    format: "rustgraph".to_string(),
                                    output_path: format_dir.clone(),
                                    node_count: metadata.num_nodes,
                                    edge_count: metadata.num_edges,
                                },
                            );
                            snapshot.graph_count += 1;
                        }
                        Err(e) => {
                            warn!("Failed to export RustGraph: {}", e);
                        }
                    }
                }
                datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
                    // Handled by the dedicated hypergraph export phase.
                    debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
                }
            }
        }
        if let Some(pb) = &pb {
            pb.inc(40);
        }
    }
    stats.graph_export_count = snapshot.graph_count;
    snapshot.exported = snapshot.graph_count > 0;
    if let Some(pb) = pb {
        pb.finish_with_message(format!(
            "Graphs exported: {} graphs ({} nodes, {} edges)",
            snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
        ));
    }
    Ok(snapshot)
}
/// Phase 10c: builds and exports the auxiliary network graphs — banking,
/// approval, and entity-relationship — that complement the main transaction
/// graphs.
///
/// Each graph is only built when its source data exists (banking customers +
/// transactions, approval workflows on entries, 2+ companies respectively),
/// and is only exported for the PyTorch Geometric format. All failures are
/// logged and non-fatal; node/edge counts are accumulated into `stats`.
fn build_additional_graphs(
    &self,
    banking: &BankingSnapshot,
    intercompany: &IntercompanySnapshot,
    entries: &[JournalEntry],
    stats: &mut EnhancedGenerationStatistics,
) {
    let output_dir = self
        .output_path
        .clone()
        .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
    let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
    // --- Banking network graph (customers, accounts, transactions) ---
    if !banking.customers.is_empty() && !banking.transactions.is_empty() {
        info!("Phase 10c: Building banking network graph");
        let config = BankingGraphConfig::default();
        let mut builder = BankingGraphBuilder::new(config);
        builder.add_customers(&banking.customers);
        builder.add_accounts(&banking.accounts, &banking.customers);
        builder.add_transactions(&banking.transactions);
        let graph = builder.build();
        let node_count = graph.node_count();
        let edge_count = graph.edge_count();
        stats.graph_node_count += node_count;
        stats.graph_edge_count += edge_count;
        for format in &self.config.graph_export.formats {
            // Only PyG export is supported for the banking graph.
            if matches!(
                format,
                datasynth_config::schema::GraphExportFormat::PytorchGeometric
            ) {
                let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
                if let Err(e) = std::fs::create_dir_all(&format_dir) {
                    warn!("Failed to create banking graph output dir: {}", e);
                    continue;
                }
                let pyg_config = PyGExportConfig::default();
                let exporter = PyGExporter::new(pyg_config);
                if let Err(e) = exporter.export(&graph, &format_dir) {
                    warn!("Failed to export banking graph as PyG: {}", e);
                } else {
                    info!(
                        "Banking network graph exported: {} nodes, {} edges",
                        node_count, edge_count
                    );
                }
            }
        }
    }
    // --- Approval network graph (preparer -> approver actions per JE) ---
    let approval_entries: Vec<_> = entries
        .iter()
        .filter(|je| je.header.approval_workflow.is_some())
        .collect();
    if !approval_entries.is_empty() {
        info!(
            "Phase 10c: Building approval network graph ({} entries with approvals)",
            approval_entries.len()
        );
        let config = ApprovalGraphConfig::default();
        let mut builder = ApprovalGraphBuilder::new(config);
        for je in &approval_entries {
            if let Some(ref wf) = je.header.approval_workflow {
                // One synthetic approval record per workflow action.
                for action in &wf.actions {
                    let record = datasynth_core::models::ApprovalRecord {
                        approval_id: format!(
                            "APR-{}-{}",
                            je.header.document_id, action.approval_level
                        ),
                        document_number: je.header.document_id.to_string(),
                        document_type: "JE".to_string(),
                        company_code: je.company_code().to_string(),
                        requester_id: wf.preparer_id.clone(),
                        requester_name: Some(wf.preparer_name.clone()),
                        approver_id: action.actor_id.clone(),
                        approver_name: action.actor_name.clone(),
                        // NOTE(review): the JE posting date stands in for the
                        // actual approval timestamp — confirm intended.
                        approval_date: je.posting_date(),
                        action: format!("{:?}", action.action),
                        amount: wf.amount,
                        approval_limit: None,
                        comments: action.comments.clone(),
                        delegation_from: None,
                        is_auto_approved: false,
                    };
                    builder.add_approval(&record);
                }
            }
        }
        let graph = builder.build();
        let node_count = graph.node_count();
        let edge_count = graph.edge_count();
        stats.graph_node_count += node_count;
        stats.graph_edge_count += edge_count;
        for format in &self.config.graph_export.formats {
            if matches!(
                format,
                datasynth_config::schema::GraphExportFormat::PytorchGeometric
            ) {
                let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
                if let Err(e) = std::fs::create_dir_all(&format_dir) {
                    warn!("Failed to create approval graph output dir: {}", e);
                    continue;
                }
                let pyg_config = PyGExportConfig::default();
                let exporter = PyGExporter::new(pyg_config);
                if let Err(e) = exporter.export(&graph, &format_dir) {
                    warn!("Failed to export approval graph as PyG: {}", e);
                } else {
                    info!(
                        "Approval network graph exported: {} nodes, {} edges",
                        node_count, edge_count
                    );
                }
            }
        }
    }
    // --- Entity relationship graph (parent/subsidiary + intercompany) ---
    if self.config.companies.len() >= 2 {
        info!(
            "Phase 10c: Building entity relationship graph ({} companies)",
            self.config.companies.len()
        );
        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
        // Convention: first configured company is the parent; all others are
        // wholly-owned (100%) subsidiaries of it.
        let parent_code = &self.config.companies[0].code;
        let mut companies: Vec<datasynth_core::models::Company> =
            Vec::with_capacity(self.config.companies.len());
        let first = &self.config.companies[0];
        companies.push(datasynth_core::models::Company::parent(
            &first.code,
            &first.name,
            &first.country,
            &first.currency,
        ));
        for cc in self.config.companies.iter().skip(1) {
            companies.push(datasynth_core::models::Company::subsidiary(
                &cc.code,
                &cc.name,
                &cc.country,
                &cc.currency,
                parent_code,
                rust_decimal::Decimal::from(100),
            ));
        }
        let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
            self.config
                .companies
                .iter()
                .skip(1)
                .enumerate()
                .map(|(i, cc)| {
                    let mut rel =
                        datasynth_core::models::intercompany::IntercompanyRelationship::new(
                            format!("REL{:03}", i + 1),
                            parent_code.clone(),
                            cc.code.clone(),
                            rust_decimal::Decimal::from(100),
                            start_date,
                        );
                    rel.functional_currency = cc.currency.clone();
                    rel
                })
                .collect();
        let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
        builder.add_companies(&companies);
        builder.add_ownership_relationships(&relationships);
        // Matched intercompany pairs become weighted edges between entities.
        for pair in &intercompany.matched_pairs {
            builder.add_intercompany_edge(
                &pair.seller_company,
                &pair.buyer_company,
                pair.amount,
                &format!("{:?}", pair.transaction_type),
            );
        }
        let graph = builder.build();
        let node_count = graph.node_count();
        let edge_count = graph.edge_count();
        stats.graph_node_count += node_count;
        stats.graph_edge_count += edge_count;
        for format in &self.config.graph_export.formats {
            if matches!(
                format,
                datasynth_config::schema::GraphExportFormat::PytorchGeometric
            ) {
                let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
                if let Err(e) = std::fs::create_dir_all(&format_dir) {
                    warn!("Failed to create entity graph output dir: {}", e);
                    continue;
                }
                let pyg_config = PyGExportConfig::default();
                let exporter = PyGExporter::new(pyg_config);
                if let Err(e) = exporter.export(&graph, &format_dir) {
                    warn!("Failed to export entity graph as PyG: {}", e);
                } else {
                    info!(
                        "Entity relationship graph exported: {} nodes, {} edges",
                        node_count, edge_count
                    );
                }
            }
        }
    } else {
        debug!(
            "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
            self.config.companies.len()
        );
    }
}
#[allow(clippy::too_many_arguments)]
/// Phase 10b: assembles the cross-process hypergraph (governance, process,
/// and accounting layers) from every generated snapshot and exports it.
///
/// The builder is configured from `graph_export.hypergraph`; the output
/// format is either the "unified" RustGraph export or the plain hypergraph
/// export. When the `streaming` feature is enabled and a stream target is
/// configured, the unified export is additionally streamed to that target
/// (best-effort; streaming failures only warn).
///
/// # Errors
/// Returns a generation error when the on-disk hypergraph export fails.
fn export_hypergraph(
    &self,
    coa: &Arc<ChartOfAccounts>,
    entries: &[JournalEntry],
    document_flows: &DocumentFlowSnapshot,
    sourcing: &SourcingSnapshot,
    hr: &HrSnapshot,
    manufacturing: &ManufacturingSnapshot,
    banking: &BankingSnapshot,
    audit: &AuditSnapshot,
    financial_reporting: &FinancialReportingSnapshot,
    ocpm: &OcpmSnapshot,
    compliance: &ComplianceRegulationsSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<HypergraphExportInfo> {
    use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
    use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
    use datasynth_graph::models::hypergraph::AggregationStrategy;
    let hg_settings = &self.config.graph_export.hypergraph;
    // Unknown strategy strings silently fall back to counterparty pooling.
    let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
        "truncate" => AggregationStrategy::Truncate,
        "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
        "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
        "importance_sample" => AggregationStrategy::ImportanceSample,
        _ => AggregationStrategy::PoolByCounterparty,
    };
    let builder_config = HypergraphConfig {
        max_nodes: hg_settings.max_nodes,
        aggregation_strategy,
        include_coso: hg_settings.governance_layer.include_coso,
        include_controls: hg_settings.governance_layer.include_controls,
        include_sox: hg_settings.governance_layer.include_sox,
        include_vendors: hg_settings.governance_layer.include_vendors,
        include_customers: hg_settings.governance_layer.include_customers,
        include_employees: hg_settings.governance_layer.include_employees,
        include_p2p: hg_settings.process_layer.include_p2p,
        include_o2c: hg_settings.process_layer.include_o2c,
        include_s2c: hg_settings.process_layer.include_s2c,
        include_h2r: hg_settings.process_layer.include_h2r,
        include_mfg: hg_settings.process_layer.include_mfg,
        include_bank: hg_settings.process_layer.include_bank,
        include_audit: hg_settings.process_layer.include_audit,
        include_r2r: hg_settings.process_layer.include_r2r,
        events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
        docs_per_counterparty_threshold: hg_settings
            .process_layer
            .docs_per_counterparty_threshold,
        include_accounts: hg_settings.accounting_layer.include_accounts,
        je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
        include_cross_layer_edges: hg_settings.cross_layer.enabled,
        include_compliance: self.config.compliance_regulations.enabled,
        // These layers are unconditionally enabled (not yet configurable).
        include_tax: true,
        include_treasury: true,
        include_esg: true,
        include_project: true,
        include_intercompany: true,
        include_temporal_events: true,
    };
    let mut builder = HypergraphBuilder::new(builder_config);
    // Governance layer.
    builder.add_coso_framework();
    if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
        let controls = InternalControl::standard_controls();
        builder.add_controls(&controls);
    }
    builder.add_vendors(&self.master_data.vendors);
    builder.add_customers(&self.master_data.customers);
    builder.add_employees(&self.master_data.employees);
    // Process layer: one add_* call per business process document set.
    builder.add_p2p_documents(
        &document_flows.purchase_orders,
        &document_flows.goods_receipts,
        &document_flows.vendor_invoices,
        &document_flows.payments,
    );
    builder.add_o2c_documents(
        &document_flows.sales_orders,
        &document_flows.deliveries,
        &document_flows.customer_invoices,
    );
    builder.add_s2c_documents(
        &sourcing.sourcing_projects,
        &sourcing.qualifications,
        &sourcing.rfx_events,
        &sourcing.bids,
        &sourcing.bid_evaluations,
        &sourcing.contracts,
    );
    builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
    builder.add_mfg_documents(
        &manufacturing.production_orders,
        &manufacturing.quality_inspections,
        &manufacturing.cycle_counts,
    );
    builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
    builder.add_audit_documents(
        &audit.engagements,
        &audit.workpapers,
        &audit.findings,
        &audit.evidence,
        &audit.risk_assessments,
        &audit.judgments,
        &audit.materiality_calculations,
        &audit.audit_opinions,
        &audit.going_concern_assessments,
    );
    builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
    if let Some(ref event_log) = ocpm.event_log {
        builder.add_ocpm_events(event_log);
    }
    // Compliance layer: resolve recorded standard ids back into full
    // standard definitions via the built-in registry; unknown ids are
    // silently dropped by the filter_map.
    if self.config.compliance_regulations.enabled
        && hg_settings.governance_layer.include_controls
    {
        let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
        let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
            .standard_records
            .iter()
            .filter_map(|r| {
                let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
                registry.get(&sid).cloned()
            })
            .collect();
        builder.add_compliance_regulations(
            &standards,
            &compliance.findings,
            &compliance.filings,
        );
    }
    // Accounting layer.
    builder.add_accounts(coa);
    builder.add_journal_entries_as_hyperedges(entries);
    let hypergraph = builder.build();
    let output_dir = self
        .output_path
        .clone()
        .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
    let hg_dir = output_dir
        .join(&self.config.graph_export.output_subdirectory)
        .join(&hg_settings.output_subdirectory);
    // Any output_format other than "unified" uses the plain hypergraph export.
    let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
        "unified" => {
            let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
            let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                SynthError::generation(format!("Unified hypergraph export failed: {e}"))
            })?;
            (
                metadata.num_nodes,
                metadata.num_edges,
                metadata.num_hyperedges,
            )
        }
        _ => {
            let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
            let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                SynthError::generation(format!("Hypergraph export failed: {e}"))
            })?;
            (
                metadata.num_nodes,
                metadata.num_edges,
                metadata.num_hyperedges,
            )
        }
    };
    // Optional best-effort streaming of the unified export; all failures
    // warn instead of erroring so the on-disk export above still counts.
    #[cfg(feature = "streaming")]
    if let Some(ref target_url) = hg_settings.stream_target {
        use crate::stream_client::{StreamClient, StreamConfig};
        use std::io::Write as _;
        // API key is optional and sourced from the environment only.
        let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
        let stream_config = StreamConfig {
            target_url: target_url.clone(),
            batch_size: hg_settings.stream_batch_size,
            api_key,
            ..StreamConfig::default()
        };
        match StreamClient::new(stream_config) {
            Ok(mut client) => {
                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
                match exporter.export_to_writer(&hypergraph, &mut client) {
                    Ok(_) => {
                        if let Err(e) = client.flush() {
                            warn!("Failed to flush stream client: {}", e);
                        } else {
                            info!("Streamed {} records to {}", client.total_sent(), target_url);
                        }
                    }
                    Err(e) => {
                        warn!("Streaming export failed: {}", e);
                    }
                }
            }
            Err(e) => {
                warn!("Failed to create stream client: {}", e);
            }
        }
    }
    stats.graph_node_count += num_nodes;
    stats.graph_edge_count += num_edges;
    stats.graph_export_count += 1;
    Ok(HypergraphExportInfo {
        node_count: num_nodes,
        edge_count: num_edges,
        hyperedge_count: num_hyperedges,
        output_path: hg_dir,
    })
}
/// Generates the banking dataset (customers, accounts, transactions, AML
/// labels/narratives) via the banking orchestrator, then cross-references
/// the banking customers with the core master-data customers so the two
/// domains share names, countries, and enterprise customer ids.
///
/// # Errors
/// Currently infallible in practice; the `SynthResult` wrapper keeps the
/// signature consistent with the other phase generators.
fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
    let pb = self.create_progress_bar(100, "Generating Banking Data");
    // Fix: use wrapping_add for the derived seed (consistent with the other
    // derived seeds in this file); `self.seed + 9000` panics in debug builds
    // when the configured seed is within 9000 of u64::MAX.
    // NOTE(review): offset 9000 is also used by the compliance procedure
    // generator — consider a distinct offset to keep streams independent.
    let orchestrator = BankingOrchestratorBuilder::new()
        .config(self.config.banking.clone())
        .seed(self.seed.wrapping_add(9000))
        .country_pack(self.primary_pack().clone())
        .build();
    if let Some(pb) = &pb {
        pb.inc(10);
    }
    let result = orchestrator.generate();
    if let Some(pb) = &pb {
        pb.inc(90);
        pb.finish_with_message(format!(
            "Banking: {} customers, {} transactions",
            result.customers.len(),
            result.transactions.len()
        ));
    }
    // Cross-reference: round-robin assign each banking customer the identity
    // of a core master-data customer so graphs can join the two domains.
    let mut banking_customers = result.customers;
    let core_customers = &self.master_data.customers;
    if !core_customers.is_empty() {
        for (i, bc) in banking_customers.iter_mut().enumerate() {
            let core = &core_customers[i % core_customers.len()];
            bc.name = CustomerName::business(&core.name);
            bc.residence_country = core.country.clone();
            bc.enterprise_customer_id = Some(core.customer_id.clone());
        }
        debug!(
            "Cross-referenced {} banking customers with {} core customers",
            banking_customers.len(),
            core_customers.len()
        );
    }
    Ok(BankingSnapshot {
        customers: banking_customers,
        accounts: result.accounts,
        transactions: result.transactions,
        transaction_labels: result.transaction_labels,
        customer_labels: result.customer_labels,
        account_labels: result.account_labels,
        relationship_labels: result.relationship_labels,
        narratives: result.narratives,
        suspicious_count: result.stats.suspicious_count,
        scenario_count: result.scenarios.len(),
    })
}
/// Estimates the total journal-entry volume across all configured companies,
/// pro-rating each company's weighted annual volume to the configured
/// generation period (in months).
fn calculate_total_transactions(&self) -> u64 {
    let months = self.config.global.period_months as f64;
    let mut total: u64 = 0;
    for company in &self.config.companies {
        // Annual volume scaled by the company's relative weight, then
        // pro-rated to the period length; truncated per company.
        let annual = company.annual_transaction_volume.count() as f64;
        let weighted = annual * company.volume_weight;
        total += (weighted * months / 12.0) as u64;
    }
    total
}
/// Creates a styled progress bar for a generation phase, or `None` when
/// progress display is disabled in the phase configuration.
///
/// The bar is attached to the shared `MultiProgress` when one exists so
/// concurrent phase bars render together; otherwise a standalone bar is
/// returned.
fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
    if !self.phase_config.show_progress {
        return None;
    }
    let pb = if let Some(mp) = &self.multi_progress {
        mp.add(ProgressBar::new(total))
    } else {
        ProgressBar::new(total)
    };
    // Fix: escape literal braces in the caller-supplied label. The label is
    // interpolated into the indicatif template string, so an unescaped
    // `{`/`}` would be parsed as a (likely invalid) placeholder, making
    // `template()` return Err and tripping the `expect` below.
    let label = message.replace('{', "{{").replace('}', "}}");
    pb.set_style(
        ProgressStyle::default_bar()
            .template(&format!(
                "{{spinner:.green}} {label} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
            ))
            .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
            .progress_chars("#>-"),
    );
    Some(pb)
}
/// Returns a shared handle to the generated chart of accounts, if one has
/// been built yet. Cloning the `Arc` is a cheap refcount bump.
pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
    self.coa.as_ref().map(Arc::clone)
}
/// Read-only access to the master data (vendors, customers, employees, …)
/// produced during generation.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
fn phase_compliance_regulations(
&mut self,
_stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<ComplianceRegulationsSnapshot> {
if !self.phase_config.generate_compliance_regulations {
return Ok(ComplianceRegulationsSnapshot::default());
}
info!("Phase: Generating Compliance Regulations Data");
let cr_config = &self.config.compliance_regulations;
let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
self.config
.companies
.iter()
.map(|c| c.country.clone())
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect()
} else {
cr_config.jurisdictions.clone()
};
let fallback_date =
NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
let reference_date = cr_config
.reference_date
.as_ref()
.and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
.unwrap_or_else(|| {
NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.unwrap_or(fallback_date)
});
let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
let cross_reference_records = reg_gen.generate_cross_reference_records();
let jurisdiction_records =
reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
info!(
" Standards: {} records, {} cross-references, {} jurisdictions",
standard_records.len(),
cross_reference_records.len(),
jurisdiction_records.len()
);
let audit_procedures = if cr_config.audit_procedures.enabled {
let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
sampling_method: cr_config.audit_procedures.sampling_method.clone(),
confidence_level: cr_config.audit_procedures.confidence_level,
tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
};
let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
self.seed + 9000,
proc_config,
);
let registry = reg_gen.registry();
let mut all_procs = Vec::new();
for jurisdiction in &jurisdictions {
let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
all_procs.extend(procs);
}
info!(" Audit procedures: {}", all_procs.len());
all_procs
} else {
Vec::new()
};
let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
let finding_config =
datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
finding_rate: cr_config.findings.finding_rate,
material_weakness_rate: cr_config.findings.material_weakness_rate,
significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
generate_remediation: cr_config.findings.generate_remediation,
};
let mut finding_gen =
datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
self.seed + 9100,
finding_config,
);
let mut all_findings = Vec::new();
for company in &self.config.companies {
let company_findings =
finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
all_findings.extend(company_findings);
}
info!(" Compliance findings: {}", all_findings.len());
all_findings
} else {
Vec::new()
};
let filings = if cr_config.filings.enabled {
let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
filing_types: cr_config.filings.filing_types.clone(),
generate_status_progression: cr_config.filings.generate_status_progression,
};
let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
self.seed + 9200,
filing_config,
);
let company_codes: Vec<String> = self
.config
.companies
.iter()
.map(|c| c.code.clone())
.collect();
let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
.unwrap_or(fallback_date);
let filings = filing_gen.generate_filings(
&company_codes,
&jurisdictions,
start_date,
self.config.global.period_months,
);
info!(" Regulatory filings: {}", filings.len());
filings
} else {
Vec::new()
};
let compliance_graph = if cr_config.graph.enabled {
let graph_config = datasynth_graph::ComplianceGraphConfig {
include_standard_nodes: cr_config.graph.include_compliance_nodes,
include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
include_cross_references: cr_config.graph.include_cross_references,
include_supersession_edges: cr_config.graph.include_supersession_edges,
include_account_links: cr_config.graph.include_account_links,
include_control_links: cr_config.graph.include_control_links,
include_company_links: cr_config.graph.include_company_links,
};
let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
.iter()
.map(|r| datasynth_graph::StandardNodeInput {
standard_id: r.standard_id.clone(),
title: r.title.clone(),
category: r.category.clone(),
domain: r.domain.clone(),
is_active: r.is_active,
features: vec![if r.is_active { 1.0 } else { 0.0 }],
applicable_account_types: r.applicable_account_types.clone(),
applicable_processes: r.applicable_processes.clone(),
})
.collect();
builder.add_standards(&standard_inputs);
let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
jurisdiction_records
.iter()
.map(|r| datasynth_graph::JurisdictionNodeInput {
country_code: r.country_code.clone(),
country_name: r.country_name.clone(),
framework: r.accounting_framework.clone(),
standard_count: r.standard_count,
tax_rate: r.statutory_tax_rate,
})
.collect();
builder.add_jurisdictions(&jurisdiction_inputs);
let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
cross_reference_records
.iter()
.map(|r| datasynth_graph::CrossReferenceEdgeInput {
from_standard: r.from_standard.clone(),
to_standard: r.to_standard.clone(),
relationship: r.relationship.clone(),
convergence_level: r.convergence_level,
})
.collect();
builder.add_cross_references(&xref_inputs);
let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
.iter()
.map(|r| datasynth_graph::JurisdictionMappingInput {
country_code: r.jurisdiction.clone(),
standard_id: r.standard_id.clone(),
})
.collect();
builder.add_jurisdiction_mappings(&mapping_inputs);
let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
.iter()
.map(|p| datasynth_graph::ProcedureNodeInput {
procedure_id: p.procedure_id.clone(),
standard_id: p.standard_id.clone(),
procedure_type: p.procedure_type.clone(),
sample_size: p.sample_size,
confidence_level: p.confidence_level,
})
.collect();
builder.add_procedures(&proc_inputs);
let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
.iter()
.map(|f| datasynth_graph::FindingNodeInput {
finding_id: f.finding_id.to_string(),
standard_id: f
.related_standards
.first()
.map(|s| s.as_str().to_string())
.unwrap_or_default(),
severity: f.severity.to_string(),
deficiency_level: f.deficiency_level.to_string(),
severity_score: f.deficiency_level.severity_score(),
control_id: f.control_id.clone(),
affected_accounts: f.affected_accounts.clone(),
})
.collect();
builder.add_findings(&finding_inputs);
if cr_config.graph.include_account_links {
let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
for std_record in &standard_records {
if let Some(std_obj) =
registry.get(&datasynth_core::models::compliance::StandardId::parse(
&std_record.standard_id,
))
{
for acct_type in &std_obj.applicable_account_types {
account_links.push(datasynth_graph::AccountLinkInput {
standard_id: std_record.standard_id.clone(),
account_code: acct_type.clone(),
account_name: acct_type.clone(),
});
}
}
}
builder.add_account_links(&account_links);
}
if cr_config.graph.include_control_links {
let mut control_links = Vec::new();
let sox_like_ids: Vec<String> = standard_records
.iter()
.filter(|r| {
r.standard_id.starts_with("SOX")
|| r.standard_id.starts_with("PCAOB-AS-2201")
})
.map(|r| r.standard_id.clone())
.collect();
let control_ids = [
("C001", "Cash Controls"),
("C002", "Large Transaction Approval"),
("C010", "PO Approval"),
("C011", "Three-Way Match"),
("C020", "Revenue Recognition"),
("C021", "Credit Check"),
("C030", "Manual JE Approval"),
("C031", "Period Close Review"),
("C032", "Account Reconciliation"),
("C040", "Payroll Processing"),
("C050", "Fixed Asset Capitalization"),
("C060", "Intercompany Elimination"),
];
for sox_id in &sox_like_ids {
for (ctrl_id, ctrl_name) in &control_ids {
control_links.push(datasynth_graph::ControlLinkInput {
standard_id: sox_id.clone(),
control_id: ctrl_id.to_string(),
control_name: ctrl_name.to_string(),
});
}
}
builder.add_control_links(&control_links);
}
if cr_config.graph.include_company_links {
let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
.iter()
.enumerate()
.map(|(i, f)| datasynth_graph::FilingNodeInput {
filing_id: format!("F{:04}", i + 1),
filing_type: f.filing_type.to_string(),
company_code: f.company_code.clone(),
jurisdiction: f.jurisdiction.clone(),
status: format!("{:?}", f.status),
})
.collect();
builder.add_filings(&filing_inputs);
}
let graph = builder.build();
info!(
" Compliance graph: {} nodes, {} edges",
graph.nodes.len(),
graph.edges.len()
);
Some(graph)
} else {
None
};
self.check_resources_with_log("post-compliance-regulations")?;
Ok(ComplianceRegulationsSnapshot {
standard_records,
cross_reference_records,
jurisdiction_records,
audit_procedures,
findings,
filings,
compliance_graph,
})
}
/// Assembles the data-lineage graph relating config sections to the
/// generator phases they drive and the output files those phases produce.
/// Nodes and edges for optional phases are added only when the phase is
/// enabled in `self.phase_config` / `self.config`.
fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
    use super::lineage::LineageGraphBuilder;
    let mut builder = LineageGraphBuilder::new();
    // Core pipeline: chart of accounts + journal entries always exist.
    for (section_id, label) in [
        ("config:global", "Global Config"),
        ("config:chart_of_accounts", "Chart of Accounts Config"),
        ("config:transactions", "Transaction Config"),
    ] {
        builder.add_config_section(section_id, label);
    }
    builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
    builder.add_generator_phase("phase:je", "Journal Entry Generation");
    builder.configured_by("phase:coa", "config:chart_of_accounts");
    builder.configured_by("phase:je", "config:transactions");
    builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
    builder.produced_by("output:je", "phase:je");
    if self.phase_config.generate_master_data {
        builder.add_config_section("config:master_data", "Master Data Config");
        builder.add_generator_phase("phase:master_data", "Master Data Generation");
        builder.configured_by("phase:master_data", "config:master_data");
        // Master data feeds journal-entry generation downstream.
        builder.input_to("phase:master_data", "phase:je");
    }
    if self.phase_config.generate_document_flows {
        builder.add_config_section("config:document_flows", "Document Flow Config");
        builder.add_generator_phase("phase:p2p", "P2P Document Flow");
        builder.add_generator_phase("phase:o2c", "O2C Document Flow");
        builder.configured_by("phase:p2p", "config:document_flows");
        builder.configured_by("phase:o2c", "config:document_flows");
        // (node id, label, file name, producing phase)
        const FLOW_OUTPUTS: [(&str, &str, &str, &str); 5] = [
            ("output:po", "Purchase Orders", "purchase_orders.csv", "phase:p2p"),
            ("output:gr", "Goods Receipts", "goods_receipts.csv", "phase:p2p"),
            ("output:vi", "Vendor Invoices", "vendor_invoices.csv", "phase:p2p"),
            ("output:so", "Sales Orders", "sales_orders.csv", "phase:o2c"),
            ("output:ci", "Customer Invoices", "customer_invoices.csv", "phase:o2c"),
        ];
        // Two passes keep the original call order: files first, then edges.
        for (node_id, label, file, _) in FLOW_OUTPUTS {
            builder.add_output_file(node_id, label, file);
        }
        for (node_id, _, _, phase) in FLOW_OUTPUTS {
            builder.produced_by(node_id, phase);
        }
    }
    if self.phase_config.inject_anomalies {
        builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
        builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
        builder.configured_by("phase:anomaly", "config:fraud");
        builder.add_output_file(
            "output:labels",
            "Anomaly Labels",
            "labels/anomaly_labels.csv",
        );
        builder.produced_by("output:labels", "phase:anomaly");
    }
    // Simple config-section -> phase pairs that carry no output files.
    let optional_phases = [
        (
            self.phase_config.generate_audit,
            "config:audit",
            "Audit Config",
            "phase:audit",
            "Audit Data Generation",
        ),
        (
            self.phase_config.generate_banking,
            "config:banking",
            "Banking Config",
            "phase:banking",
            "Banking KYC/AML Generation",
        ),
        (
            self.config.llm.enabled,
            "config:llm",
            "LLM Enrichment Config",
            "phase:llm_enrichment",
            "LLM Enrichment",
        ),
        (
            self.config.diffusion.enabled,
            "config:diffusion",
            "Diffusion Enhancement Config",
            "phase:diffusion",
            "Diffusion Enhancement",
        ),
        (
            self.config.causal.enabled,
            "config:causal",
            "Causal Generation Config",
            "phase:causal",
            "Causal Overlay",
        ),
    ];
    for (enabled, section_id, section_label, phase_id, phase_label) in optional_phases {
        if enabled {
            builder.add_config_section(section_id, section_label);
            builder.add_generator_phase(phase_id, phase_label);
            builder.configured_by(phase_id, section_id);
        }
    }
    builder.build()
}
/// Sums net credit activity (credits minus debits) on revenue accounts —
/// GL accounts whose code starts with '4' — for the given company,
/// clamped at zero so a net-debit position reports no revenue.
fn compute_company_revenue(
    entries: &[JournalEntry],
    company_code: &str,
) -> rust_decimal::Decimal {
    use rust_decimal::Decimal;
    let net = entries
        .iter()
        .filter(|je| je.header.company_code == company_code)
        .flat_map(|je| je.lines.iter())
        .filter(|line| line.gl_account.starts_with('4'))
        .fold(Decimal::ZERO, |acc, line| {
            acc + (line.credit_amount - line.debit_amount)
        });
    net.max(Decimal::ZERO)
}
/// Computes net assets (assets minus liabilities) for one entity from its
/// journal lines. Accounts starting with '1' are treated as debit-normal
/// assets; accounts starting with '2' as credit-normal liabilities; all
/// other account classes are ignored.
fn compute_entity_net_assets(
    entries: &[JournalEntry],
    entity_code: &str,
) -> rust_decimal::Decimal {
    use rust_decimal::Decimal;
    let (assets, liabilities) = entries
        .iter()
        .filter(|je| je.header.company_code == entity_code)
        .flat_map(|je| je.lines.iter())
        .fold((Decimal::ZERO, Decimal::ZERO), |(assets, liabs), line| {
            if line.gl_account.starts_with('1') {
                (assets + line.debit_amount - line.credit_amount, liabs)
            } else if line.gl_account.starts_with('2') {
                (assets, liabs + line.credit_amount - line.debit_amount)
            } else {
                (assets, liabs)
            }
        });
    assets - liabilities
}
}
/// Maps a graph export format to the lowercase token used to name its
/// exporter output.
fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
    use datasynth_config::schema::GraphExportFormat as Fmt;
    match format {
        Fmt::PytorchGeometric => "pytorch_geometric",
        Fmt::Neo4j => "neo4j",
        Fmt::Dgl => "dgl",
        Fmt::RustGraph => "rustgraph",
        Fmt::RustGraphHypergraph => "rustgraph_hypergraph",
    }
}
/// Aggregates journal lines into per-account trial-balance rows, sorted by
/// account code (BTreeMap iteration order). Account descriptions come from
/// the chart of accounts when available, falling back to the account code.
///
/// NOTE(review): every entry in `entries` is aggregated — `entity_code`
/// and `fiscal_year` only label the output rows. This assumes the caller
/// passes a slice already filtered to one entity/period; confirm at call
/// sites.
fn compute_trial_balance_entries(
    entries: &[JournalEntry],
    entity_code: &str,
    fiscal_year: i32,
    coa: Option<&ChartOfAccounts>,
) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
    use std::collections::BTreeMap;
    // (total debits, total credits) keyed by account code.
    let mut totals: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
        BTreeMap::new();
    for line in entries.iter().flat_map(|je| je.lines.iter()) {
        let slot = totals.entry(line.account_code.clone()).or_default();
        slot.0 += line.debit_amount;
        slot.1 += line.credit_amount;
    }
    let period = format!("FY{}", fiscal_year);
    totals
        .into_iter()
        .map(|(account_code, (debit, credit))| {
            let account_description = coa
                .and_then(|c| c.get_account(&account_code))
                .map(|a| a.description().to_string())
                .unwrap_or_else(|| account_code.clone());
            datasynth_audit_fsm::artifact::TrialBalanceEntry {
                account_description,
                account_code,
                debit_balance: debit,
                credit_balance: credit,
                net_balance: debit - credit,
                entity_code: entity_code.to_string(),
                period: period.clone(),
            }
        })
        .collect()
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
use datasynth_config::schema::*;
fn create_test_config() -> GeneratorConfig {
GeneratorConfig {
global: GlobalConfig {
industry: IndustrySector::Manufacturing,
start_date: "2024-01-01".to_string(),
period_months: 1,
seed: Some(42),
parallel: false,
group_currency: "USD".to_string(),
presentation_currency: None,
worker_threads: 0,
memory_limit_mb: 0,
fiscal_year_months: None,
},
companies: vec![CompanyConfig {
code: "1000".to_string(),
name: "Test Company".to_string(),
currency: "USD".to_string(),
functional_currency: None,
country: "US".to_string(),
annual_transaction_volume: TransactionVolume::TenK,
volume_weight: 1.0,
fiscal_year_variant: "K4".to_string(),
}],
chart_of_accounts: ChartOfAccountsConfig {
complexity: CoAComplexity::Small,
industry_specific: true,
custom_accounts: None,
min_hierarchy_depth: 2,
max_hierarchy_depth: 4,
},
transactions: TransactionConfig::default(),
output: OutputConfig::default(),
fraud: FraudConfig::default(),
internal_controls: InternalControlsConfig::default(),
business_processes: BusinessProcessConfig::default(),
user_personas: UserPersonaConfig::default(),
templates: TemplateConfig::default(),
approval: ApprovalConfig::default(),
departments: DepartmentConfig::default(),
master_data: MasterDataConfig::default(),
document_flows: DocumentFlowConfig::default(),
intercompany: IntercompanyConfig::default(),
balance: BalanceConfig::default(),
ocpm: OcpmConfig::default(),
audit: AuditGenerationConfig::default(),
banking: datasynth_banking::BankingConfig::default(),
data_quality: DataQualitySchemaConfig::default(),
scenario: ScenarioConfig::default(),
temporal: TemporalDriftConfig::default(),
graph_export: GraphExportConfig::default(),
streaming: StreamingSchemaConfig::default(),
rate_limit: RateLimitSchemaConfig::default(),
temporal_attributes: TemporalAttributeSchemaConfig::default(),
relationships: RelationshipSchemaConfig::default(),
accounting_standards: AccountingStandardsConfig::default(),
audit_standards: AuditStandardsConfig::default(),
distributions: Default::default(),
temporal_patterns: Default::default(),
vendor_network: VendorNetworkSchemaConfig::default(),
customer_segmentation: CustomerSegmentationSchemaConfig::default(),
relationship_strength: RelationshipStrengthSchemaConfig::default(),
cross_process_links: CrossProcessLinksSchemaConfig::default(),
organizational_events: OrganizationalEventsSchemaConfig::default(),
behavioral_drift: BehavioralDriftSchemaConfig::default(),
market_drift: MarketDriftSchemaConfig::default(),
drift_labeling: DriftLabelingSchemaConfig::default(),
anomaly_injection: Default::default(),
industry_specific: Default::default(),
fingerprint_privacy: Default::default(),
quality_gates: Default::default(),
compliance: Default::default(),
webhooks: Default::default(),
llm: Default::default(),
diffusion: Default::default(),
causal: Default::default(),
source_to_pay: Default::default(),
financial_reporting: Default::default(),
hr: Default::default(),
manufacturing: Default::default(),
sales_quotes: Default::default(),
tax: Default::default(),
treasury: Default::default(),
project_accounting: Default::default(),
esg: Default::default(),
country_packs: None,
scenarios: Default::default(),
session: Default::default(),
compliance_regulations: Default::default(),
}
}
#[test]
fn test_enhanced_orchestrator_creation() {
    // Construction with default phases should succeed for a valid config.
    let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(orchestrator.is_ok());
}
#[test]
fn test_minimal_generation() {
    // Journal entries alone, with every optional phase switched off.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate();
    assert!(generated.is_ok());
    assert!(!generated.unwrap().journal_entries.is_empty());
}
#[test]
fn test_master_data_generation() {
    // Master data only: no document flows, no journal entries.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(!out.master_data.vendors.is_empty());
    assert!(!out.master_data.customers.is_empty());
    assert!(!out.master_data.materials.is_empty());
}
#[test]
fn test_document_flow_generation() {
    // Master data plus P2P/O2C document-flow chains.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(!out.document_flows.p2p_chains.is_empty());
    assert!(!out.document_flows.o2c_chains.is_empty());
    assert!(!out.document_flows.purchase_orders.is_empty());
    assert!(!out.document_flows.sales_orders.is_empty());
}
#[test]
fn test_anomaly_injection() {
    // With injection enabled, the anomaly-label summary must be populated.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(!out.journal_entries.is_empty());
    assert!(out.anomaly_labels.summary.is_some());
}
#[test]
fn test_full_generation_pipeline() {
    // Master data, document flows, journal entries, and balance validation
    // all running together on small volumes.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(!out.master_data.vendors.is_empty());
    assert!(!out.master_data.customers.is_empty());
    assert!(!out.document_flows.p2p_chains.is_empty());
    assert!(!out.document_flows.o2c_chains.is_empty());
    assert!(!out.journal_entries.is_empty());
    assert!(out.statistics.accounts_count > 0);
    assert!(!out.subledger.ap_invoices.is_empty());
    assert!(!out.subledger.ar_invoices.is_empty());
    assert!(out.balance_validation.validated);
    assert!(out.balance_validation.entries_processed > 0);
}
#[test]
fn test_subledger_linking() {
    // Document-flow invoices must be mirrored 1:1 into the AP/AR subledgers.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(!out.document_flows.vendor_invoices.is_empty());
    assert!(!out.document_flows.customer_invoices.is_empty());
    assert!(!out.subledger.ap_invoices.is_empty());
    assert!(!out.subledger.ar_invoices.is_empty());
    // One subledger invoice per document-flow invoice ...
    assert_eq!(
        out.subledger.ap_invoices.len(),
        out.document_flows.vendor_invoices.len()
    );
    assert_eq!(
        out.subledger.ar_invoices.len(),
        out.document_flows.customer_invoices.len()
    );
    // ... and the reported statistics agree with the collections.
    assert_eq!(out.statistics.ap_invoice_count, out.subledger.ap_invoices.len());
    assert_eq!(out.statistics.ar_invoice_count, out.subledger.ar_invoices.len());
}
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(out.balance_validation.validated);
    assert!(out.balance_validation.entries_processed > 0);
    assert!(!out.balance_validation.has_unbalanced_entries);
    // Double-entry invariant: aggregate debits equal aggregate credits.
    assert_eq!(
        out.balance_validation.total_debits,
        out.balance_validation.total_credits
    );
}
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    // Reported counts must agree with the actual collections.
    assert_eq!(out.statistics.vendor_count, out.master_data.vendors.len());
    assert_eq!(out.statistics.customer_count, out.master_data.customers.len());
    assert_eq!(out.statistics.material_count, out.master_data.materials.len());
    assert_eq!(out.statistics.total_entries as usize, out.journal_entries.len());
}
#[test]
fn test_phase_config_defaults() {
    // Defaults: core phases on, anomaly injection off, sane volumes.
    let defaults = PhaseConfig::default();
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
#[test]
fn test_get_coa_before_generation() {
    // No chart of accounts exists until generate() has run.
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    assert!(orch.get_coa().is_none());
}
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let _ = orch.generate().unwrap();
    // After generation the chart of accounts is retained on the orchestrator.
    assert!(orch.get_coa().is_some());
}
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(!out.master_data.vendors.is_empty());
}
#[test]
fn test_with_progress_builder() {
    // The builder-style setter should flip the progress flag off.
    let orch = EnhancedOrchestrator::with_defaults(create_test_config())
        .unwrap()
        .with_progress(false);
    assert!(!orch.phase_config.show_progress);
}
#[test]
fn test_multi_company_generation() {
    let mut cfg = create_test_config();
    // Add a second (EUR/DE) company with half the volume weight.
    cfg.companies.push(CompanyConfig {
        code: String::from("2000"),
        name: String::from("Subsidiary"),
        currency: String::from("EUR"),
        functional_currency: None,
        country: String::from("DE"),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: String::from("K4"),
    });
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();
    // Both companies contribute master data.
    assert!(out.statistics.vendor_count >= 10);
    assert!(out.statistics.customer_count >= 10);
    assert!(out.statistics.companies_count == 2);
}
#[test]
fn test_empty_master_data_skips_document_flows() {
    // Document flows need master data; without it both chains stay empty.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    assert!(out.document_flows.p2p_chains.is_empty());
    assert!(out.document_flows.o2c_chains.is_empty());
}
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    // The statistic must equal the sum of per-entry line counts.
    let expected: u64 = out
        .journal_entries
        .iter()
        .map(|e| e.line_count() as u64)
        .sum();
    assert_eq!(out.statistics.total_line_items, expected);
}
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    let audit = &out.audit;
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );
    // Every reported audit statistic must match its backing collection.
    let stats = &out.statistics;
    for (reported, actual) in [
        (stats.audit_engagement_count, audit.engagements.len()),
        (stats.audit_workpaper_count, audit.workpapers.len()),
        (stats.audit_evidence_count, audit.evidence.len()),
        (stats.audit_risk_count, audit.risk_assessments.len()),
        (stats.audit_finding_count, audit.findings.len()),
        (stats.audit_judgment_count, audit.judgments.len()),
        (stats.audit_confirmation_count, audit.confirmations.len()),
        (
            stats.audit_confirmation_response_count,
            audit.confirmation_responses.len(),
        ),
        (stats.audit_procedure_step_count, audit.procedure_steps.len()),
        (stats.audit_sample_count, audit.samples.len()),
        (
            stats.audit_analytical_result_count,
            audit.analytical_results.len(),
        ),
        (stats.audit_ia_function_count, audit.ia_functions.len()),
        (stats.audit_ia_report_count, audit.ia_reports.len()),
        (stats.audit_related_party_count, audit.related_parties.len()),
        (
            stats.audit_related_party_transaction_count,
            audit.related_party_transactions.len(),
        ),
    ] {
        assert_eq!(reported, actual);
    }
}
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();
    // LLM, diffusion, and causal overlays all default to off.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();
    // With every overlay disabled, all related statistics stay zeroed.
    assert_eq!(out.statistics.llm_enrichment_ms, 0);
    assert_eq!(out.statistics.llm_vendors_enriched, 0);
    assert_eq!(out.statistics.diffusion_enhancement_ms, 0);
    assert_eq!(out.statistics.diffusion_samples_generated, 0);
    assert_eq!(out.statistics.causal_generation_ms, 0);
    assert_eq!(out.statistics.causal_samples_generated, 0);
    assert!(out.statistics.causal_validation_passed.is_none());
    assert_eq!(out.statistics.counterfactual_pair_count, 0);
    assert!(out.counterfactual_pairs.is_empty());
}
#[test]
fn test_counterfactual_generation_enabled() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let out = orch.generate().unwrap();
    if !out.journal_entries.is_empty() {
        // Exactly one counterfactual pair per journal entry ...
        assert_eq!(out.counterfactual_pairs.len(), out.journal_entries.len());
        assert_eq!(
            out.statistics.counterfactual_pair_count,
            out.journal_entries.len()
        );
        // ... and every pair id must be unique.
        let unique_ids: std::collections::HashSet<_> = out
            .counterfactual_pairs
            .iter()
            .map(|p| p.pair_id.clone())
            .collect();
        assert_eq!(unique_ids.len(), out.counterfactual_pairs.len());
    }
}
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Master data only — the LLM phase enriches vendors.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();

    // Some vendors are enriched, capped by max_vendor_enrichments.
    let enriched = out.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();

    // The phase emits exactly the configured number of samples.
    assert_eq!(out.statistics.diffusion_samples_generated, 20);
}
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();

    // Configured sample count is honored, and because validation was
    // requested, a validation verdict must be present.
    assert_eq!(out.statistics.causal_samples_generated, 100);
    assert!(out.statistics.causal_validation_passed.is_some());
}
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();

    // Sample count matches the config; with validation disabled the
    // verdict field stays unset.
    assert_eq!(out.statistics.causal_samples_generated, 50);
    assert!(out.statistics.causal_validation_passed.is_none());
}
#[test]
fn test_all_new_phases_enabled_together() {
    // Enable every new phase in a single run to verify they coexist.
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let out = orch.generate().unwrap();

    // Each phase must leave its footprint in the statistics.
    assert!(out.statistics.llm_vendors_enriched > 0);
    assert_eq!(out.statistics.diffusion_samples_generated, 10);
    assert_eq!(out.statistics.causal_samples_generated, 50);
    assert!(out.statistics.causal_validation_passed.is_some());
}
#[test]
fn test_statistics_serialization_with_new_fields() {
    // Round-trip the statistics struct through JSON and check that the
    // newly added phase fields survive intact.
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let restored: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(restored.llm_enrichment_ms, 42);
    assert_eq!(restored.llm_vendors_enriched, 10);
    assert_eq!(restored.diffusion_enhancement_ms, 100);
    assert_eq!(restored.diffusion_samples_generated, 50);
    assert_eq!(restored.causal_generation_ms, 200);
    assert_eq!(restored.causal_samples_generated, 100);
    assert_eq!(restored.causal_validation_passed, Some(true));
}
#[test]
fn test_statistics_backward_compat_deserialization() {
    // JSON produced before the LLM/diffusion/causal fields existed; the
    // deserializer must default the missing fields rather than error.
    let legacy_json = r#"{
"total_entries": 100,
"total_line_items": 500,
"accounts_count": 50,
"companies_count": 1,
"period_months": 12,
"vendor_count": 10,
"customer_count": 20,
"material_count": 15,
"asset_count": 5,
"employee_count": 8,
"p2p_chain_count": 5,
"o2c_chain_count": 5,
"ap_invoice_count": 5,
"ar_invoice_count": 5,
"ocpm_event_count": 0,
"ocpm_object_count": 0,
"ocpm_case_count": 0,
"audit_engagement_count": 0,
"audit_workpaper_count": 0,
"audit_evidence_count": 0,
"audit_risk_count": 0,
"audit_finding_count": 0,
"audit_judgment_count": 0,
"anomalies_injected": 0,
"data_quality_issues": 0,
"banking_customer_count": 0,
"banking_account_count": 0,
"banking_transaction_count": 0,
"banking_suspicious_count": 0,
"graph_export_count": 0,
"graph_node_count": 0,
"graph_edge_count": 0
}"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_json).unwrap();

    // All new fields fall back to their zero/None defaults.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
}