1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Switches and volume settings for the generation phases.
///
/// Every field is either a `bool` toggle or a `usize` count. The value
/// produced by [`Default`] is noted on each field.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Toggle for master data. Default: `true`.
    pub generate_master_data: bool,
    /// Toggle for document flows. Default: `true`.
    pub generate_document_flows: bool,
    /// Toggle for OCPM events. Default: `false`.
    pub generate_ocpm_events: bool,
    /// Toggle for journal entries. Default: `true`.
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection. Default: `false`.
    pub inject_anomalies: bool,
    /// Toggle for data-quality injection. Default: `false`.
    pub inject_data_quality: bool,
    /// Toggle for balance validation. Default: `true`.
    pub validate_balances: bool,
    /// Toggle for progress display. Default: `true`.
    pub show_progress: bool,
    /// Vendors per company. Default: `50`.
    pub vendors_per_company: usize,
    /// Customers per company. Default: `100`.
    pub customers_per_company: usize,
    /// Materials per company. Default: `200`.
    pub materials_per_company: usize,
    /// Assets per company. Default: `50`.
    pub assets_per_company: usize,
    /// Employees per company. Default: `100`.
    pub employees_per_company: usize,
    /// Number of P2P chains. Default: `100`.
    pub p2p_chains: usize,
    /// Number of O2C chains. Default: `100`.
    pub o2c_chains: usize,
    /// Toggle for audit data. Default: `false`.
    pub generate_audit: bool,
    /// Number of audit engagements. Default: `5`.
    pub audit_engagements: usize,
    /// Workpapers per engagement. Default: `20`.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper. Default: `5`.
    pub evidence_per_workpaper: usize,
    /// Risks per engagement. Default: `15`.
    pub risks_per_engagement: usize,
    /// Findings per engagement. Default: `8`.
    pub findings_per_engagement: usize,
    /// Judgments per engagement. Default: `10`.
    pub judgments_per_engagement: usize,
    /// Toggle for banking data. Default: `false`.
    pub generate_banking: bool,
    /// Toggle for graph export. Default: `false`.
    pub generate_graph_export: bool,
    /// Toggle for sourcing data. Default: `false`.
    pub generate_sourcing: bool,
    /// Toggle for bank reconciliation. Default: `false`.
    pub generate_bank_reconciliation: bool,
    /// Toggle for financial statements. Default: `false`.
    pub generate_financial_statements: bool,
    /// Toggle for accounting-standards data. Default: `false`.
    pub generate_accounting_standards: bool,
    /// Toggle for manufacturing data. Default: `false`.
    pub generate_manufacturing: bool,
    /// Toggle for sales quotes, KPIs and budgets. Default: `false`.
    pub generate_sales_kpi_budgets: bool,
    /// Toggle for tax data. Default: `false`.
    pub generate_tax: bool,
    /// Toggle for ESG data. Default: `false`.
    pub generate_esg: bool,
    /// Toggle for intercompany data. Default: `false`.
    pub generate_intercompany: bool,
    /// Toggle for evolution events. Default: `true`.
    pub generate_evolution_events: bool,
    /// Toggle for counterfactuals. Default: `false`.
    pub generate_counterfactuals: bool,
    /// Toggle for compliance-regulation data. Default: `false`.
    pub generate_compliance_regulations: bool,
    /// Toggle for period close. Default: `true`.
    pub generate_period_close: bool,
    /// Toggle for HR data. Default: `false`.
    pub generate_hr: bool,
    /// Toggle for treasury data. Default: `false`.
    pub generate_treasury: bool,
    /// Toggle for project-accounting data. Default: `false`.
    pub generate_project_accounting: bool,
    /// Toggle for legal documents. Default: `false`.
    pub generate_legal_documents: bool,
    /// Toggle for IT controls. Default: `false`.
    pub generate_it_controls: bool,
    /// Toggle for analytics metadata. Default: `false`.
    pub generate_analytics_metadata: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration.
    ///
    /// Seven toggles are on (master data, document flows, journal entries,
    /// balance validation, progress display, evolution events, period close);
    /// every other toggle is off. Volume counts get fixed literal values.
    fn default() -> Self {
        Self {
            // Toggles that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // Toggles that are off by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
            generate_legal_documents: false,
            generate_it_controls: false,
            generate_analytics_metadata: false,

            // Master-data and document-flow volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
399
400impl PhaseConfig {
401 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
406 Self {
407 generate_master_data: true,
409 generate_document_flows: true,
410 generate_journal_entries: true,
411 validate_balances: true,
412 generate_period_close: true,
413 generate_evolution_events: true,
414 show_progress: true,
415
416 generate_audit: cfg.audit.enabled,
418 generate_banking: cfg.banking.enabled,
419 generate_graph_export: cfg.graph_export.enabled,
420 generate_sourcing: cfg.source_to_pay.enabled,
421 generate_intercompany: cfg.intercompany.enabled,
422 generate_financial_statements: cfg.financial_reporting.enabled,
423 generate_bank_reconciliation: cfg.financial_reporting.enabled,
424 generate_accounting_standards: cfg.accounting_standards.enabled,
425 generate_manufacturing: cfg.manufacturing.enabled,
426 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
427 generate_tax: cfg.tax.enabled,
428 generate_esg: cfg.esg.enabled,
429 generate_ocpm_events: cfg.ocpm.enabled,
430 generate_compliance_regulations: cfg.compliance_regulations.enabled,
431 generate_hr: cfg.hr.enabled,
432 generate_treasury: cfg.treasury.enabled,
433 generate_project_accounting: cfg.project_accounting.enabled,
434
435 generate_legal_documents: cfg.compliance_regulations.enabled
439 && cfg.compliance_regulations.legal_documents.enabled,
440 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
443 generate_analytics_metadata: cfg.analytics_metadata.enabled,
446
447 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
449
450 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
451 inject_data_quality: cfg.data_quality.enabled,
452
453 vendors_per_company: 50,
455 customers_per_company: 100,
456 materials_per_company: 200,
457 assets_per_company: 50,
458 employees_per_company: 100,
459 p2p_chains: 100,
460 o2c_chains: 100,
461 audit_engagements: 5,
462 workpapers_per_engagement: 20,
463 evidence_per_workpaper: 5,
464 risks_per_engagement: 15,
465 findings_per_engagement: 8,
466 judgments_per_engagement: 10,
467 }
468 }
469}
470
471#[derive(Debug, Clone, Default)]
473pub struct MasterDataSnapshot {
474 pub vendors: Vec<Vendor>,
476 pub customers: Vec<Customer>,
478 pub materials: Vec<Material>,
480 pub assets: Vec<FixedAsset>,
482 pub employees: Vec<Employee>,
484 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
486 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
488 pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
492}
493
/// Size counters and output location describing a hypergraph export.
///
/// Has no `Default` impl; all four fields must be supplied.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
506
507#[derive(Debug, Clone, Default)]
509pub struct DocumentFlowSnapshot {
510 pub p2p_chains: Vec<P2PDocumentChain>,
512 pub o2c_chains: Vec<O2CDocumentChain>,
514 pub purchase_orders: Vec<documents::PurchaseOrder>,
516 pub goods_receipts: Vec<documents::GoodsReceipt>,
518 pub vendor_invoices: Vec<documents::VendorInvoice>,
520 pub sales_orders: Vec<documents::SalesOrder>,
522 pub deliveries: Vec<documents::Delivery>,
524 pub customer_invoices: Vec<documents::CustomerInvoice>,
526 pub payments: Vec<documents::Payment>,
528 pub document_references: Vec<documents::DocumentReference>,
531}
532
533#[derive(Debug, Clone, Default)]
535pub struct SubledgerSnapshot {
536 pub ap_invoices: Vec<APInvoice>,
538 pub ar_invoices: Vec<ARInvoice>,
540 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
542 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
544 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
546 pub ar_aging_reports: Vec<ARAgingReport>,
548 pub ap_aging_reports: Vec<APAgingReport>,
550 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
552 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
554 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
556 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
558}
559
560#[derive(Debug, Clone, Default)]
562pub struct OcpmSnapshot {
563 pub event_log: Option<OcpmEventLog>,
565 pub event_count: usize,
567 pub object_count: usize,
569 pub case_count: usize,
571}
572
573#[derive(Debug, Clone, Default)]
575pub struct AuditSnapshot {
576 pub engagements: Vec<AuditEngagement>,
578 pub workpapers: Vec<Workpaper>,
580 pub evidence: Vec<AuditEvidence>,
582 pub risk_assessments: Vec<RiskAssessment>,
584 pub findings: Vec<AuditFinding>,
586 pub judgments: Vec<ProfessionalJudgment>,
588 pub confirmations: Vec<ExternalConfirmation>,
590 pub confirmation_responses: Vec<ConfirmationResponse>,
592 pub procedure_steps: Vec<AuditProcedureStep>,
594 pub samples: Vec<AuditSample>,
596 pub analytical_results: Vec<AnalyticalProcedureResult>,
598 pub ia_functions: Vec<InternalAuditFunction>,
600 pub ia_reports: Vec<InternalAuditReport>,
602 pub related_parties: Vec<RelatedParty>,
604 pub related_party_transactions: Vec<RelatedPartyTransaction>,
606 pub component_auditors: Vec<ComponentAuditor>,
609 pub group_audit_plan: Option<GroupAuditPlan>,
611 pub component_instructions: Vec<ComponentInstruction>,
613 pub component_reports: Vec<ComponentAuditorReport>,
615 pub engagement_letters: Vec<EngagementLetter>,
618 pub subsequent_events: Vec<SubsequentEvent>,
621 pub service_organizations: Vec<ServiceOrganization>,
624 pub soc_reports: Vec<SocReport>,
626 pub user_entity_controls: Vec<UserEntityControl>,
628 pub going_concern_assessments:
631 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
632 pub accounting_estimates:
635 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
636 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
639 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
641 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
644 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
646 pub materiality_calculations:
649 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
650 pub combined_risk_assessments:
653 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
654 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
657 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
659 pub significant_transaction_classes:
662 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
663 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
666 pub analytical_relationships:
669 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
670 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
673 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
676 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
679 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
684 pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
690 pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
694 pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
697}
698
699#[derive(Debug, Clone, Default)]
701pub struct BankingSnapshot {
702 pub customers: Vec<BankingCustomer>,
704 pub accounts: Vec<BankAccount>,
706 pub transactions: Vec<BankTransaction>,
708 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
710 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
712 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
714 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
716 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
718 pub suspicious_count: usize,
720 pub scenario_count: usize,
722}
723
724#[derive(Debug, Clone, Default, Serialize)]
726pub struct GraphExportSnapshot {
727 pub exported: bool,
729 pub graph_count: usize,
731 pub exports: HashMap<String, GraphExportInfo>,
733}
734
735#[derive(Debug, Clone, Serialize)]
737pub struct GraphExportInfo {
738 pub name: String,
740 pub format: String,
742 pub output_path: PathBuf,
744 pub node_count: usize,
746 pub edge_count: usize,
748}
749
750#[derive(Debug, Clone, Default)]
752pub struct SourcingSnapshot {
753 pub spend_analyses: Vec<SpendAnalysis>,
755 pub sourcing_projects: Vec<SourcingProject>,
757 pub qualifications: Vec<SupplierQualification>,
759 pub rfx_events: Vec<RfxEvent>,
761 pub bids: Vec<SupplierBid>,
763 pub bid_evaluations: Vec<BidEvaluation>,
765 pub contracts: Vec<ProcurementContract>,
767 pub catalog_items: Vec<CatalogItem>,
769 pub scorecards: Vec<SupplierScorecard>,
771}
772
773#[derive(Debug, Clone, Serialize, Deserialize)]
775pub struct PeriodTrialBalance {
776 pub fiscal_year: u16,
778 pub fiscal_period: u8,
780 pub period_start: NaiveDate,
782 pub period_end: NaiveDate,
784 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
786}
787
788#[derive(Debug, Clone, Default)]
790pub struct FinancialReportingSnapshot {
791 pub financial_statements: Vec<FinancialStatement>,
794 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
797 pub consolidated_statements: Vec<FinancialStatement>,
799 pub consolidation_schedules: Vec<ConsolidationSchedule>,
801 pub bank_reconciliations: Vec<BankReconciliation>,
803 pub trial_balances: Vec<PeriodTrialBalance>,
805 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
807 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
809 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
811}
812
813#[derive(Debug, Clone, Default)]
815pub struct HrSnapshot {
816 pub payroll_runs: Vec<PayrollRun>,
818 pub payroll_line_items: Vec<PayrollLineItem>,
820 pub time_entries: Vec<TimeEntry>,
822 pub expense_reports: Vec<ExpenseReport>,
824 pub benefit_enrollments: Vec<BenefitEnrollment>,
826 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
828 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
830 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
832 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
834 pub pension_journal_entries: Vec<JournalEntry>,
836 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
838 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
840 pub stock_comp_journal_entries: Vec<JournalEntry>,
842 pub payroll_run_count: usize,
844 pub payroll_line_item_count: usize,
846 pub time_entry_count: usize,
848 pub expense_report_count: usize,
850 pub benefit_enrollment_count: usize,
852 pub pension_plan_count: usize,
854 pub stock_grant_count: usize,
856}
857
858#[derive(Debug, Clone, Default)]
860pub struct AccountingStandardsSnapshot {
861 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
863 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
865 pub business_combinations:
867 Vec<datasynth_core::models::business_combination::BusinessCombination>,
868 pub business_combination_journal_entries: Vec<JournalEntry>,
870 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
872 pub ecl_provision_movements:
874 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
875 pub ecl_journal_entries: Vec<JournalEntry>,
877 pub provisions: Vec<datasynth_core::models::provision::Provision>,
879 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
881 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
883 pub provision_journal_entries: Vec<JournalEntry>,
885 pub currency_translation_results:
887 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
888 pub revenue_contract_count: usize,
890 pub impairment_test_count: usize,
892 pub business_combination_count: usize,
894 pub ecl_model_count: usize,
896 pub provision_count: usize,
898 pub currency_translation_count: usize,
900 pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
904 pub fair_value_measurements:
906 Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
907 pub framework_differences:
909 Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
910 pub framework_reconciliations:
912 Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
913 pub lease_count: usize,
915 pub fair_value_measurement_count: usize,
916 pub framework_difference_count: usize,
917}
918
919#[derive(Debug, Clone, Default)]
921pub struct ComplianceRegulationsSnapshot {
922 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
924 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
926 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
928 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
930 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
932 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
934 pub compliance_graph: Option<datasynth_graph::Graph>,
936}
937
938#[derive(Debug, Clone, Default)]
940pub struct ManufacturingSnapshot {
941 pub production_orders: Vec<ProductionOrder>,
943 pub quality_inspections: Vec<QualityInspection>,
945 pub cycle_counts: Vec<CycleCount>,
947 pub bom_components: Vec<BomComponent>,
949 pub inventory_movements: Vec<InventoryMovement>,
951 pub production_order_count: usize,
953 pub quality_inspection_count: usize,
955 pub cycle_count_count: usize,
957 pub bom_component_count: usize,
959 pub inventory_movement_count: usize,
961}
962
963#[derive(Debug, Clone, Default)]
965pub struct SalesKpiBudgetsSnapshot {
966 pub sales_quotes: Vec<SalesQuote>,
968 pub kpis: Vec<ManagementKpi>,
970 pub budgets: Vec<Budget>,
972 pub sales_quote_count: usize,
974 pub kpi_count: usize,
976 pub budget_line_count: usize,
978}
979
980#[derive(Debug, Clone, Default)]
982pub struct AnomalyLabels {
983 pub labels: Vec<LabeledAnomaly>,
985 pub summary: Option<AnomalySummary>,
987 pub by_type: HashMap<String, usize>,
989}
990
991#[derive(Debug, Clone, Default)]
993pub struct BalanceValidationResult {
994 pub validated: bool,
996 pub is_balanced: bool,
998 pub entries_processed: u64,
1000 pub total_debits: rust_decimal::Decimal,
1002 pub total_credits: rust_decimal::Decimal,
1004 pub accounts_tracked: usize,
1006 pub companies_tracked: usize,
1008 pub validation_errors: Vec<ValidationError>,
1010 pub has_unbalanced_entries: bool,
1012}
1013
1014#[derive(Debug, Clone, Default)]
1016pub struct TaxSnapshot {
1017 pub jurisdictions: Vec<TaxJurisdiction>,
1019 pub codes: Vec<TaxCode>,
1021 pub tax_lines: Vec<TaxLine>,
1023 pub tax_returns: Vec<TaxReturn>,
1025 pub tax_provisions: Vec<TaxProvision>,
1027 pub withholding_records: Vec<WithholdingTaxRecord>,
1029 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1031 pub jurisdiction_count: usize,
1033 pub code_count: usize,
1035 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1037 pub tax_posting_journal_entries: Vec<JournalEntry>,
1039}
1040
1041#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1043pub struct IntercompanySnapshot {
1044 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1046 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1048 pub seller_journal_entries: Vec<JournalEntry>,
1050 pub buyer_journal_entries: Vec<JournalEntry>,
1052 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1054 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1056 #[serde(skip)]
1058 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1059 pub matched_pair_count: usize,
1061 pub elimination_entry_count: usize,
1063 pub match_rate: f64,
1065}
1066
1067#[derive(Debug, Clone, Default)]
1069pub struct EsgSnapshot {
1070 pub emissions: Vec<EmissionRecord>,
1072 pub energy: Vec<EnergyConsumption>,
1074 pub water: Vec<WaterUsage>,
1076 pub waste: Vec<WasteRecord>,
1078 pub diversity: Vec<WorkforceDiversityMetric>,
1080 pub pay_equity: Vec<PayEquityMetric>,
1082 pub safety_incidents: Vec<SafetyIncident>,
1084 pub safety_metrics: Vec<SafetyMetric>,
1086 pub governance: Vec<GovernanceMetric>,
1088 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1090 pub materiality: Vec<MaterialityAssessment>,
1092 pub disclosures: Vec<EsgDisclosure>,
1094 pub climate_scenarios: Vec<ClimateScenario>,
1096 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1098 pub emission_count: usize,
1100 pub disclosure_count: usize,
1102}
1103
1104#[derive(Debug, Clone, Default)]
1106pub struct TreasurySnapshot {
1107 pub cash_positions: Vec<CashPosition>,
1109 pub cash_forecasts: Vec<CashForecast>,
1111 pub cash_pools: Vec<CashPool>,
1113 pub cash_pool_sweeps: Vec<CashPoolSweep>,
1115 pub hedging_instruments: Vec<HedgingInstrument>,
1117 pub hedge_relationships: Vec<HedgeRelationship>,
1119 pub debt_instruments: Vec<DebtInstrument>,
1121 pub bank_guarantees: Vec<BankGuarantee>,
1123 pub netting_runs: Vec<NettingRun>,
1125 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1127 pub journal_entries: Vec<JournalEntry>,
1130}
1131
1132#[derive(Debug, Clone, Default)]
1134pub struct ProjectAccountingSnapshot {
1135 pub projects: Vec<Project>,
1137 pub cost_lines: Vec<ProjectCostLine>,
1139 pub revenue_records: Vec<ProjectRevenue>,
1141 pub earned_value_metrics: Vec<EarnedValueMetric>,
1143 pub change_orders: Vec<ChangeOrder>,
1145 pub milestones: Vec<ProjectMilestone>,
1147}
1148
1149#[derive(Debug, Default)]
1151pub struct EnhancedGenerationResult {
1152 pub chart_of_accounts: ChartOfAccounts,
1154 pub master_data: MasterDataSnapshot,
1156 pub document_flows: DocumentFlowSnapshot,
1158 pub subledger: SubledgerSnapshot,
1160 pub ocpm: OcpmSnapshot,
1162 pub audit: AuditSnapshot,
1164 pub banking: BankingSnapshot,
1166 pub graph_export: GraphExportSnapshot,
1168 pub sourcing: SourcingSnapshot,
1170 pub financial_reporting: FinancialReportingSnapshot,
1172 pub hr: HrSnapshot,
1174 pub accounting_standards: AccountingStandardsSnapshot,
1176 pub manufacturing: ManufacturingSnapshot,
1178 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1180 pub tax: TaxSnapshot,
1182 pub esg: EsgSnapshot,
1184 pub treasury: TreasurySnapshot,
1186 pub project_accounting: ProjectAccountingSnapshot,
1188 pub process_evolution: Vec<ProcessEvolutionEvent>,
1190 pub organizational_events: Vec<OrganizationalEvent>,
1192 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1194 pub intercompany: IntercompanySnapshot,
1196 pub journal_entries: Vec<JournalEntry>,
1198 pub anomaly_labels: AnomalyLabels,
1200 pub balance_validation: BalanceValidationResult,
1202 pub data_quality_stats: DataQualityStats,
1204 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1206 pub statistics: EnhancedGenerationStatistics,
1208 pub lineage: Option<super::lineage::LineageGraph>,
1210 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1212 pub internal_controls: Vec<InternalControl>,
1214 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1218 pub opening_balances: Vec<GeneratedOpeningBalance>,
1220 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1222 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1224 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1226 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1228 pub temporal_vendor_chains:
1230 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1231 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1233 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1235 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1237 pub compliance_regulations: ComplianceRegulationsSnapshot,
1239 pub analytics_metadata: AnalyticsMetadataSnapshot,
1243 pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1247}
1248
/// Analytics-related outputs bundled into a single snapshot.
///
/// Held by the `analytics_metadata` field of the generation result struct.
/// All collections start empty through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative records.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmark records.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management report records.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift event records.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1261
/// Counters and timings collected while the enhanced generation workflow runs.
///
/// The struct is serializable. Fields tagged `#[serde(default)]` fall back to
/// their type's default when the key is missing from the input, so payloads
/// that lack those keys still deserialize. Fields without the attribute are
/// required on deserialization.
///
/// `generate` seeds `companies_count` and `period_months` from the
/// configuration and fills the remaining fields as phases run.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and general scope ---
    /// Number of journal entries; `generate` sets this to the length of the
    /// combined entry list once all JE-producing phases have run.
    pub total_entries: u64,
    /// Sum of `line_count()` over all journal entries.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Number of companies in the configuration.
    pub companies_count: usize,
    /// Generation period length in months, copied from the configuration.
    pub period_months: u32,

    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flows and subledger invoices ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomalies and data quality ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment (the `_ms` suffix suggests milliseconds; confirm at the write site) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,

    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Neural hybrid settings: omitted from serialized output while `None` ---
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    // Unlike the neural options above, this is serialized even while `None`.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales, KPIs and budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Further tax, balance and reconciliation counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Process and organizational events ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Counterfactuals and fraud indicators ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Temporal versions, entity graph and cross-process links ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Disruption, industry and period close ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1522
/// Orchestrator that drives the enhanced generation workflow.
///
/// Build one with `new`, `with_defaults`, or one of the `from_fingerprint*`
/// constructors, then adjust it through the `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before construction.
    config: GeneratorConfig,
    /// Phase settings, including the `show_progress` flag set by `with_progress`.
    phase_config: PhaseConfig,
    /// Chart of accounts; initialised to `None` in `new`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn at random when unset.
    seed: u64,
    /// Progress-bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`; rebuilt by `with_output_path`.
    resource_guard: ResourceGuard,
    /// Output location recorded by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless set by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry, built from `config.country_packs` when present
    /// and from the built-in packs otherwise.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that `emit_phase_items` streams serialized items into.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider created by `build_template_provider`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Shared temporal context; `None` when temporal patterns or business-day
    /// handling are disabled in the configuration.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
}
1556
1557impl EnhancedOrchestrator {
1558 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1560 datasynth_config::validate_config(&config)?;
1561
1562 let seed = config.global.seed.unwrap_or_else(rand::random);
1563
1564 let resource_guard = Self::build_resource_guard(&config, None);
1566
1567 let country_pack_registry = match &config.country_packs {
1569 Some(cp) => {
1570 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1571 .map_err(|e| SynthError::config(e.to_string()))?
1572 }
1573 None => datasynth_core::CountryPackRegistry::builtin_only()
1574 .map_err(|e| SynthError::config(e.to_string()))?,
1575 };
1576
1577 let template_provider = Self::build_template_provider(&config)?;
1581
1582 let temporal_context = Self::build_temporal_context(&config)?;
1586
1587 Ok(Self {
1588 config,
1589 phase_config,
1590 coa: None,
1591 master_data: MasterDataSnapshot::default(),
1592 seed,
1593 multi_progress: None,
1594 resource_guard,
1595 output_path: None,
1596 copula_generators: Vec::new(),
1597 country_pack_registry,
1598 phase_sink: None,
1599 template_provider,
1600 temporal_context,
1601 })
1602 }
1603
1604 fn build_temporal_context(
1610 config: &GeneratorConfig,
1611 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1612 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1613
1614 let tp = &config.temporal_patterns;
1615 if !tp.enabled || !tp.business_days.enabled {
1616 return Ok(None);
1617 }
1618
1619 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1620 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1621 let end_date = start_date + chrono::Months::new(config.global.period_months);
1622
1623 let region_code = tp
1624 .calendars
1625 .regions
1626 .first()
1627 .cloned()
1628 .unwrap_or_else(|| "US".to_string());
1629 let region = parse_region_code(®ion_code);
1630
1631 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1632 }
1633
1634 fn build_template_provider(
1642 config: &GeneratorConfig,
1643 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1644 use datasynth_core::templates::{
1645 loader::{MergeStrategy, TemplateLoader},
1646 DefaultTemplateProvider,
1647 };
1648 use std::sync::Arc;
1649
1650 let provider = match &config.templates.path {
1651 None => DefaultTemplateProvider::new(),
1652 Some(path) => {
1653 let data = if path.is_dir() {
1654 TemplateLoader::load_from_directory(path)
1655 } else {
1656 TemplateLoader::load_from_file(path)
1657 }
1658 .map_err(|e| {
1659 SynthError::config(format!(
1660 "Failed to load templates from {}: {e}",
1661 path.display()
1662 ))
1663 })?;
1664 let strategy = match config.templates.merge_strategy {
1665 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1666 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1667 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1668 MergeStrategy::MergePreferFile
1669 }
1670 };
1671 DefaultTemplateProvider::with_templates(data, strategy)
1672 }
1673 };
1674 Ok(Arc::new(provider))
1675 }
1676
1677 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1679 Self::new(config, PhaseConfig::default())
1680 }
1681
1682 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1684 self.phase_sink = Some(sink);
1685 self
1686 }
1687
1688 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1690 self.phase_sink = Some(sink);
1691 }
1692
1693 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1695 if let Some(ref sink) = self.phase_sink {
1696 for item in items {
1697 if let Ok(value) = serde_json::to_value(item) {
1698 if let Err(e) = sink.emit(phase, type_name, &value) {
1699 warn!(
1700 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1701 );
1702 }
1703 }
1704 }
1705 if let Err(e) = sink.phase_complete(phase) {
1706 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1707 }
1708 }
1709 }
1710
1711 pub fn with_progress(mut self, show: bool) -> Self {
1713 self.phase_config.show_progress = show;
1714 if show {
1715 self.multi_progress = Some(MultiProgress::new());
1716 }
1717 self
1718 }
1719
1720 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1722 let path = path.into();
1723 self.output_path = Some(path.clone());
1724 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1726 self
1727 }
1728
1729 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1731 &self.country_pack_registry
1732 }
1733
1734 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1736 self.country_pack_registry.get_by_str(country)
1737 }
1738
1739 fn primary_country_code(&self) -> &str {
1742 self.config
1743 .companies
1744 .first()
1745 .map(|c| c.country.as_str())
1746 .unwrap_or("US")
1747 }
1748
1749 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1751 self.country_pack_for(self.primary_country_code())
1752 }
1753
1754 fn resolve_coa_framework(&self) -> CoAFramework {
1756 if self.config.accounting_standards.enabled {
1757 match self.config.accounting_standards.framework {
1758 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1759 return CoAFramework::FrenchPcg;
1760 }
1761 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1762 return CoAFramework::GermanSkr04;
1763 }
1764 _ => {}
1765 }
1766 }
1767 let pack = self.primary_pack();
1769 match pack.accounting.framework.as_str() {
1770 "french_gaap" => CoAFramework::FrenchPcg,
1771 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1772 _ => CoAFramework::UsGaap,
1773 }
1774 }
1775
1776 pub fn has_copulas(&self) -> bool {
1781 !self.copula_generators.is_empty()
1782 }
1783
1784 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1790 &self.copula_generators
1791 }
1792
1793 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1797 &mut self.copula_generators
1798 }
1799
1800 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1804 self.copula_generators
1805 .iter_mut()
1806 .find(|c| c.name == copula_name)
1807 .map(|c| c.generator.sample())
1808 }
1809
1810 pub fn from_fingerprint(
1833 fingerprint_path: &std::path::Path,
1834 phase_config: PhaseConfig,
1835 scale: f64,
1836 ) -> SynthResult<Self> {
1837 info!("Loading fingerprint from: {}", fingerprint_path.display());
1838
1839 let reader = FingerprintReader::new();
1841 let fingerprint = reader
1842 .read_from_file(fingerprint_path)
1843 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1844
1845 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1846 }
1847
1848 pub fn from_fingerprint_data(
1855 fingerprint: Fingerprint,
1856 phase_config: PhaseConfig,
1857 scale: f64,
1858 ) -> SynthResult<Self> {
1859 info!(
1860 "Synthesizing config from fingerprint (version: {}, tables: {})",
1861 fingerprint.manifest.version,
1862 fingerprint.schema.tables.len()
1863 );
1864
1865 let seed: u64 = rand::random();
1867 info!("Fingerprint synthesis seed: {}", seed);
1868
1869 let options = SynthesisOptions {
1871 scale,
1872 seed: Some(seed),
1873 preserve_correlations: true,
1874 inject_anomalies: true,
1875 };
1876 let synthesizer = ConfigSynthesizer::with_options(options);
1877
1878 let synthesis_result = synthesizer
1880 .synthesize_full(&fingerprint, seed)
1881 .map_err(|e| {
1882 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1883 })?;
1884
1885 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1887 Self::base_config_for_industry(industry)
1888 } else {
1889 Self::base_config_for_industry("manufacturing")
1890 };
1891
1892 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1894
1895 info!(
1897 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1898 fingerprint.schema.tables.len(),
1899 scale,
1900 synthesis_result.copula_generators.len()
1901 );
1902
1903 if !synthesis_result.copula_generators.is_empty() {
1904 for spec in &synthesis_result.copula_generators {
1905 info!(
1906 " Copula '{}' for table '{}': {} columns",
1907 spec.name,
1908 spec.table,
1909 spec.columns.len()
1910 );
1911 }
1912 }
1913
1914 let mut orchestrator = Self::new(config, phase_config)?;
1916
1917 orchestrator.copula_generators = synthesis_result.copula_generators;
1919
1920 Ok(orchestrator)
1921 }
1922
1923 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1925 use datasynth_config::presets::create_preset;
1926 use datasynth_config::TransactionVolume;
1927 use datasynth_core::models::{CoAComplexity, IndustrySector};
1928
1929 let sector = match industry.to_lowercase().as_str() {
1930 "manufacturing" => IndustrySector::Manufacturing,
1931 "retail" => IndustrySector::Retail,
1932 "financial" | "financial_services" => IndustrySector::FinancialServices,
1933 "healthcare" => IndustrySector::Healthcare,
1934 "technology" | "tech" => IndustrySector::Technology,
1935 _ => IndustrySector::Manufacturing,
1936 };
1937
1938 create_preset(
1940 sector,
1941 1, 12, CoAComplexity::Medium,
1944 TransactionVolume::TenK,
1945 )
1946 }
1947
1948 fn apply_config_patch(
1950 mut config: GeneratorConfig,
1951 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1952 ) -> GeneratorConfig {
1953 use datasynth_fingerprint::synthesis::ConfigValue;
1954
1955 for (key, value) in patch.values() {
1956 match (key.as_str(), value) {
1957 ("transactions.count", ConfigValue::Integer(n)) => {
1960 info!(
1961 "Fingerprint suggests {} transactions (apply via company volumes)",
1962 n
1963 );
1964 }
1965 ("global.period_months", ConfigValue::Integer(n)) => {
1966 config.global.period_months = (*n).clamp(1, 120) as u32;
1967 }
1968 ("global.start_date", ConfigValue::String(s)) => {
1969 config.global.start_date = s.clone();
1970 }
1971 ("global.seed", ConfigValue::Integer(n)) => {
1972 config.global.seed = Some(*n as u64);
1973 }
1974 ("fraud.enabled", ConfigValue::Bool(b)) => {
1975 config.fraud.enabled = *b;
1976 }
1977 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1978 config.fraud.fraud_rate = *f;
1979 }
1980 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1981 config.data_quality.enabled = *b;
1982 }
1983 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1985 config.fraud.enabled = *b;
1986 }
1987 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1988 config.fraud.fraud_rate = *f;
1989 }
1990 _ => {
1991 debug!("Ignoring unknown config patch key: {}", key);
1992 }
1993 }
1994 }
1995
1996 config
1997 }
1998
1999 fn build_resource_guard(
2001 config: &GeneratorConfig,
2002 output_path: Option<PathBuf>,
2003 ) -> ResourceGuard {
2004 let mut builder = ResourceGuardBuilder::new();
2005
2006 if config.global.memory_limit_mb > 0 {
2008 builder = builder.memory_limit(config.global.memory_limit_mb);
2009 }
2010
2011 if let Some(path) = output_path {
2013 builder = builder.output_path(path).min_free_disk(100); }
2015
2016 builder = builder.conservative();
2018
2019 builder.build()
2020 }
2021
2022 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2027 self.resource_guard.check()
2028 }
2029
2030 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2032 let level = self.resource_guard.check()?;
2033
2034 if level != DegradationLevel::Normal {
2035 warn!(
2036 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2037 phase,
2038 level,
2039 self.resource_guard.current_memory_mb(),
2040 self.resource_guard.available_disk_mb()
2041 );
2042 }
2043
2044 Ok(level)
2045 }
2046
2047 fn get_degradation_actions(&self) -> DegradationActions {
2049 self.resource_guard.get_actions()
2050 }
2051
2052 fn check_memory_limit(&self) -> SynthResult<()> {
2054 self.check_resources()?;
2055 Ok(())
2056 }
2057
2058 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2060 info!("Starting enhanced generation workflow");
2061 info!(
2062 "Config: industry={:?}, period_months={}, companies={}",
2063 self.config.global.industry,
2064 self.config.global.period_months,
2065 self.config.companies.len()
2066 );
2067
2068 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2071 datasynth_core::serde_decimal::set_numeric_native(is_native);
2072 struct NumericModeGuard;
2073 impl Drop for NumericModeGuard {
2074 fn drop(&mut self) {
2075 datasynth_core::serde_decimal::set_numeric_native(false);
2076 }
2077 }
2078 let _numeric_guard = if is_native {
2079 Some(NumericModeGuard)
2080 } else {
2081 None
2082 };
2083
2084 let initial_level = self.check_resources_with_log("initial")?;
2086 if initial_level == DegradationLevel::Emergency {
2087 return Err(SynthError::resource(
2088 "Insufficient resources to start generation",
2089 ));
2090 }
2091
2092 let mut stats = EnhancedGenerationStatistics {
2093 companies_count: self.config.companies.len(),
2094 period_months: self.config.global.period_months,
2095 ..Default::default()
2096 };
2097
2098 let coa = self.phase_chart_of_accounts(&mut stats)?;
2100
2101 self.phase_master_data(&mut stats)?;
2103
2104 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2106 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2107 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2108
2109 let (mut document_flows, mut subledger, fa_journal_entries) =
2111 self.phase_document_flows(&mut stats)?;
2112
2113 self.emit_phase_items(
2115 "document_flows",
2116 "PurchaseOrder",
2117 &document_flows.purchase_orders,
2118 );
2119 self.emit_phase_items(
2120 "document_flows",
2121 "GoodsReceipt",
2122 &document_flows.goods_receipts,
2123 );
2124 self.emit_phase_items(
2125 "document_flows",
2126 "VendorInvoice",
2127 &document_flows.vendor_invoices,
2128 );
2129 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2130 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2131
2132 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2134
2135 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2140 .iter()
2141 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2142 .collect();
2143 if !opening_balance_jes.is_empty() {
2144 debug!(
2145 "Prepending {} opening balance JEs to entries",
2146 opening_balance_jes.len()
2147 );
2148 }
2149
2150 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2152
2153 if !opening_balance_jes.is_empty() {
2156 let mut combined = opening_balance_jes;
2157 combined.extend(entries);
2158 entries = combined;
2159 }
2160
2161 if !fa_journal_entries.is_empty() {
2163 debug!(
2164 "Appending {} FA acquisition JEs to main entries",
2165 fa_journal_entries.len()
2166 );
2167 entries.extend(fa_journal_entries);
2168 }
2169
2170 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2172
2173 let actions = self.get_degradation_actions();
2175
2176 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2178
2179 if !sourcing.contracts.is_empty() {
2182 let mut linked_count = 0usize;
2183 let po_vendor_pairs: Vec<(String, String)> = document_flows
2185 .p2p_chains
2186 .iter()
2187 .map(|chain| {
2188 (
2189 chain.purchase_order.vendor_id.clone(),
2190 chain.purchase_order.header.document_id.clone(),
2191 )
2192 })
2193 .collect();
2194
2195 for chain in &mut document_flows.p2p_chains {
2196 if chain.purchase_order.contract_id.is_none() {
2197 if let Some(contract) = sourcing
2198 .contracts
2199 .iter()
2200 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2201 {
2202 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2203 linked_count += 1;
2204 }
2205 }
2206 }
2207
2208 for contract in &mut sourcing.contracts {
2210 let po_ids: Vec<String> = po_vendor_pairs
2211 .iter()
2212 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2213 .map(|(_, po_id)| po_id.clone())
2214 .collect();
2215 if !po_ids.is_empty() {
2216 contract.purchase_order_ids = po_ids;
2217 }
2218 }
2219
2220 if linked_count > 0 {
2221 debug!(
2222 "Linked {} purchase orders to S2C contracts by vendor match",
2223 linked_count
2224 );
2225 }
2226 }
2227
2228 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2230
2231 if !intercompany.seller_journal_entries.is_empty()
2233 || !intercompany.buyer_journal_entries.is_empty()
2234 {
2235 let ic_je_count = intercompany.seller_journal_entries.len()
2236 + intercompany.buyer_journal_entries.len();
2237 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2238 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2239 debug!(
2240 "Appended {} IC journal entries to main entries",
2241 ic_je_count
2242 );
2243 }
2244
2245 if !intercompany.elimination_entries.is_empty() {
2247 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2248 &intercompany.elimination_entries,
2249 );
2250 if !elim_jes.is_empty() {
2251 debug!(
2252 "Appended {} elimination journal entries to main entries",
2253 elim_jes.len()
2254 );
2255 let elim_debit: rust_decimal::Decimal =
2257 elim_jes.iter().map(|je| je.total_debit()).sum();
2258 let elim_credit: rust_decimal::Decimal =
2259 elim_jes.iter().map(|je| je.total_credit()).sum();
2260 let elim_diff = (elim_debit - elim_credit).abs();
2261 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2263 return Err(datasynth_core::error::SynthError::generation(format!(
2264 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2265 elim_debit, elim_credit, elim_diff, tolerance
2266 )));
2267 }
2268 debug!(
2269 "IC elimination balance verified: debits={}, credits={} (diff={})",
2270 elim_debit, elim_credit, elim_diff
2271 );
2272 entries.extend(elim_jes);
2273 }
2274 }
2275
2276 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2278 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2279 document_flows
2280 .customer_invoices
2281 .extend(ic_docs.seller_invoices.iter().cloned());
2282 document_flows
2283 .purchase_orders
2284 .extend(ic_docs.buyer_orders.iter().cloned());
2285 document_flows
2286 .goods_receipts
2287 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2288 document_flows
2289 .vendor_invoices
2290 .extend(ic_docs.buyer_invoices.iter().cloned());
2291 debug!(
2292 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2293 ic_docs.seller_invoices.len(),
2294 ic_docs.buyer_orders.len(),
2295 ic_docs.buyer_goods_receipts.len(),
2296 ic_docs.buyer_invoices.len(),
2297 );
2298 }
2299 }
2300
2301 let hr = self.phase_hr_data(&mut stats)?;
2303
2304 if !hr.payroll_runs.is_empty() {
2306 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2307 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2308 entries.extend(payroll_jes);
2309 }
2310
2311 if !hr.pension_journal_entries.is_empty() {
2313 debug!(
2314 "Generated {} JEs from pension plans",
2315 hr.pension_journal_entries.len()
2316 );
2317 entries.extend(hr.pension_journal_entries.iter().cloned());
2318 }
2319
2320 if !hr.stock_comp_journal_entries.is_empty() {
2322 debug!(
2323 "Generated {} JEs from stock-based compensation",
2324 hr.stock_comp_journal_entries.len()
2325 );
2326 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2327 }
2328
2329 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2331
2332 if !manufacturing_snap.production_orders.is_empty() {
2334 let currency = self
2335 .config
2336 .companies
2337 .first()
2338 .map(|c| c.currency.as_str())
2339 .unwrap_or("USD");
2340 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2341 &manufacturing_snap.production_orders,
2342 &manufacturing_snap.quality_inspections,
2343 currency,
2344 );
2345 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2346 entries.extend(mfg_jes);
2347 }
2348
2349 if !manufacturing_snap.quality_inspections.is_empty() {
2351 let framework = match self.config.accounting_standards.framework {
2352 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2353 _ => "US_GAAP",
2354 };
2355 for company in &self.config.companies {
2356 let company_orders: Vec<_> = manufacturing_snap
2357 .production_orders
2358 .iter()
2359 .filter(|o| o.company_code == company.code)
2360 .cloned()
2361 .collect();
2362 let company_inspections: Vec<_> = manufacturing_snap
2363 .quality_inspections
2364 .iter()
2365 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2366 .cloned()
2367 .collect();
2368 if company_inspections.is_empty() {
2369 continue;
2370 }
2371 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2372 let warranty_result = warranty_gen.generate(
2373 &company.code,
2374 &company_orders,
2375 &company_inspections,
2376 &company.currency,
2377 framework,
2378 );
2379 if !warranty_result.journal_entries.is_empty() {
2380 debug!(
2381 "Generated {} warranty provision JEs for {}",
2382 warranty_result.journal_entries.len(),
2383 company.code
2384 );
2385 entries.extend(warranty_result.journal_entries);
2386 }
2387 }
2388 }
2389
2390 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2392 {
2393 let cogs_currency = self
2394 .config
2395 .companies
2396 .first()
2397 .map(|c| c.currency.as_str())
2398 .unwrap_or("USD");
2399 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2400 &document_flows.deliveries,
2401 &manufacturing_snap.production_orders,
2402 cogs_currency,
2403 );
2404 if !cogs_jes.is_empty() {
2405 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2406 entries.extend(cogs_jes);
2407 }
2408 }
2409
2410 if !manufacturing_snap.inventory_movements.is_empty()
2416 && !subledger.inventory_positions.is_empty()
2417 {
2418 use datasynth_core::models::MovementType as MfgMovementType;
2419 let mut receipt_count = 0usize;
2420 let mut issue_count = 0usize;
2421 for movement in &manufacturing_snap.inventory_movements {
2422 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2424 p.material_id == movement.material_code
2425 && p.company_code == movement.entity_code
2426 }) {
2427 match movement.movement_type {
2428 MfgMovementType::GoodsReceipt => {
2429 pos.add_quantity(
2431 movement.quantity,
2432 movement.value,
2433 movement.movement_date,
2434 );
2435 receipt_count += 1;
2436 }
2437 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2438 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2440 issue_count += 1;
2441 }
2442 _ => {}
2443 }
2444 }
2445 }
2446 debug!(
2447 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2448 manufacturing_snap.inventory_movements.len(),
2449 receipt_count,
2450 issue_count,
2451 );
2452 }
2453
2454 if !entries.is_empty() {
2457 stats.total_entries = entries.len() as u64;
2458 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2459 debug!(
2460 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2461 stats.total_entries, stats.total_line_items
2462 );
2463 }
2464
2465 if self.config.internal_controls.enabled && !entries.is_empty() {
2467 info!("Phase 7b: Applying internal controls to journal entries");
2468 let control_config = ControlGeneratorConfig {
2469 exception_rate: self.config.internal_controls.exception_rate,
2470 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2471 enable_sox_marking: true,
2472 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2473 self.config.internal_controls.sox_materiality_threshold,
2474 )
2475 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2476 ..Default::default()
2477 };
2478 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2479 for entry in &mut entries {
2480 control_gen.apply_controls(entry, &coa);
2481 }
2482 let with_controls = entries
2483 .iter()
2484 .filter(|e| !e.header.control_ids.is_empty())
2485 .count();
2486 info!(
2487 "Applied controls to {} entries ({} with control IDs assigned)",
2488 entries.len(),
2489 with_controls
2490 );
2491 }
2492
2493 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2497 .iter()
2498 .filter(|e| e.header.sod_violation)
2499 .filter_map(|e| {
2500 e.header.sod_conflict_type.map(|ct| {
2501 use datasynth_core::models::{RiskLevel, SodViolation};
2502 let severity = match ct {
2503 datasynth_core::models::SodConflictType::PaymentReleaser
2504 | datasynth_core::models::SodConflictType::RequesterApprover => {
2505 RiskLevel::Critical
2506 }
2507 datasynth_core::models::SodConflictType::PreparerApprover
2508 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2509 | datasynth_core::models::SodConflictType::JournalEntryPoster
2510 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2511 RiskLevel::High
2512 }
2513 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2514 RiskLevel::Medium
2515 }
2516 };
2517 let action = format!(
2518 "SoD conflict {:?} on entry {} ({})",
2519 ct, e.header.document_id, e.header.company_code
2520 );
2521 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2522 })
2523 })
2524 .collect();
2525 if !sod_violations.is_empty() {
2526 info!(
2527 "Phase 7c: Extracted {} SoD violations from {} entries",
2528 sod_violations.len(),
2529 entries.len()
2530 );
2531 }
2532
2533 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2535
2536 {
2544 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2545 if self.config.fraud.enabled && doc_rate > 0.0 {
2546 use datasynth_core::fraud_propagation::{
2547 inject_document_fraud, propagate_documents_to_entries,
2548 };
2549 use datasynth_core::utils::weighted_select;
2550 use datasynth_core::FraudType;
2551 use rand_chacha::rand_core::SeedableRng;
2552
2553 let dist = &self.config.fraud.fraud_type_distribution;
2554 let fraud_type_weights: [(FraudType, f64); 8] = [
2555 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2556 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2557 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2558 (
2559 FraudType::ImproperCapitalization,
2560 dist.expense_capitalization,
2561 ),
2562 (FraudType::SplitTransaction, dist.split_transaction),
2563 (FraudType::TimingAnomaly, dist.timing_anomaly),
2564 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2565 (FraudType::DuplicatePayment, dist.duplicate_payment),
2566 ];
2567 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2568 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2569 if weights_sum <= 0.0 {
2570 FraudType::FictitiousEntry
2571 } else {
2572 *weighted_select(rng, &fraud_type_weights)
2573 }
2574 };
2575
2576 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2577 let mut doc_tagged = 0usize;
2578 macro_rules! inject_into {
2579 ($collection:expr) => {{
2580 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2581 $collection.iter_mut().map(|d| &mut d.header).collect();
2582 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2583 }};
2584 }
2585 inject_into!(document_flows.purchase_orders);
2586 inject_into!(document_flows.goods_receipts);
2587 inject_into!(document_flows.vendor_invoices);
2588 inject_into!(document_flows.payments);
2589 inject_into!(document_flows.sales_orders);
2590 inject_into!(document_flows.deliveries);
2591 inject_into!(document_flows.customer_invoices);
2592 if doc_tagged > 0 {
2593 info!(
2594 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2595 );
2596 }
2597
2598 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2599 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2600 Vec::new();
2601 headers.extend(
2602 document_flows
2603 .purchase_orders
2604 .iter()
2605 .map(|d| d.header.clone()),
2606 );
2607 headers.extend(
2608 document_flows
2609 .goods_receipts
2610 .iter()
2611 .map(|d| d.header.clone()),
2612 );
2613 headers.extend(
2614 document_flows
2615 .vendor_invoices
2616 .iter()
2617 .map(|d| d.header.clone()),
2618 );
2619 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2620 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2621 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2622 headers.extend(
2623 document_flows
2624 .customer_invoices
2625 .iter()
2626 .map(|d| d.header.clone()),
2627 );
2628 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2629 if propagated > 0 {
2630 info!(
2631 "Propagated document-level fraud to {propagated} derived journal entries"
2632 );
2633 }
2634 }
2635 }
2636 }
2637
2638 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2640
2641 {
2659 use datasynth_core::fraud_bias::{
2660 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2661 };
2662 use rand_chacha::rand_core::SeedableRng;
2663 let cfg = FraudBehavioralBiasConfig::default();
2664 if cfg.enabled {
2665 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2666 let mut swept = 0usize;
2667 for entry in entries.iter_mut() {
2668 if entry.header.is_fraud && !entry.header.is_anomaly {
2669 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2670 swept += 1;
2671 }
2672 }
2673 if swept > 0 {
2674 info!(
2675 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2676 (doc-propagated + je_generator intrinsic fraud)"
2677 );
2678 }
2679 }
2680 }
2681
2682 self.emit_phase_items(
2684 "anomaly_injection",
2685 "LabeledAnomaly",
2686 &anomaly_labels.labels,
2687 );
2688
2689 if self.config.fraud.propagate_to_document {
2697 use std::collections::HashMap;
2698 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2711 for je in &entries {
2712 if je.header.is_fraud {
2713 if let Some(ref fraud_type) = je.header.fraud_type {
2714 if let Some(ref reference) = je.header.reference {
2715 fraud_map.insert(reference.clone(), *fraud_type);
2717 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2720 if !bare.is_empty() {
2721 fraud_map.insert(bare.to_string(), *fraud_type);
2722 }
2723 }
2724 }
2725 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2727 }
2728 }
2729 }
2730 if !fraud_map.is_empty() {
2731 let mut propagated = 0usize;
2732 macro_rules! propagate_to {
2734 ($collection:expr) => {
2735 for doc in &mut $collection {
2736 if doc.header.propagate_fraud(&fraud_map) {
2737 propagated += 1;
2738 }
2739 }
2740 };
2741 }
2742 propagate_to!(document_flows.purchase_orders);
2743 propagate_to!(document_flows.goods_receipts);
2744 propagate_to!(document_flows.vendor_invoices);
2745 propagate_to!(document_flows.payments);
2746 propagate_to!(document_flows.sales_orders);
2747 propagate_to!(document_flows.deliveries);
2748 propagate_to!(document_flows.customer_invoices);
2749 if propagated > 0 {
2750 info!(
2751 "Propagated fraud labels to {} document flow records",
2752 propagated
2753 );
2754 }
2755 }
2756 }
2757
2758 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2760
2761 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2763
2764 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2766
2767 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2769
2770 let balance_validation = self.phase_balance_validation(&entries)?;
2772
2773 let subledger_reconciliation =
2775 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2776
2777 let (data_quality_stats, quality_issues) =
2779 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2780
2781 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2783
2784 {
2786 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2791 for je in &entries {
2792 if je.header.is_fraud || je.header.is_anomaly {
2793 continue;
2794 }
2795 let diff = (je.total_debit() - je.total_credit()).abs();
2796 if diff > tolerance {
2797 unbalanced_clean += 1;
2798 if unbalanced_clean <= 3 {
2799 warn!(
2800 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2801 je.header.document_id,
2802 je.total_debit(),
2803 je.total_credit(),
2804 diff
2805 );
2806 }
2807 }
2808 }
2809 if unbalanced_clean > 0 {
2810 return Err(datasynth_core::error::SynthError::generation(format!(
2811 "{} non-anomaly JEs are unbalanced (debits != credits). \
2812 First few logged above. Tolerance={}",
2813 unbalanced_clean, tolerance
2814 )));
2815 }
2816 debug!(
2817 "Phase 10c: All {} non-anomaly JEs individually balanced",
2818 entries
2819 .iter()
2820 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2821 .count()
2822 );
2823
2824 let company_codes: Vec<String> = self
2826 .config
2827 .companies
2828 .iter()
2829 .map(|c| c.code.clone())
2830 .collect();
2831 for company_code in &company_codes {
2832 let mut assets = rust_decimal::Decimal::ZERO;
2833 let mut liab_equity = rust_decimal::Decimal::ZERO;
2834
2835 for entry in &entries {
2836 if entry.header.company_code != *company_code {
2837 continue;
2838 }
2839 for line in &entry.lines {
2840 let acct = &line.gl_account;
2841 let net = line.debit_amount - line.credit_amount;
2842 if acct.starts_with('1') {
2844 assets += net;
2845 }
2846 else if acct.starts_with('2') || acct.starts_with('3') {
2848 liab_equity -= net; }
2850 }
2853 }
2854
2855 let bs_diff = (assets - liab_equity).abs();
2856 if bs_diff > tolerance {
2857 warn!(
2858 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2859 revenue/expense closing entries may not fully offset",
2860 company_code, assets, liab_equity, bs_diff
2861 );
2862 } else {
2866 debug!(
2867 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2868 company_code, assets, liab_equity, bs_diff
2869 );
2870 }
2871 }
2872
2873 info!("Phase 10c: All generation-time accounting assertions passed");
2874 }
2875
2876 let audit = self.phase_audit_data(&entries, &mut stats)?;
2878
2879 let mut banking = self.phase_banking_data(&mut stats)?;
2881
2882 if self.phase_config.generate_banking
2887 && !document_flows.payments.is_empty()
2888 && !banking.accounts.is_empty()
2889 {
2890 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2891 if bridge_rate > 0.0 {
2892 let mut bridge =
2893 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2894 self.seed,
2895 );
2896 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2897 &document_flows.payments,
2898 &banking.customers,
2899 &banking.accounts,
2900 bridge_rate,
2901 );
2902 info!(
2903 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2904 bridge_stats.bridged_count,
2905 bridge_stats.transactions_emitted,
2906 bridge_stats.fraud_propagated,
2907 );
2908 let bridged_count = bridged_txns.len();
2909 banking.transactions.extend(bridged_txns);
2910
2911 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2914 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2915 &mut banking.transactions,
2916 );
2917 }
2918
2919 banking.suspicious_count = banking
2921 .transactions
2922 .iter()
2923 .filter(|t| t.is_suspicious)
2924 .count();
2925 stats.banking_transaction_count = banking.transactions.len();
2926 stats.banking_suspicious_count = banking.suspicious_count;
2927 }
2928 }
2929
2930 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2932
2933 self.phase_llm_enrichment(&mut stats);
2935
2936 self.phase_diffusion_enhancement(&mut stats);
2938
2939 self.phase_causal_overlay(&mut stats);
2941
2942 let mut financial_reporting = self.phase_financial_reporting(
2946 &document_flows,
2947 &entries,
2948 &coa,
2949 &hr,
2950 &audit,
2951 &mut stats,
2952 )?;
2953
2954 {
2956 use datasynth_core::models::StatementType;
2957 for stmt in &financial_reporting.consolidated_statements {
2958 if stmt.statement_type == StatementType::BalanceSheet {
2959 let total_assets: rust_decimal::Decimal = stmt
2960 .line_items
2961 .iter()
2962 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2963 .map(|li| li.amount)
2964 .sum();
2965 let total_le: rust_decimal::Decimal = stmt
2966 .line_items
2967 .iter()
2968 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2969 .map(|li| li.amount)
2970 .sum();
2971 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2972 warn!(
2973 "BS equation imbalance: assets={}, L+E={}",
2974 total_assets, total_le
2975 );
2976 }
2977 }
2978 }
2979 }
2980
2981 let accounting_standards =
2983 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2984
2985 if !accounting_standards.ecl_journal_entries.is_empty() {
2987 debug!(
2988 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2989 accounting_standards.ecl_journal_entries.len()
2990 );
2991 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2992 }
2993
2994 if !accounting_standards.provision_journal_entries.is_empty() {
2996 debug!(
2997 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2998 accounting_standards.provision_journal_entries.len()
2999 );
3000 entries.extend(
3001 accounting_standards
3002 .provision_journal_entries
3003 .iter()
3004 .cloned(),
3005 );
3006 }
3007
3008 let mut ocpm = self.phase_ocpm_events(
3010 &document_flows,
3011 &sourcing,
3012 &hr,
3013 &manufacturing_snap,
3014 &banking,
3015 &audit,
3016 &financial_reporting,
3017 &mut stats,
3018 )?;
3019
3020 if let Some(ref event_log) = ocpm.event_log {
3022 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3023 }
3024
3025 if let Some(ref event_log) = ocpm.event_log {
3027 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3029 std::collections::HashMap::new();
3030 for (idx, event) in event_log.events.iter().enumerate() {
3031 if let Some(ref doc_ref) = event.document_ref {
3032 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3033 }
3034 }
3035
3036 if !doc_index.is_empty() {
3037 let mut annotated = 0usize;
3038 for entry in &mut entries {
3039 let doc_id_str = entry.header.document_id.to_string();
3040 let mut matched_indices: Vec<usize> = Vec::new();
3042 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3043 matched_indices.extend(indices);
3044 }
3045 if let Some(ref reference) = entry.header.reference {
3046 let bare_ref = reference
3047 .find(':')
3048 .map(|i| &reference[i + 1..])
3049 .unwrap_or(reference.as_str());
3050 if let Some(indices) = doc_index.get(bare_ref) {
3051 for &idx in indices {
3052 if !matched_indices.contains(&idx) {
3053 matched_indices.push(idx);
3054 }
3055 }
3056 }
3057 }
3058 if !matched_indices.is_empty() {
3060 for &idx in &matched_indices {
3061 let event = &event_log.events[idx];
3062 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3063 entry.header.ocpm_event_ids.push(event.event_id);
3064 }
3065 for obj_ref in &event.object_refs {
3066 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3067 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3068 }
3069 }
3070 if entry.header.ocpm_case_id.is_none() {
3071 entry.header.ocpm_case_id = event.case_id;
3072 }
3073 }
3074 annotated += 1;
3075 }
3076 }
3077 debug!(
3078 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3079 annotated
3080 );
3081 }
3082 }
3083
3084 if let Some(ref mut event_log) = ocpm.event_log {
3088 let synthesized =
3089 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3090 if synthesized > 0 {
3091 info!(
3092 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3093 );
3094 }
3095
3096 let anomaly_events =
3101 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3102 if anomaly_events > 0 {
3103 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3104 }
3105
3106 let p2p_cfg = &self.config.ocpm.p2p_process;
3111 let any_imperfection = p2p_cfg.rework_probability > 0.0
3112 || p2p_cfg.skip_step_probability > 0.0
3113 || p2p_cfg.out_of_order_probability > 0.0;
3114 if any_imperfection {
3115 use rand_chacha::rand_core::SeedableRng;
3116 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3117 rework_rate: p2p_cfg.rework_probability,
3118 skip_rate: p2p_cfg.skip_step_probability,
3119 out_of_order_rate: p2p_cfg.out_of_order_probability,
3120 };
3121 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3122 let stats =
3123 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3124 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3125 info!(
3126 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3127 stats.rework, stats.skipped, stats.out_of_order
3128 );
3129 }
3130 }
3131 }
3132
3133 let sales_kpi_budgets =
3135 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3136
3137 let treasury =
3141 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3142
3143 if !treasury.journal_entries.is_empty() {
3145 debug!(
3146 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3147 treasury.journal_entries.len()
3148 );
3149 entries.extend(treasury.journal_entries.iter().cloned());
3150 }
3151
3152 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3154
3155 if !tax.tax_posting_journal_entries.is_empty() {
3157 debug!(
3158 "Merging {} tax posting JEs into GL",
3159 tax.tax_posting_journal_entries.len()
3160 );
3161 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3162 }
3163
3164 {
3182 use datasynth_core::fraud_bias::{
3183 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3184 };
3185 use rand_chacha::rand_core::SeedableRng;
3186 let cfg = FraudBehavioralBiasConfig::default();
3187 if cfg.enabled {
3188 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3189 let mut swept = 0usize;
3190 for entry in entries.iter_mut() {
3191 if entry.header.is_fraud && !entry.header.is_anomaly {
3192 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3193 swept += 1;
3194 }
3195 }
3196 if swept > 0 {
3197 info!(
3198 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3199 non-anomaly fraud entries (covers late-added JEs from \
3200 ECL / provisions / treasury / tax / period-close)"
3201 );
3202 }
3203 }
3204 }
3205
3206 {
3210 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3211
3212 let framework_str = {
3213 use datasynth_config::schema::AccountingFrameworkConfig;
3214 match self
3215 .config
3216 .accounting_standards
3217 .framework
3218 .unwrap_or_default()
3219 {
3220 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3221 "IFRS"
3222 }
3223 _ => "US_GAAP",
3224 }
3225 };
3226
3227 let depreciation_total: rust_decimal::Decimal = entries
3229 .iter()
3230 .filter(|je| je.header.document_type == "CL")
3231 .flat_map(|je| je.lines.iter())
3232 .filter(|l| l.gl_account.starts_with("6000"))
3233 .map(|l| l.debit_amount)
3234 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3235
3236 let interest_paid: rust_decimal::Decimal = entries
3238 .iter()
3239 .flat_map(|je| je.lines.iter())
3240 .filter(|l| l.gl_account.starts_with("7100"))
3241 .map(|l| l.debit_amount)
3242 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3243
3244 let tax_paid: rust_decimal::Decimal = entries
3246 .iter()
3247 .flat_map(|je| je.lines.iter())
3248 .filter(|l| l.gl_account.starts_with("8000"))
3249 .map(|l| l.debit_amount)
3250 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3251
3252 let capex: rust_decimal::Decimal = entries
3254 .iter()
3255 .flat_map(|je| je.lines.iter())
3256 .filter(|l| l.gl_account.starts_with("1500"))
3257 .map(|l| l.debit_amount)
3258 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3259
3260 let dividends_paid: rust_decimal::Decimal = entries
3262 .iter()
3263 .flat_map(|je| je.lines.iter())
3264 .filter(|l| l.gl_account == "2170")
3265 .map(|l| l.debit_amount)
3266 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3267
3268 let cf_data = CashFlowSourceData {
3269 depreciation_total,
3270 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3272 delta_ap: rust_decimal::Decimal::ZERO,
3273 delta_inventory: rust_decimal::Decimal::ZERO,
3274 capex,
3275 debt_issuance: rust_decimal::Decimal::ZERO,
3276 debt_repayment: rust_decimal::Decimal::ZERO,
3277 interest_paid,
3278 tax_paid,
3279 dividends_paid,
3280 framework: framework_str.to_string(),
3281 };
3282
3283 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3284 if !enhanced_cf_items.is_empty() {
3285 use datasynth_core::models::StatementType;
3287 let merge_count = enhanced_cf_items.len();
3288 for stmt in financial_reporting
3289 .financial_statements
3290 .iter_mut()
3291 .chain(financial_reporting.consolidated_statements.iter_mut())
3292 .chain(
3293 financial_reporting
3294 .standalone_statements
3295 .values_mut()
3296 .flat_map(|v| v.iter_mut()),
3297 )
3298 {
3299 if stmt.statement_type == StatementType::CashFlowStatement {
3300 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3301 }
3302 }
3303 info!(
3304 "Enhanced cash flow: {} supplementary items merged into CF statements",
3305 merge_count
3306 );
3307 }
3308 }
3309
3310 self.generate_notes_to_financial_statements(
3313 &mut financial_reporting,
3314 &accounting_standards,
3315 &tax,
3316 &hr,
3317 &audit,
3318 &treasury,
3319 );
3320
3321 if self.config.companies.len() >= 2 && !entries.is_empty() {
3325 let companies: Vec<(String, String)> = self
3326 .config
3327 .companies
3328 .iter()
3329 .map(|c| (c.code.clone(), c.name.clone()))
3330 .collect();
3331 let ic_elim: rust_decimal::Decimal =
3332 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3333 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3334 .unwrap_or(NaiveDate::MIN);
3335 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3336 let period_label = format!(
3337 "{}-{:02}",
3338 end_date.year(),
3339 (end_date - chrono::Days::new(1)).month()
3340 );
3341
3342 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3343 let (je_segments, je_recon) =
3344 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3345 if !je_segments.is_empty() {
3346 info!(
3347 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3348 je_segments.len(),
3349 ic_elim,
3350 );
3351 if financial_reporting.segment_reports.is_empty() {
3353 financial_reporting.segment_reports = je_segments;
3354 financial_reporting.segment_reconciliations = vec![je_recon];
3355 } else {
3356 financial_reporting.segment_reports.extend(je_segments);
3357 financial_reporting.segment_reconciliations.push(je_recon);
3358 }
3359 }
3360 }
3361
3362 let esg_snap =
3364 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3365
3366 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3368
3369 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3371
3372 let disruption_events = self.phase_disruption_events(&mut stats)?;
3374
3375 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3377
3378 let (entity_relationship_graph, cross_process_links) =
3380 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3381
3382 let industry_output = self.phase_industry_data(&mut stats);
3384
3385 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3387
3388 if self.config.diffusion.enabled
3406 && (self.config.diffusion.backend == "neural"
3407 || self.config.diffusion.backend == "hybrid")
3408 {
3409 let neural = &self.config.diffusion.neural;
3410 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3411 stats.neural_hybrid_weight = Some(weight);
3412 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3413 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3414 warn!(
3415 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3416 the neural/hybrid training path is not yet shipped. Config \
3417 is captured in stats (weight={weight:.2}, strategy={}, \
3418 columns={}) but no neural training runs. Statistical \
3419 diffusion (backend='statistical') continues to work.",
3420 self.config.diffusion.backend,
3421 neural.hybrid_strategy,
3422 neural.neural_columns.len(),
3423 );
3424 }
3425
3426 self.phase_hypergraph_export(
3428 &coa,
3429 &entries,
3430 &document_flows,
3431 &sourcing,
3432 &hr,
3433 &manufacturing_snap,
3434 &banking,
3435 &audit,
3436 &financial_reporting,
3437 &ocpm,
3438 &compliance_regulations,
3439 &mut stats,
3440 )?;
3441
3442 if self.phase_config.generate_graph_export {
3445 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3446 }
3447
3448 if self.config.streaming.enabled {
3450 info!("Note: streaming config is enabled but batch mode does not use it");
3451 }
3452 if self.config.vendor_network.enabled {
3453 debug!("Vendor network config available; relationship graph generation is partial");
3454 }
3455 if self.config.customer_segmentation.enabled {
3456 debug!("Customer segmentation config available; segment-aware generation is partial");
3457 }
3458
3459 let resource_stats = self.resource_guard.stats();
3461 info!(
3462 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3463 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3464 resource_stats.disk.estimated_bytes_written,
3465 resource_stats.degradation_level
3466 );
3467
3468 if let Some(ref sink) = self.phase_sink {
3470 if let Err(e) = sink.flush() {
3471 warn!("Stream sink flush failed: {e}");
3472 }
3473 }
3474
3475 let lineage = self.build_lineage_graph();
3477
3478 let gate_result = if self.config.quality_gates.enabled {
3480 let profile_name = &self.config.quality_gates.profile;
3481 match datasynth_eval::gates::get_profile(profile_name) {
3482 Some(profile) => {
3483 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3485
3486 if balance_validation.validated {
3488 eval.coherence.balance =
3489 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3490 equation_balanced: balance_validation.is_balanced,
3491 max_imbalance: (balance_validation.total_debits
3492 - balance_validation.total_credits)
3493 .abs(),
3494 periods_evaluated: 1,
3495 periods_imbalanced: if balance_validation.is_balanced {
3496 0
3497 } else {
3498 1
3499 },
3500 period_results: Vec::new(),
3501 companies_evaluated: self.config.companies.len(),
3502 });
3503 }
3504
3505 eval.coherence.passes = balance_validation.is_balanced;
3507 if !balance_validation.is_balanced {
3508 eval.coherence
3509 .failures
3510 .push("Balance sheet equation not satisfied".to_string());
3511 }
3512
3513 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3515 eval.statistical.passes = !entries.is_empty();
3516
3517 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3520
3521 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3522 info!(
3523 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3524 profile_name, result.gates_passed, result.gates_total, result.summary
3525 );
3526 Some(result)
3527 }
3528 None => {
3529 warn!(
3530 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3531 profile_name
3532 );
3533 None
3534 }
3535 }
3536 } else {
3537 None
3538 };
3539
3540 let internal_controls = if self.config.internal_controls.enabled {
3542 InternalControl::standard_controls()
3543 } else {
3544 Vec::new()
3545 };
3546
3547 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3551
3552 let statistical_validation = self.phase_statistical_validation(&entries)?;
3557
3558 Ok(EnhancedGenerationResult {
3559 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3560 master_data: std::mem::take(&mut self.master_data),
3561 document_flows,
3562 subledger,
3563 ocpm,
3564 audit,
3565 banking,
3566 graph_export,
3567 sourcing,
3568 financial_reporting,
3569 hr,
3570 accounting_standards,
3571 manufacturing: manufacturing_snap,
3572 sales_kpi_budgets,
3573 tax,
3574 esg: esg_snap,
3575 treasury,
3576 project_accounting,
3577 process_evolution,
3578 organizational_events,
3579 disruption_events,
3580 intercompany,
3581 journal_entries: entries,
3582 anomaly_labels,
3583 balance_validation,
3584 data_quality_stats,
3585 quality_issues,
3586 statistics: stats,
3587 lineage: Some(lineage),
3588 gate_result,
3589 internal_controls,
3590 sod_violations,
3591 opening_balances,
3592 subledger_reconciliation,
3593 counterfactual_pairs,
3594 red_flags,
3595 collusion_rings,
3596 temporal_vendor_chains,
3597 entity_relationship_graph,
3598 cross_process_links,
3599 industry_output,
3600 compliance_regulations,
3601 analytics_metadata,
3602 statistical_validation,
3603 })
3604 }
3605
3606 fn phase_chart_of_accounts(
3612 &mut self,
3613 stats: &mut EnhancedGenerationStatistics,
3614 ) -> SynthResult<Arc<ChartOfAccounts>> {
3615 info!("Phase 1: Generating Chart of Accounts");
3616 let coa = self.generate_coa()?;
3617 stats.accounts_count = coa.account_count();
3618 info!(
3619 "Chart of Accounts generated: {} accounts",
3620 stats.accounts_count
3621 );
3622 self.check_resources_with_log("post-coa")?;
3623 Ok(coa)
3624 }
3625
3626 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3628 if self.phase_config.generate_master_data {
3629 info!("Phase 2: Generating Master Data");
3630 self.generate_master_data()?;
3631 stats.vendor_count = self.master_data.vendors.len();
3632 stats.customer_count = self.master_data.customers.len();
3633 stats.material_count = self.master_data.materials.len();
3634 stats.asset_count = self.master_data.assets.len();
3635 stats.employee_count = self.master_data.employees.len();
3636 info!(
3637 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3638 stats.vendor_count, stats.customer_count, stats.material_count,
3639 stats.asset_count, stats.employee_count
3640 );
3641 self.check_resources_with_log("post-master-data")?;
3642 } else {
3643 debug!("Phase 2: Skipped (master data generation disabled)");
3644 }
3645 Ok(())
3646 }
3647
3648 fn phase_document_flows(
3650 &mut self,
3651 stats: &mut EnhancedGenerationStatistics,
3652 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3653 let mut document_flows = DocumentFlowSnapshot::default();
3654 let mut subledger = SubledgerSnapshot::default();
3655 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3658
3659 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3660 info!("Phase 3: Generating Document Flows");
3661 self.generate_document_flows(&mut document_flows)?;
3662 stats.p2p_chain_count = document_flows.p2p_chains.len();
3663 stats.o2c_chain_count = document_flows.o2c_chains.len();
3664 info!(
3665 "Document flows generated: {} P2P chains, {} O2C chains",
3666 stats.p2p_chain_count, stats.o2c_chain_count
3667 );
3668
3669 debug!("Phase 3b: Linking document flows to subledgers");
3671 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3672 stats.ap_invoice_count = subledger.ap_invoices.len();
3673 stats.ar_invoice_count = subledger.ar_invoices.len();
3674 debug!(
3675 "Subledgers linked: {} AP invoices, {} AR invoices",
3676 stats.ap_invoice_count, stats.ar_invoice_count
3677 );
3678
3679 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3684 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3685 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3686 debug!("Payment settlements applied to AP and AR subledgers");
3687
3688 if let Ok(start_date) =
3691 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3692 {
3693 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3694 - chrono::Days::new(1);
3695 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3696 for company in &self.config.companies {
3703 let ar_report = ARAgingReport::from_invoices(
3704 company.code.clone(),
3705 &subledger.ar_invoices,
3706 as_of_date,
3707 );
3708 subledger.ar_aging_reports.push(ar_report);
3709
3710 let ap_report = APAgingReport::from_invoices(
3711 company.code.clone(),
3712 &subledger.ap_invoices,
3713 as_of_date,
3714 );
3715 subledger.ap_aging_reports.push(ap_report);
3716 }
3717 debug!(
3718 "AR/AP aging reports built: {} AR, {} AP",
3719 subledger.ar_aging_reports.len(),
3720 subledger.ap_aging_reports.len()
3721 );
3722
3723 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3725 {
3726 use datasynth_generators::DunningGenerator;
3727 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3728 for company in &self.config.companies {
3729 let currency = company.currency.as_str();
3730 let mut company_invoices: Vec<
3733 datasynth_core::models::subledger::ar::ARInvoice,
3734 > = subledger
3735 .ar_invoices
3736 .iter()
3737 .filter(|inv| inv.company_code == company.code)
3738 .cloned()
3739 .collect();
3740
3741 if company_invoices.is_empty() {
3742 continue;
3743 }
3744
3745 let result = dunning_gen.execute_dunning_run(
3746 &company.code,
3747 as_of_date,
3748 &mut company_invoices,
3749 currency,
3750 );
3751
3752 for updated in &company_invoices {
3754 if let Some(orig) = subledger
3755 .ar_invoices
3756 .iter_mut()
3757 .find(|i| i.invoice_number == updated.invoice_number)
3758 {
3759 orig.dunning_info = updated.dunning_info.clone();
3760 }
3761 }
3762
3763 subledger.dunning_runs.push(result.dunning_run);
3764 subledger.dunning_letters.extend(result.letters);
3765 dunning_journal_entries.extend(result.journal_entries);
3767 }
3768 debug!(
3769 "Dunning runs complete: {} runs, {} letters",
3770 subledger.dunning_runs.len(),
3771 subledger.dunning_letters.len()
3772 );
3773 }
3774 }
3775
3776 self.check_resources_with_log("post-document-flows")?;
3777 } else {
3778 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3779 }
3780
3781 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3783 if !self.master_data.assets.is_empty() {
3784 debug!("Generating FA subledger records");
3785 let company_code = self
3786 .config
3787 .companies
3788 .first()
3789 .map(|c| c.code.as_str())
3790 .unwrap_or("1000");
3791 let currency = self
3792 .config
3793 .companies
3794 .first()
3795 .map(|c| c.currency.as_str())
3796 .unwrap_or("USD");
3797
3798 let mut fa_gen = datasynth_generators::FAGenerator::new(
3799 datasynth_generators::FAGeneratorConfig::default(),
3800 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3801 );
3802
3803 for asset in &self.master_data.assets {
3804 let (record, je) = fa_gen.generate_asset_acquisition(
3805 company_code,
3806 &format!("{:?}", asset.asset_class),
3807 &asset.description,
3808 asset.acquisition_date,
3809 currency,
3810 asset.cost_center.as_deref(),
3811 );
3812 subledger.fa_records.push(record);
3813 fa_journal_entries.push(je);
3814 }
3815
3816 stats.fa_subledger_count = subledger.fa_records.len();
3817 debug!(
3818 "FA subledger records generated: {} (with {} acquisition JEs)",
3819 stats.fa_subledger_count,
3820 fa_journal_entries.len()
3821 );
3822 }
3823
3824 if !self.master_data.materials.is_empty() {
3826 debug!("Generating Inventory subledger records");
3827 let first_company = self.config.companies.first();
3828 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3829 let inv_currency = first_company
3830 .map(|c| c.currency.clone())
3831 .unwrap_or_else(|| "USD".to_string());
3832
3833 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3834 datasynth_generators::InventoryGeneratorConfig::default(),
3835 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3836 inv_currency.clone(),
3837 );
3838
3839 for (i, material) in self.master_data.materials.iter().enumerate() {
3840 let plant = format!("PLANT{:02}", (i % 3) + 1);
3841 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3842 let initial_qty = rust_decimal::Decimal::from(
3843 material
3844 .safety_stock
3845 .to_string()
3846 .parse::<i64>()
3847 .unwrap_or(100),
3848 );
3849
3850 let position = inv_gen.generate_position(
3851 company_code,
3852 &plant,
3853 &storage_loc,
3854 &material.material_id,
3855 &material.description,
3856 initial_qty,
3857 Some(material.standard_cost),
3858 &inv_currency,
3859 );
3860 subledger.inventory_positions.push(position);
3861 }
3862
3863 stats.inventory_subledger_count = subledger.inventory_positions.len();
3864 debug!(
3865 "Inventory subledger records generated: {}",
3866 stats.inventory_subledger_count
3867 );
3868 }
3869
3870 if !subledger.fa_records.is_empty() {
3872 if let Ok(start_date) =
3873 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3874 {
3875 let company_code = self
3876 .config
3877 .companies
3878 .first()
3879 .map(|c| c.code.as_str())
3880 .unwrap_or("1000");
3881 let fiscal_year = start_date.year();
3882 let start_period = start_date.month();
3883 let end_period =
3884 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3885
3886 let depr_cfg = FaDepreciationScheduleConfig {
3887 fiscal_year,
3888 start_period,
3889 end_period,
3890 seed_offset: 800,
3891 };
3892 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3893 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3894 let run_count = runs.len();
3895 subledger.depreciation_runs = runs;
3896 debug!(
3897 "Depreciation runs generated: {} runs for {} periods",
3898 run_count, self.config.global.period_months
3899 );
3900 }
3901 }
3902
3903 if !subledger.inventory_positions.is_empty() {
3905 if let Ok(start_date) =
3906 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3907 {
3908 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3909 - chrono::Days::new(1);
3910
3911 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3912 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3913
3914 for company in &self.config.companies {
3915 let result = inv_val_gen.generate(
3916 &company.code,
3917 &subledger.inventory_positions,
3918 as_of_date,
3919 );
3920 subledger.inventory_valuations.push(result);
3921 }
3922 debug!(
3923 "Inventory valuations generated: {} company reports",
3924 subledger.inventory_valuations.len()
3925 );
3926 }
3927 }
3928
3929 Ok((document_flows, subledger, fa_journal_entries))
3930 }
3931
3932 #[allow(clippy::too_many_arguments)]
3934 fn phase_ocpm_events(
3935 &mut self,
3936 document_flows: &DocumentFlowSnapshot,
3937 sourcing: &SourcingSnapshot,
3938 hr: &HrSnapshot,
3939 manufacturing: &ManufacturingSnapshot,
3940 banking: &BankingSnapshot,
3941 audit: &AuditSnapshot,
3942 financial_reporting: &FinancialReportingSnapshot,
3943 stats: &mut EnhancedGenerationStatistics,
3944 ) -> SynthResult<OcpmSnapshot> {
3945 let degradation = self.check_resources()?;
3946 if degradation >= DegradationLevel::Reduced {
3947 debug!(
3948 "Phase skipped due to resource pressure (degradation: {:?})",
3949 degradation
3950 );
3951 return Ok(OcpmSnapshot::default());
3952 }
3953 if self.phase_config.generate_ocpm_events {
3954 info!("Phase 3c: Generating OCPM Events");
3955 let ocpm_snapshot = self.generate_ocpm_events(
3956 document_flows,
3957 sourcing,
3958 hr,
3959 manufacturing,
3960 banking,
3961 audit,
3962 financial_reporting,
3963 )?;
3964 stats.ocpm_event_count = ocpm_snapshot.event_count;
3965 stats.ocpm_object_count = ocpm_snapshot.object_count;
3966 stats.ocpm_case_count = ocpm_snapshot.case_count;
3967 info!(
3968 "OCPM events generated: {} events, {} objects, {} cases",
3969 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3970 );
3971 self.check_resources_with_log("post-ocpm")?;
3972 Ok(ocpm_snapshot)
3973 } else {
3974 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3975 Ok(OcpmSnapshot::default())
3976 }
3977 }
3978
3979 fn phase_journal_entries(
3981 &mut self,
3982 coa: &Arc<ChartOfAccounts>,
3983 document_flows: &DocumentFlowSnapshot,
3984 _stats: &mut EnhancedGenerationStatistics,
3985 ) -> SynthResult<Vec<JournalEntry>> {
3986 let mut entries = Vec::new();
3987
3988 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3990 debug!("Phase 4a: Generating JEs from document flows");
3991 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3992 debug!("Generated {} JEs from document flows", flow_entries.len());
3993 entries.extend(flow_entries);
3994 }
3995
3996 if self.phase_config.generate_journal_entries {
3998 info!("Phase 4: Generating Journal Entries");
3999 let je_entries = self.generate_journal_entries(coa)?;
4000 info!("Generated {} standalone journal entries", je_entries.len());
4001 entries.extend(je_entries);
4002 } else {
4003 debug!("Phase 4: Skipped (journal entry generation disabled)");
4004 }
4005
4006 if !entries.is_empty() {
4007 self.check_resources_with_log("post-journal-entries")?;
4010 }
4011
4012 Ok(entries)
4013 }
4014
4015 fn phase_anomaly_injection(
4017 &mut self,
4018 entries: &mut [JournalEntry],
4019 actions: &DegradationActions,
4020 stats: &mut EnhancedGenerationStatistics,
4021 ) -> SynthResult<AnomalyLabels> {
4022 if self.phase_config.inject_anomalies
4023 && !entries.is_empty()
4024 && !actions.skip_anomaly_injection
4025 {
4026 info!("Phase 5: Injecting Anomalies");
4027 let result = self.inject_anomalies(entries)?;
4028 stats.anomalies_injected = result.labels.len();
4029 info!("Injected {} anomalies", stats.anomalies_injected);
4030 self.check_resources_with_log("post-anomaly-injection")?;
4031 Ok(result)
4032 } else if actions.skip_anomaly_injection {
4033 warn!("Phase 5: Skipped due to resource degradation");
4034 Ok(AnomalyLabels::default())
4035 } else {
4036 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4037 Ok(AnomalyLabels::default())
4038 }
4039 }
4040
4041 fn phase_balance_validation(
4043 &mut self,
4044 entries: &[JournalEntry],
4045 ) -> SynthResult<BalanceValidationResult> {
4046 if self.phase_config.validate_balances && !entries.is_empty() {
4047 debug!("Phase 6: Validating Balances");
4048 let balance_validation = self.validate_journal_entries(entries)?;
4049 if balance_validation.is_balanced {
4050 debug!("Balance validation passed");
4051 } else {
4052 warn!(
4053 "Balance validation found {} errors",
4054 balance_validation.validation_errors.len()
4055 );
4056 }
4057 Ok(balance_validation)
4058 } else {
4059 Ok(BalanceValidationResult::default())
4060 }
4061 }
4062
4063 fn phase_data_quality_injection(
4065 &mut self,
4066 entries: &mut [JournalEntry],
4067 actions: &DegradationActions,
4068 stats: &mut EnhancedGenerationStatistics,
4069 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4070 if self.phase_config.inject_data_quality
4071 && !entries.is_empty()
4072 && !actions.skip_data_quality
4073 {
4074 info!("Phase 7: Injecting Data Quality Variations");
4075 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4076 stats.data_quality_issues = dq_stats.records_with_issues;
4077 info!("Injected {} data quality issues", stats.data_quality_issues);
4078 self.check_resources_with_log("post-data-quality")?;
4079 Ok((dq_stats, quality_issues))
4080 } else if actions.skip_data_quality {
4081 warn!("Phase 7: Skipped due to resource degradation");
4082 Ok((DataQualityStats::default(), Vec::new()))
4083 } else {
4084 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4085 Ok((DataQualityStats::default(), Vec::new()))
4086 }
4087 }
4088
4089 fn phase_period_close(
4099 &mut self,
4100 entries: &mut Vec<JournalEntry>,
4101 subledger: &SubledgerSnapshot,
4102 stats: &mut EnhancedGenerationStatistics,
4103 ) -> SynthResult<()> {
4104 if !self.phase_config.generate_period_close || entries.is_empty() {
4105 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4106 return Ok(());
4107 }
4108
4109 info!("Phase 10b: Generating period-close journal entries");
4110
4111 use datasynth_core::accounts::{
4112 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4113 };
4114 use rust_decimal::Decimal;
4115
4116 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4117 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4118 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4119 let close_date = end_date - chrono::Days::new(1);
4121
4122 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4127 .config
4128 .companies
4129 .iter()
4130 .map(|c| c.code.clone())
4131 .collect();
4132
4133 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4135 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4136
4137 let period_months = self.config.global.period_months;
4141 for asset in &subledger.fa_records {
4142 use datasynth_core::models::subledger::fa::AssetStatus;
4144 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4145 continue;
4146 }
4147 let useful_life_months = asset.useful_life_months();
4148 if useful_life_months == 0 {
4149 continue;
4151 }
4152 let salvage_value = asset.salvage_value();
4153 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4154 if depreciable_base == Decimal::ZERO {
4155 continue;
4156 }
4157 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4158 * Decimal::from(period_months))
4159 .round_dp(2);
4160 if period_depr <= Decimal::ZERO {
4161 continue;
4162 }
4163
4164 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4165 depr_header.document_type = "CL".to_string();
4166 depr_header.header_text = Some(format!(
4167 "Depreciation - {} {}",
4168 asset.asset_number, asset.description
4169 ));
4170 depr_header.created_by = "CLOSE_ENGINE".to_string();
4171 depr_header.source = TransactionSource::Automated;
4172 depr_header.business_process = Some(BusinessProcess::R2R);
4173
4174 let doc_id = depr_header.document_id;
4175 let mut depr_je = JournalEntry::new(depr_header);
4176
4177 depr_je.add_line(JournalEntryLine::debit(
4179 doc_id,
4180 1,
4181 expense_accounts::DEPRECIATION.to_string(),
4182 period_depr,
4183 ));
4184 depr_je.add_line(JournalEntryLine::credit(
4186 doc_id,
4187 2,
4188 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4189 period_depr,
4190 ));
4191
4192 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4193 close_jes.push(depr_je);
4194 }
4195
4196 if !subledger.fa_records.is_empty() {
4197 debug!(
4198 "Generated {} depreciation JEs from {} FA records",
4199 close_jes.len(),
4200 subledger.fa_records.len()
4201 );
4202 }
4203
4204 {
4208 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4209 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4210 if let Some(ctx) = &self.temporal_context {
4213 accrual_gen.set_temporal_context(Arc::clone(ctx));
4214 }
4215
4216 let accrual_items: &[(&str, &str, &str)] = &[
4218 ("Accrued Utilities", "6200", "2100"),
4219 ("Accrued Rent", "6300", "2100"),
4220 ("Accrued Interest", "6100", "2150"),
4221 ];
4222
4223 for company_code in &company_codes {
4224 let company_revenue: Decimal = entries
4226 .iter()
4227 .filter(|e| e.header.company_code == *company_code)
4228 .flat_map(|e| e.lines.iter())
4229 .filter(|l| l.gl_account.starts_with('4'))
4230 .map(|l| l.credit_amount - l.debit_amount)
4231 .fold(Decimal::ZERO, |acc, v| acc + v);
4232
4233 if company_revenue <= Decimal::ZERO {
4234 continue;
4235 }
4236
4237 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4239 if accrual_base <= Decimal::ZERO {
4240 continue;
4241 }
4242
4243 for (description, expense_acct, liability_acct) in accrual_items {
4244 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4245 company_code,
4246 description,
4247 accrual_base,
4248 expense_acct,
4249 liability_acct,
4250 close_date,
4251 None,
4252 );
4253 close_jes.push(accrual_je);
4254 if let Some(rev_je) = reversal_je {
4255 close_jes.push(rev_je);
4256 }
4257 }
4258 }
4259
4260 debug!(
4261 "Generated accrual entries for {} companies",
4262 company_codes.len()
4263 );
4264 }
4265
4266 for company_code in &company_codes {
4267 let mut total_revenue = Decimal::ZERO;
4272 let mut total_expenses = Decimal::ZERO;
4273
4274 for entry in entries.iter() {
4275 if entry.header.company_code != *company_code {
4276 continue;
4277 }
4278 for line in &entry.lines {
4279 let category = AccountCategory::from_account(&line.gl_account);
4280 match category {
4281 AccountCategory::Revenue => {
4282 total_revenue += line.credit_amount - line.debit_amount;
4284 }
4285 AccountCategory::Cogs
4286 | AccountCategory::OperatingExpense
4287 | AccountCategory::OtherIncomeExpense
4288 | AccountCategory::Tax => {
4289 total_expenses += line.debit_amount - line.credit_amount;
4291 }
4292 _ => {}
4293 }
4294 }
4295 }
4296
4297 let pre_tax_income = total_revenue - total_expenses;
4298
4299 if pre_tax_income == Decimal::ZERO {
4301 debug!(
4302 "Company {}: no pre-tax income, skipping period close",
4303 company_code
4304 );
4305 continue;
4306 }
4307
4308 if pre_tax_income > Decimal::ZERO {
4310 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4312
4313 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4314 tax_header.document_type = "CL".to_string();
4315 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4316 tax_header.created_by = "CLOSE_ENGINE".to_string();
4317 tax_header.source = TransactionSource::Automated;
4318 tax_header.business_process = Some(BusinessProcess::R2R);
4319
4320 let doc_id = tax_header.document_id;
4321 let mut tax_je = JournalEntry::new(tax_header);
4322
4323 tax_je.add_line(JournalEntryLine::debit(
4325 doc_id,
4326 1,
4327 tax_accounts::TAX_EXPENSE.to_string(),
4328 tax_amount,
4329 ));
4330 tax_je.add_line(JournalEntryLine::credit(
4332 doc_id,
4333 2,
4334 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4335 tax_amount,
4336 ));
4337
4338 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4339 close_jes.push(tax_je);
4340 } else {
4341 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4344 if dta_amount > Decimal::ZERO {
4345 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4346 dta_header.document_type = "CL".to_string();
4347 dta_header.header_text =
4348 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4349 dta_header.created_by = "CLOSE_ENGINE".to_string();
4350 dta_header.source = TransactionSource::Automated;
4351 dta_header.business_process = Some(BusinessProcess::R2R);
4352
4353 let doc_id = dta_header.document_id;
4354 let mut dta_je = JournalEntry::new(dta_header);
4355
4356 dta_je.add_line(JournalEntryLine::debit(
4358 doc_id,
4359 1,
4360 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4361 dta_amount,
4362 ));
4363 dta_je.add_line(JournalEntryLine::credit(
4366 doc_id,
4367 2,
4368 tax_accounts::TAX_EXPENSE.to_string(),
4369 dta_amount,
4370 ));
4371
4372 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4373 close_jes.push(dta_je);
4374 debug!(
4375 "Company {}: loss year — recognised DTA of {}",
4376 company_code, dta_amount
4377 );
4378 }
4379 }
4380
4381 let tax_provision = if pre_tax_income > Decimal::ZERO {
4387 (pre_tax_income * tax_rate).round_dp(2)
4388 } else {
4389 Decimal::ZERO
4390 };
4391 let net_income = pre_tax_income - tax_provision;
4392
4393 if net_income > Decimal::ZERO {
4394 use datasynth_generators::DividendGenerator;
4395 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4397 let currency_str = self
4398 .config
4399 .companies
4400 .iter()
4401 .find(|c| c.code == *company_code)
4402 .map(|c| c.currency.as_str())
4403 .unwrap_or("USD");
4404 let div_result = div_gen.generate(
4405 company_code,
4406 close_date,
4407 Decimal::new(1, 0), dividend_amount,
4409 currency_str,
4410 );
4411 let div_je_count = div_result.journal_entries.len();
4412 close_jes.extend(div_result.journal_entries);
4413 debug!(
4414 "Company {}: declared dividend of {} ({} JEs)",
4415 company_code, dividend_amount, div_je_count
4416 );
4417 }
4418
4419 if net_income != Decimal::ZERO {
4424 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4425 close_header.document_type = "CL".to_string();
4426 close_header.header_text =
4427 Some(format!("Income statement close - {}", company_code));
4428 close_header.created_by = "CLOSE_ENGINE".to_string();
4429 close_header.source = TransactionSource::Automated;
4430 close_header.business_process = Some(BusinessProcess::R2R);
4431
4432 let doc_id = close_header.document_id;
4433 let mut close_je = JournalEntry::new(close_header);
4434
4435 let abs_net_income = net_income.abs();
4436
4437 if net_income > Decimal::ZERO {
4438 close_je.add_line(JournalEntryLine::debit(
4440 doc_id,
4441 1,
4442 equity_accounts::INCOME_SUMMARY.to_string(),
4443 abs_net_income,
4444 ));
4445 close_je.add_line(JournalEntryLine::credit(
4446 doc_id,
4447 2,
4448 equity_accounts::RETAINED_EARNINGS.to_string(),
4449 abs_net_income,
4450 ));
4451 } else {
4452 close_je.add_line(JournalEntryLine::debit(
4454 doc_id,
4455 1,
4456 equity_accounts::RETAINED_EARNINGS.to_string(),
4457 abs_net_income,
4458 ));
4459 close_je.add_line(JournalEntryLine::credit(
4460 doc_id,
4461 2,
4462 equity_accounts::INCOME_SUMMARY.to_string(),
4463 abs_net_income,
4464 ));
4465 }
4466
4467 debug_assert!(
4468 close_je.is_balanced(),
4469 "Income statement closing JE must be balanced"
4470 );
4471 close_jes.push(close_je);
4472 }
4473 }
4474
4475 let close_count = close_jes.len();
4476 if close_count > 0 {
4477 info!("Generated {} period-close journal entries", close_count);
4478 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4479 entries.extend(close_jes);
4480 stats.period_close_je_count = close_count;
4481
4482 stats.total_entries = entries.len() as u64;
4484 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4485 } else {
4486 debug!("No period-close entries generated (no income statement activity)");
4487 }
4488
4489 Ok(())
4490 }
4491
4492 fn phase_audit_data(
4494 &mut self,
4495 entries: &[JournalEntry],
4496 stats: &mut EnhancedGenerationStatistics,
4497 ) -> SynthResult<AuditSnapshot> {
4498 if self.phase_config.generate_audit {
4499 info!("Phase 8: Generating Audit Data");
4500 let audit_snapshot = self.generate_audit_data(entries)?;
4501 stats.audit_engagement_count = audit_snapshot.engagements.len();
4502 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4503 stats.audit_evidence_count = audit_snapshot.evidence.len();
4504 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4505 stats.audit_finding_count = audit_snapshot.findings.len();
4506 stats.audit_judgment_count = audit_snapshot.judgments.len();
4507 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4508 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4509 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4510 stats.audit_sample_count = audit_snapshot.samples.len();
4511 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4512 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4513 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4514 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4515 stats.audit_related_party_transaction_count =
4516 audit_snapshot.related_party_transactions.len();
4517 info!(
4518 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4519 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4520 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4521 {} RP transactions",
4522 stats.audit_engagement_count,
4523 stats.audit_workpaper_count,
4524 stats.audit_evidence_count,
4525 stats.audit_risk_count,
4526 stats.audit_finding_count,
4527 stats.audit_judgment_count,
4528 stats.audit_confirmation_count,
4529 stats.audit_procedure_step_count,
4530 stats.audit_sample_count,
4531 stats.audit_analytical_result_count,
4532 stats.audit_ia_function_count,
4533 stats.audit_ia_report_count,
4534 stats.audit_related_party_count,
4535 stats.audit_related_party_transaction_count,
4536 );
4537 self.check_resources_with_log("post-audit")?;
4538 Ok(audit_snapshot)
4539 } else {
4540 debug!("Phase 8: Skipped (audit generation disabled)");
4541 Ok(AuditSnapshot::default())
4542 }
4543 }
4544
4545 fn phase_banking_data(
4547 &mut self,
4548 stats: &mut EnhancedGenerationStatistics,
4549 ) -> SynthResult<BankingSnapshot> {
4550 if self.phase_config.generate_banking {
4551 info!("Phase 9: Generating Banking KYC/AML Data");
4552 let banking_snapshot = self.generate_banking_data()?;
4553 stats.banking_customer_count = banking_snapshot.customers.len();
4554 stats.banking_account_count = banking_snapshot.accounts.len();
4555 stats.banking_transaction_count = banking_snapshot.transactions.len();
4556 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4557 info!(
4558 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4559 stats.banking_customer_count, stats.banking_account_count,
4560 stats.banking_transaction_count, stats.banking_suspicious_count
4561 );
4562 self.check_resources_with_log("post-banking")?;
4563 Ok(banking_snapshot)
4564 } else {
4565 debug!("Phase 9: Skipped (banking generation disabled)");
4566 Ok(BankingSnapshot::default())
4567 }
4568 }
4569
4570 fn phase_graph_export(
4572 &mut self,
4573 entries: &[JournalEntry],
4574 coa: &Arc<ChartOfAccounts>,
4575 stats: &mut EnhancedGenerationStatistics,
4576 ) -> SynthResult<GraphExportSnapshot> {
4577 if self.phase_config.generate_graph_export && !entries.is_empty() {
4578 info!("Phase 10: Exporting Accounting Network Graphs");
4579 match self.export_graphs(entries, coa, stats) {
4580 Ok(snapshot) => {
4581 info!(
4582 "Graph export complete: {} graphs ({} nodes, {} edges)",
4583 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4584 );
4585 Ok(snapshot)
4586 }
4587 Err(e) => {
4588 warn!("Phase 10: Graph export failed: {}", e);
4589 Ok(GraphExportSnapshot::default())
4590 }
4591 }
4592 } else {
4593 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4594 Ok(GraphExportSnapshot::default())
4595 }
4596 }
4597
4598 #[allow(clippy::too_many_arguments)]
4600 fn phase_hypergraph_export(
4601 &self,
4602 coa: &Arc<ChartOfAccounts>,
4603 entries: &[JournalEntry],
4604 document_flows: &DocumentFlowSnapshot,
4605 sourcing: &SourcingSnapshot,
4606 hr: &HrSnapshot,
4607 manufacturing: &ManufacturingSnapshot,
4608 banking: &BankingSnapshot,
4609 audit: &AuditSnapshot,
4610 financial_reporting: &FinancialReportingSnapshot,
4611 ocpm: &OcpmSnapshot,
4612 compliance: &ComplianceRegulationsSnapshot,
4613 stats: &mut EnhancedGenerationStatistics,
4614 ) -> SynthResult<()> {
4615 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4616 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4617 match self.export_hypergraph(
4618 coa,
4619 entries,
4620 document_flows,
4621 sourcing,
4622 hr,
4623 manufacturing,
4624 banking,
4625 audit,
4626 financial_reporting,
4627 ocpm,
4628 compliance,
4629 stats,
4630 ) {
4631 Ok(info) => {
4632 info!(
4633 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4634 info.node_count, info.edge_count, info.hyperedge_count
4635 );
4636 }
4637 Err(e) => {
4638 warn!("Phase 10b: Hypergraph export failed: {}", e);
4639 }
4640 }
4641 } else {
4642 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4643 }
4644 Ok(())
4645 }
4646
4647 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4653 if !self.config.llm.enabled {
4654 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4655 return;
4656 }
4657
4658 info!("Phase 11: Starting LLM Enrichment");
4659 let start = std::time::Instant::now();
4660
4661 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4662 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4665 let schema_provider = &self.config.llm.provider;
4666 let api_key_env = match schema_provider.as_str() {
4667 "openai" => Some("OPENAI_API_KEY"),
4668 "anthropic" => Some("ANTHROPIC_API_KEY"),
4669 "custom" => Some("LLM_API_KEY"),
4670 _ => None,
4671 };
4672 if let Some(key_env) = api_key_env {
4673 if std::env::var(key_env).is_ok() {
4674 let llm_config = datasynth_core::llm::LlmConfig {
4675 model: self.config.llm.model.clone(),
4676 api_key_env: key_env.to_string(),
4677 ..datasynth_core::llm::LlmConfig::default()
4678 };
4679 match HttpLlmProvider::new(llm_config) {
4680 Ok(p) => Arc::new(p),
4681 Err(e) => {
4682 warn!(
4683 "Failed to create HttpLlmProvider: {}; falling back to mock",
4684 e
4685 );
4686 Arc::new(MockLlmProvider::new(self.seed))
4687 }
4688 }
4689 } else {
4690 Arc::new(MockLlmProvider::new(self.seed))
4691 }
4692 } else {
4693 Arc::new(MockLlmProvider::new(self.seed))
4694 }
4695 };
4696 let enricher = VendorLlmEnricher::new(provider);
4697
4698 let industry = format!("{:?}", self.config.global.industry);
4699 let max_enrichments = self
4700 .config
4701 .llm
4702 .max_vendor_enrichments
4703 .min(self.master_data.vendors.len());
4704
4705 let mut enriched_count = 0usize;
4706 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4707 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4708 Ok(name) => {
4709 vendor.name = name;
4710 enriched_count += 1;
4711 }
4712 Err(e) => {
4713 warn!(
4714 "LLM vendor enrichment failed for {}: {}",
4715 vendor.vendor_id, e
4716 );
4717 }
4718 }
4719 }
4720
4721 enriched_count
4722 }));
4723
4724 match result {
4725 Ok(enriched_count) => {
4726 stats.llm_vendors_enriched = enriched_count;
4727 let elapsed = start.elapsed();
4728 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4729 info!(
4730 "Phase 11 complete: {} vendors enriched in {}ms",
4731 enriched_count, stats.llm_enrichment_ms
4732 );
4733 }
4734 Err(_) => {
4735 let elapsed = start.elapsed();
4736 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4737 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4738 }
4739 }
4740 }
4741
4742 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4748 if !self.config.diffusion.enabled {
4749 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4750 return;
4751 }
4752
4753 info!("Phase 12: Starting Diffusion Enhancement");
4754 let start = std::time::Instant::now();
4755
4756 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4757 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4760
4761 let diffusion_config = DiffusionConfig {
4762 n_steps: self.config.diffusion.n_steps,
4763 seed: self.seed,
4764 ..Default::default()
4765 };
4766
4767 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4768
4769 let n_samples = self.config.diffusion.sample_size;
4770 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4772
4773 samples.len()
4774 }));
4775
4776 match result {
4777 Ok(sample_count) => {
4778 stats.diffusion_samples_generated = sample_count;
4779 let elapsed = start.elapsed();
4780 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4781 info!(
4782 "Phase 12 complete: {} diffusion samples generated in {}ms",
4783 sample_count, stats.diffusion_enhancement_ms
4784 );
4785 }
4786 Err(_) => {
4787 let elapsed = start.elapsed();
4788 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4789 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4790 }
4791 }
4792 }
4793
4794 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4801 if !self.config.causal.enabled {
4802 debug!("Phase 13: Skipped (causal generation disabled)");
4803 return;
4804 }
4805
4806 info!("Phase 13: Starting Causal Overlay");
4807 let start = std::time::Instant::now();
4808
4809 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4810 let graph = match self.config.causal.template.as_str() {
4812 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4813 _ => CausalGraph::fraud_detection_template(),
4814 };
4815
4816 let scm = StructuralCausalModel::new(graph.clone())
4817 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4818
4819 let n_samples = self.config.causal.sample_size;
4820 let samples = scm
4821 .generate(n_samples, self.seed)
4822 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4823
4824 let validation_passed = if self.config.causal.validate {
4826 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4827 if report.valid {
4828 info!(
4829 "Causal validation passed: all {} checks OK",
4830 report.checks.len()
4831 );
4832 } else {
4833 warn!(
4834 "Causal validation: {} violations detected: {:?}",
4835 report.violations.len(),
4836 report.violations
4837 );
4838 }
4839 Some(report.valid)
4840 } else {
4841 None
4842 };
4843
4844 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4845 }));
4846
4847 match result {
4848 Ok(Ok((sample_count, validation_passed))) => {
4849 stats.causal_samples_generated = sample_count;
4850 stats.causal_validation_passed = validation_passed;
4851 let elapsed = start.elapsed();
4852 stats.causal_generation_ms = elapsed.as_millis() as u64;
4853 info!(
4854 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4855 sample_count, stats.causal_generation_ms, validation_passed,
4856 );
4857 }
4858 Ok(Err(e)) => {
4859 let elapsed = start.elapsed();
4860 stats.causal_generation_ms = elapsed.as_millis() as u64;
4861 warn!("Phase 13: Causal generation failed: {}", e);
4862 }
4863 Err(_) => {
4864 let elapsed = start.elapsed();
4865 stats.causal_generation_ms = elapsed.as_millis() as u64;
4866 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4867 }
4868 }
4869 }
4870
4871 fn phase_sourcing_data(
4873 &mut self,
4874 stats: &mut EnhancedGenerationStatistics,
4875 ) -> SynthResult<SourcingSnapshot> {
4876 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4877 debug!("Phase 14: Skipped (sourcing generation disabled)");
4878 return Ok(SourcingSnapshot::default());
4879 }
4880 let degradation = self.check_resources()?;
4881 if degradation >= DegradationLevel::Reduced {
4882 debug!(
4883 "Phase skipped due to resource pressure (degradation: {:?})",
4884 degradation
4885 );
4886 return Ok(SourcingSnapshot::default());
4887 }
4888
4889 info!("Phase 14: Generating S2C Sourcing Data");
4890 let seed = self.seed;
4891
4892 let vendor_ids: Vec<String> = self
4894 .master_data
4895 .vendors
4896 .iter()
4897 .map(|v| v.vendor_id.clone())
4898 .collect();
4899 if vendor_ids.is_empty() {
4900 debug!("Phase 14: Skipped (no vendors available)");
4901 return Ok(SourcingSnapshot::default());
4902 }
4903
4904 let categories: Vec<(String, String)> = vec![
4905 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4906 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4907 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4908 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4909 ("CAT-LOG".to_string(), "Logistics".to_string()),
4910 ];
4911 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4912 .iter()
4913 .map(|(id, name)| {
4914 (
4915 id.clone(),
4916 name.clone(),
4917 rust_decimal::Decimal::from(100_000),
4918 )
4919 })
4920 .collect();
4921
4922 let company_code = self
4923 .config
4924 .companies
4925 .first()
4926 .map(|c| c.code.as_str())
4927 .unwrap_or("1000");
4928 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4929 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4930 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4931 let fiscal_year = start_date.year() as u16;
4932 let owner_ids: Vec<String> = self
4933 .master_data
4934 .employees
4935 .iter()
4936 .take(5)
4937 .map(|e| e.employee_id.clone())
4938 .collect();
4939 let owner_id = owner_ids
4940 .first()
4941 .map(std::string::String::as_str)
4942 .unwrap_or("BUYER-001");
4943
4944 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4946 let spend_analyses =
4947 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4948
4949 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4951 let sourcing_projects = if owner_ids.is_empty() {
4952 Vec::new()
4953 } else {
4954 project_gen.generate(
4955 company_code,
4956 &categories_with_spend,
4957 &owner_ids,
4958 start_date,
4959 self.config.global.period_months,
4960 )
4961 };
4962 stats.sourcing_project_count = sourcing_projects.len();
4963
4964 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4966 let mut qual_gen = QualificationGenerator::new(seed + 2);
4967 let qualifications = qual_gen.generate(
4968 company_code,
4969 &qual_vendor_ids,
4970 sourcing_projects.first().map(|p| p.project_id.as_str()),
4971 owner_id,
4972 start_date,
4973 );
4974
4975 let mut rfx_gen = RfxGenerator::new(seed + 3);
4977 let rfx_events: Vec<RfxEvent> = sourcing_projects
4978 .iter()
4979 .map(|proj| {
4980 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4981 rfx_gen.generate(
4982 company_code,
4983 &proj.project_id,
4984 &proj.category_id,
4985 &qualified_vids,
4986 owner_id,
4987 start_date,
4988 50000.0,
4989 )
4990 })
4991 .collect();
4992 stats.rfx_event_count = rfx_events.len();
4993
4994 let mut bid_gen = BidGenerator::new(seed + 4);
4996 let mut all_bids = Vec::new();
4997 for rfx in &rfx_events {
4998 let bidder_count = vendor_ids.len().clamp(2, 5);
4999 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5000 let bids = bid_gen.generate(rfx, &responding, start_date);
5001 all_bids.extend(bids);
5002 }
5003 stats.bid_count = all_bids.len();
5004
5005 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5007 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5008 .iter()
5009 .map(|rfx| {
5010 let rfx_bids: Vec<SupplierBid> = all_bids
5011 .iter()
5012 .filter(|b| b.rfx_id == rfx.rfx_id)
5013 .cloned()
5014 .collect();
5015 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5016 })
5017 .collect();
5018
5019 let mut contract_gen = ContractGenerator::new(seed + 6);
5021 let contracts: Vec<ProcurementContract> = bid_evaluations
5022 .iter()
5023 .zip(rfx_events.iter())
5024 .filter_map(|(eval, rfx)| {
5025 eval.ranked_bids.first().and_then(|winner| {
5026 all_bids
5027 .iter()
5028 .find(|b| b.bid_id == winner.bid_id)
5029 .map(|winning_bid| {
5030 contract_gen.generate_from_bid(
5031 winning_bid,
5032 Some(&rfx.sourcing_project_id),
5033 &rfx.category_id,
5034 owner_id,
5035 start_date,
5036 )
5037 })
5038 })
5039 })
5040 .collect();
5041 stats.contract_count = contracts.len();
5042
5043 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5045 let catalog_items = catalog_gen.generate(&contracts);
5046 stats.catalog_item_count = catalog_items.len();
5047
5048 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5050 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5051 .iter()
5052 .fold(
5053 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5054 |mut acc, c| {
5055 acc.entry(c.vendor_id.clone()).or_default().push(c);
5056 acc
5057 },
5058 )
5059 .into_iter()
5060 .collect();
5061 let scorecards = scorecard_gen.generate(
5062 company_code,
5063 &vendor_contracts,
5064 start_date,
5065 end_date,
5066 owner_id,
5067 );
5068 stats.scorecard_count = scorecards.len();
5069
5070 let mut sourcing_projects = sourcing_projects;
5073 for project in &mut sourcing_projects {
5074 project.rfx_ids = rfx_events
5076 .iter()
5077 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5078 .map(|rfx| rfx.rfx_id.clone())
5079 .collect();
5080
5081 project.contract_id = contracts
5083 .iter()
5084 .find(|c| {
5085 c.sourcing_project_id
5086 .as_deref()
5087 .is_some_and(|sp| sp == project.project_id)
5088 })
5089 .map(|c| c.contract_id.clone());
5090
5091 project.spend_analysis_id = spend_analyses
5093 .iter()
5094 .find(|sa| sa.category_id == project.category_id)
5095 .map(|sa| sa.category_id.clone());
5096 }
5097
5098 info!(
5099 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5100 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5101 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5102 );
5103 self.check_resources_with_log("post-sourcing")?;
5104
5105 Ok(SourcingSnapshot {
5106 spend_analyses,
5107 sourcing_projects,
5108 qualifications,
5109 rfx_events,
5110 bids: all_bids,
5111 bid_evaluations,
5112 contracts,
5113 catalog_items,
5114 scorecards,
5115 })
5116 }
5117
5118 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5124 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5125
5126 let parent_code = self
5127 .config
5128 .companies
5129 .first()
5130 .map(|c| c.code.clone())
5131 .unwrap_or_else(|| "PARENT".to_string());
5132
5133 let mut group = GroupStructure::new(parent_code);
5134
5135 for company in self.config.companies.iter().skip(1) {
5136 let sub =
5137 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5138 group.add_subsidiary(sub);
5139 }
5140
5141 group
5142 }
5143
5144 fn phase_intercompany(
5146 &mut self,
5147 journal_entries: &[JournalEntry],
5148 stats: &mut EnhancedGenerationStatistics,
5149 ) -> SynthResult<IntercompanySnapshot> {
5150 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5152 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5153 return Ok(IntercompanySnapshot::default());
5154 }
5155
5156 if self.config.companies.len() < 2 {
5158 debug!(
5159 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5160 self.config.companies.len()
5161 );
5162 return Ok(IntercompanySnapshot::default());
5163 }
5164
5165 info!("Phase 14b: Generating Intercompany Transactions");
5166
5167 let group_structure = self.build_group_structure();
5170 debug!(
5171 "Group structure built: parent={}, subsidiaries={}",
5172 group_structure.parent_entity,
5173 group_structure.subsidiaries.len()
5174 );
5175
5176 let seed = self.seed;
5177 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5178 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5179 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5180
5181 let parent_code = self.config.companies[0].code.clone();
5184 let mut ownership_structure =
5185 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5186
5187 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5188 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5189 format!("REL{:03}", i + 1),
5190 parent_code.clone(),
5191 company.code.clone(),
5192 rust_decimal::Decimal::from(100), start_date,
5194 );
5195 ownership_structure.add_relationship(relationship);
5196 }
5197
5198 let tp_method = match self.config.intercompany.transfer_pricing_method {
5200 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5201 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5202 }
5203 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5204 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5205 }
5206 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5207 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5208 }
5209 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5210 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5211 }
5212 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5213 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5214 }
5215 };
5216
5217 let ic_currency = self
5219 .config
5220 .companies
5221 .first()
5222 .map(|c| c.currency.clone())
5223 .unwrap_or_else(|| "USD".to_string());
5224 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5225 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5226 transfer_pricing_method: tp_method,
5227 markup_percent: rust_decimal::Decimal::from_f64_retain(
5228 self.config.intercompany.markup_percent,
5229 )
5230 .unwrap_or(rust_decimal::Decimal::from(5)),
5231 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5232 default_currency: ic_currency,
5233 ..Default::default()
5234 };
5235
5236 let mut ic_generator = datasynth_generators::ICGenerator::new(
5238 ic_gen_config,
5239 ownership_structure.clone(),
5240 seed + 50,
5241 );
5242
5243 let transactions_per_day = 3;
5246 let matched_pairs = ic_generator.generate_transactions_for_period(
5247 start_date,
5248 end_date,
5249 transactions_per_day,
5250 );
5251
5252 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5254 debug!(
5255 "Generated {} IC seller invoices, {} IC buyer POs",
5256 ic_doc_chains.seller_invoices.len(),
5257 ic_doc_chains.buyer_orders.len()
5258 );
5259
5260 let mut seller_entries = Vec::new();
5262 let mut buyer_entries = Vec::new();
5263 let fiscal_year = start_date.year();
5264
5265 for pair in &matched_pairs {
5266 let fiscal_period = pair.posting_date.month();
5267 let (seller_je, buyer_je) =
5268 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5269 seller_entries.push(seller_je);
5270 buyer_entries.push(buyer_je);
5271 }
5272
5273 let matching_config = datasynth_generators::ICMatchingConfig {
5275 base_currency: self
5276 .config
5277 .companies
5278 .first()
5279 .map(|c| c.currency.clone())
5280 .unwrap_or_else(|| "USD".to_string()),
5281 ..Default::default()
5282 };
5283 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5284 matching_engine.load_matched_pairs(&matched_pairs);
5285 let matching_result = matching_engine.run_matching(end_date);
5286
5287 let mut elimination_entries = Vec::new();
5289 if self.config.intercompany.generate_eliminations {
5290 let elim_config = datasynth_generators::EliminationConfig {
5291 consolidation_entity: "GROUP".to_string(),
5292 base_currency: self
5293 .config
5294 .companies
5295 .first()
5296 .map(|c| c.currency.clone())
5297 .unwrap_or_else(|| "USD".to_string()),
5298 ..Default::default()
5299 };
5300
5301 let mut elim_generator =
5302 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5303
5304 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5305 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5306 matching_result
5307 .matched_balances
5308 .iter()
5309 .chain(matching_result.unmatched_balances.iter())
5310 .cloned()
5311 .collect();
5312
5313 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5325 std::collections::HashMap::new();
5326 let mut equity_amounts: std::collections::HashMap<
5327 String,
5328 std::collections::HashMap<String, rust_decimal::Decimal>,
5329 > = std::collections::HashMap::new();
5330 {
5331 use rust_decimal::Decimal;
5332 let hundred = Decimal::from(100u32);
5333 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5337 for sub in &group_structure.subsidiaries {
5338 let net_assets = {
5339 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5340 if na > Decimal::ZERO {
5341 na
5342 } else {
5343 Decimal::from(1_000_000u64)
5344 }
5345 };
5346 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5348 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5349
5350 let mut eq_map = std::collections::HashMap::new();
5353 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5354 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5355 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5356 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5357 }
5358 }
5359
5360 let journal = elim_generator.generate_eliminations(
5361 &fiscal_period,
5362 end_date,
5363 &all_balances,
5364 &matched_pairs,
5365 &investment_amounts,
5366 &equity_amounts,
5367 );
5368
5369 elimination_entries = journal.entries.clone();
5370 }
5371
5372 let matched_pair_count = matched_pairs.len();
5373 let elimination_entry_count = elimination_entries.len();
5374 let match_rate = matching_result.match_rate;
5375
5376 stats.ic_matched_pair_count = matched_pair_count;
5377 stats.ic_elimination_count = elimination_entry_count;
5378 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5379
5380 info!(
5381 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5382 matched_pair_count,
5383 stats.ic_transaction_count,
5384 seller_entries.len(),
5385 buyer_entries.len(),
5386 elimination_entry_count,
5387 match_rate * 100.0
5388 );
5389 self.check_resources_with_log("post-intercompany")?;
5390
5391 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5395 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5396 use rust_decimal::Decimal;
5397
5398 let eight_pct = Decimal::new(8, 2); group_structure
5401 .subsidiaries
5402 .iter()
5403 .filter(|sub| {
5404 sub.nci_percentage > Decimal::ZERO
5405 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5406 })
5407 .map(|sub| {
5408 let net_assets_from_jes =
5412 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5413
5414 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5415 net_assets_from_jes.round_dp(2)
5416 } else {
5417 Decimal::from(1_000_000u64)
5419 };
5420
5421 let net_income = (net_assets * eight_pct).round_dp(2);
5423
5424 NciMeasurement::compute(
5425 sub.entity_code.clone(),
5426 sub.nci_percentage,
5427 net_assets,
5428 net_income,
5429 )
5430 })
5431 .collect()
5432 };
5433
5434 if !nci_measurements.is_empty() {
5435 info!(
5436 "NCI measurements: {} subsidiaries with non-controlling interests",
5437 nci_measurements.len()
5438 );
5439 }
5440
5441 Ok(IntercompanySnapshot {
5442 group_structure: Some(group_structure),
5443 matched_pairs,
5444 seller_journal_entries: seller_entries,
5445 buyer_journal_entries: buyer_entries,
5446 elimination_entries,
5447 nci_measurements,
5448 ic_document_chains: Some(ic_doc_chains),
5449 matched_pair_count,
5450 elimination_entry_count,
5451 match_rate,
5452 })
5453 }
5454
5455 fn phase_financial_reporting(
5457 &mut self,
5458 document_flows: &DocumentFlowSnapshot,
5459 journal_entries: &[JournalEntry],
5460 coa: &Arc<ChartOfAccounts>,
5461 _hr: &HrSnapshot,
5462 _audit: &AuditSnapshot,
5463 stats: &mut EnhancedGenerationStatistics,
5464 ) -> SynthResult<FinancialReportingSnapshot> {
5465 let fs_enabled = self.phase_config.generate_financial_statements
5466 || self.config.financial_reporting.enabled;
5467 let br_enabled = self.phase_config.generate_bank_reconciliation;
5468
5469 if !fs_enabled && !br_enabled {
5470 debug!("Phase 15: Skipped (financial reporting disabled)");
5471 return Ok(FinancialReportingSnapshot::default());
5472 }
5473
5474 info!("Phase 15: Generating Financial Reporting Data");
5475
5476 let seed = self.seed;
5477 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5478 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5479
5480 let mut financial_statements = Vec::new();
5481 let mut bank_reconciliations = Vec::new();
5482 let mut trial_balances = Vec::new();
5483 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5484 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5485 Vec::new();
5486 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5488 std::collections::HashMap::new();
5489 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5491 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5493
5494 if fs_enabled {
5502 let has_journal_entries = !journal_entries.is_empty();
5503
5504 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5507 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5509
5510 let elimination_entries: Vec<&JournalEntry> = journal_entries
5512 .iter()
5513 .filter(|je| je.header.is_elimination)
5514 .collect();
5515
5516 for period in 0..self.config.global.period_months {
5518 let period_start = start_date + chrono::Months::new(period);
5519 let period_end =
5520 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5521 let fiscal_year = period_end.year() as u16;
5522 let fiscal_period = period_end.month() as u8;
5523 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5524
5525 let mut entity_tb_map: std::collections::HashMap<
5528 String,
5529 std::collections::HashMap<String, rust_decimal::Decimal>,
5530 > = std::collections::HashMap::new();
5531
5532 for (company_idx, company) in self.config.companies.iter().enumerate() {
5534 let company_code = company.code.as_str();
5535 let currency = company.currency.as_str();
5536 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5539 let mut company_fs_gen =
5540 FinancialStatementGenerator::new(seed + company_seed_offset);
5541
5542 if has_journal_entries {
5543 let tb_entries = Self::build_cumulative_trial_balance(
5544 journal_entries,
5545 coa,
5546 company_code,
5547 start_date,
5548 period_end,
5549 fiscal_year,
5550 fiscal_period,
5551 );
5552
5553 let entity_cat_map =
5555 entity_tb_map.entry(company_code.to_string()).or_default();
5556 for tb_entry in &tb_entries {
5557 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5558 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5559 }
5560
5561 let stmts = company_fs_gen.generate(
5562 company_code,
5563 currency,
5564 &tb_entries,
5565 period_start,
5566 period_end,
5567 fiscal_year,
5568 fiscal_period,
5569 None,
5570 "SYS-AUTOCLOSE",
5571 );
5572
5573 let mut entity_stmts = Vec::new();
5574 for stmt in stmts {
5575 if stmt.statement_type == StatementType::CashFlowStatement {
5576 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5577 let cf_items = Self::build_cash_flow_from_trial_balances(
5578 &tb_entries,
5579 None,
5580 net_income,
5581 );
5582 entity_stmts.push(FinancialStatement {
5583 cash_flow_items: cf_items,
5584 ..stmt
5585 });
5586 } else {
5587 entity_stmts.push(stmt);
5588 }
5589 }
5590
5591 financial_statements.extend(entity_stmts.clone());
5593
5594 standalone_statements
5596 .entry(company_code.to_string())
5597 .or_default()
5598 .extend(entity_stmts);
5599
5600 if company_idx == 0 {
5603 trial_balances.push(PeriodTrialBalance {
5604 fiscal_year,
5605 fiscal_period,
5606 period_start,
5607 period_end,
5608 entries: tb_entries,
5609 });
5610 }
5611 } else {
5612 let tb_entries = Self::build_trial_balance_from_entries(
5614 journal_entries,
5615 coa,
5616 company_code,
5617 fiscal_year,
5618 fiscal_period,
5619 );
5620
5621 let stmts = company_fs_gen.generate(
5622 company_code,
5623 currency,
5624 &tb_entries,
5625 period_start,
5626 period_end,
5627 fiscal_year,
5628 fiscal_period,
5629 None,
5630 "SYS-AUTOCLOSE",
5631 );
5632 financial_statements.extend(stmts.clone());
5633 standalone_statements
5634 .entry(company_code.to_string())
5635 .or_default()
5636 .extend(stmts);
5637
5638 if company_idx == 0 && !tb_entries.is_empty() {
5639 trial_balances.push(PeriodTrialBalance {
5640 fiscal_year,
5641 fiscal_period,
5642 period_start,
5643 period_end,
5644 entries: tb_entries,
5645 });
5646 }
5647 }
5648 }
5649
5650 let group_currency = self
5653 .config
5654 .companies
5655 .first()
5656 .map(|c| c.currency.as_str())
5657 .unwrap_or("USD");
5658
5659 let period_eliminations: Vec<JournalEntry> = elimination_entries
5661 .iter()
5662 .filter(|je| {
5663 je.header.fiscal_year == fiscal_year
5664 && je.header.fiscal_period == fiscal_period
5665 })
5666 .map(|je| (*je).clone())
5667 .collect();
5668
5669 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5670 &entity_tb_map,
5671 &period_eliminations,
5672 &period_label,
5673 );
5674
5675 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5678 .line_items
5679 .iter()
5680 .map(|li| {
5681 let net = li.post_elimination_total;
5682 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5683 (net, rust_decimal::Decimal::ZERO)
5684 } else {
5685 (rust_decimal::Decimal::ZERO, -net)
5686 };
5687 datasynth_generators::TrialBalanceEntry {
5688 account_code: li.account_category.clone(),
5689 account_name: li.account_category.clone(),
5690 category: li.account_category.clone(),
5691 debit_balance: debit,
5692 credit_balance: credit,
5693 }
5694 })
5695 .collect();
5696
5697 let mut cons_stmts = cons_gen.generate(
5698 "GROUP",
5699 group_currency,
5700 &cons_tb,
5701 period_start,
5702 period_end,
5703 fiscal_year,
5704 fiscal_period,
5705 None,
5706 "SYS-AUTOCLOSE",
5707 );
5708
5709 let bs_categories: &[&str] = &[
5713 "CASH",
5714 "RECEIVABLES",
5715 "INVENTORY",
5716 "FIXEDASSETS",
5717 "PAYABLES",
5718 "ACCRUEDLIABILITIES",
5719 "LONGTERMDEBT",
5720 "EQUITY",
5721 ];
5722 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5723 cons_line_items.into_iter().partition(|li| {
5724 let upper = li.label.to_uppercase();
5725 bs_categories.iter().any(|c| upper == *c)
5726 });
5727
5728 for stmt in &mut cons_stmts {
5729 stmt.is_consolidated = true;
5730 match stmt.statement_type {
5731 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5732 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5733 _ => {} }
5735 }
5736
5737 consolidated_statements.extend(cons_stmts);
5738 consolidation_schedules.push(schedule);
5739 }
5740
5741 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5747 info!(
5748 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5749 stats.financial_statement_count,
5750 consolidated_statements.len(),
5751 has_journal_entries
5752 );
5753
5754 let entity_seeds: Vec<SegmentSeed> = self
5759 .config
5760 .companies
5761 .iter()
5762 .map(|c| SegmentSeed {
5763 code: c.code.clone(),
5764 name: c.name.clone(),
5765 currency: c.currency.clone(),
5766 })
5767 .collect();
5768
5769 let mut seg_gen = SegmentGenerator::new(seed + 30);
5770
5771 for period in 0..self.config.global.period_months {
5776 let period_end =
5777 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5778 let fiscal_year = period_end.year() as u16;
5779 let fiscal_period = period_end.month() as u8;
5780 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5781
5782 use datasynth_core::models::StatementType;
5783
5784 let cons_is = consolidated_statements.iter().find(|s| {
5786 s.fiscal_year == fiscal_year
5787 && s.fiscal_period == fiscal_period
5788 && s.statement_type == StatementType::IncomeStatement
5789 });
5790 let cons_bs = consolidated_statements.iter().find(|s| {
5791 s.fiscal_year == fiscal_year
5792 && s.fiscal_period == fiscal_period
5793 && s.statement_type == StatementType::BalanceSheet
5794 });
5795
5796 let is_stmt = cons_is.or_else(|| {
5798 financial_statements.iter().find(|s| {
5799 s.fiscal_year == fiscal_year
5800 && s.fiscal_period == fiscal_period
5801 && s.statement_type == StatementType::IncomeStatement
5802 })
5803 });
5804 let bs_stmt = cons_bs.or_else(|| {
5805 financial_statements.iter().find(|s| {
5806 s.fiscal_year == fiscal_year
5807 && s.fiscal_period == fiscal_period
5808 && s.statement_type == StatementType::BalanceSheet
5809 })
5810 });
5811
5812 let consolidated_revenue = is_stmt
5813 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5814 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5816
5817 let consolidated_profit = is_stmt
5818 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5819 .map(|li| li.amount)
5820 .unwrap_or(rust_decimal::Decimal::ZERO);
5821
5822 let consolidated_assets = bs_stmt
5823 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5824 .map(|li| li.amount)
5825 .unwrap_or(rust_decimal::Decimal::ZERO);
5826
5827 if consolidated_revenue == rust_decimal::Decimal::ZERO
5829 && consolidated_assets == rust_decimal::Decimal::ZERO
5830 {
5831 continue;
5832 }
5833
5834 let group_code = self
5835 .config
5836 .companies
5837 .first()
5838 .map(|c| c.code.as_str())
5839 .unwrap_or("GROUP");
5840
5841 let total_depr: rust_decimal::Decimal = journal_entries
5844 .iter()
5845 .filter(|je| je.header.document_type == "CL")
5846 .flat_map(|je| je.lines.iter())
5847 .filter(|l| l.gl_account.starts_with("6000"))
5848 .map(|l| l.debit_amount)
5849 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5850 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5851 Some(total_depr)
5852 } else {
5853 None
5854 };
5855
5856 let (segs, recon) = seg_gen.generate(
5857 group_code,
5858 &period_label,
5859 consolidated_revenue,
5860 consolidated_profit,
5861 consolidated_assets,
5862 &entity_seeds,
5863 depr_param,
5864 );
5865 segment_reports.extend(segs);
5866 segment_reconciliations.push(recon);
5867 }
5868
5869 info!(
5870 "Segment reports generated: {} segments, {} reconciliations",
5871 segment_reports.len(),
5872 segment_reconciliations.len()
5873 );
5874 }
5875
5876 if br_enabled && !document_flows.payments.is_empty() {
5878 let employee_ids: Vec<String> = self
5879 .master_data
5880 .employees
5881 .iter()
5882 .map(|e| e.employee_id.clone())
5883 .collect();
5884 let mut br_gen =
5885 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5886
5887 for company in &self.config.companies {
5889 let company_payments: Vec<PaymentReference> = document_flows
5890 .payments
5891 .iter()
5892 .filter(|p| p.header.company_code == company.code)
5893 .map(|p| PaymentReference {
5894 id: p.header.document_id.clone(),
5895 amount: if p.is_vendor { p.amount } else { -p.amount },
5896 date: p.header.document_date,
5897 reference: p
5898 .check_number
5899 .clone()
5900 .or_else(|| p.wire_reference.clone())
5901 .unwrap_or_else(|| p.header.document_id.clone()),
5902 })
5903 .collect();
5904
5905 if company_payments.is_empty() {
5906 continue;
5907 }
5908
5909 let bank_account_id = format!("{}-MAIN", company.code);
5910
5911 for period in 0..self.config.global.period_months {
5913 let period_start = start_date + chrono::Months::new(period);
5914 let period_end =
5915 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5916
5917 let period_payments: Vec<PaymentReference> = company_payments
5918 .iter()
5919 .filter(|p| p.date >= period_start && p.date <= period_end)
5920 .cloned()
5921 .collect();
5922
5923 let recon = br_gen.generate(
5924 &company.code,
5925 &bank_account_id,
5926 period_start,
5927 period_end,
5928 &company.currency,
5929 &period_payments,
5930 );
5931 bank_reconciliations.push(recon);
5932 }
5933 }
5934 info!(
5935 "Bank reconciliations generated: {} reconciliations",
5936 bank_reconciliations.len()
5937 );
5938 }
5939
5940 stats.bank_reconciliation_count = bank_reconciliations.len();
5941 self.check_resources_with_log("post-financial-reporting")?;
5942
5943 if !trial_balances.is_empty() {
5944 info!(
5945 "Period-close trial balances captured: {} periods",
5946 trial_balances.len()
5947 );
5948 }
5949
5950 let notes_to_financial_statements = Vec::new();
5954
5955 Ok(FinancialReportingSnapshot {
5956 financial_statements,
5957 standalone_statements,
5958 consolidated_statements,
5959 consolidation_schedules,
5960 bank_reconciliations,
5961 trial_balances,
5962 segment_reports,
5963 segment_reconciliations,
5964 notes_to_financial_statements,
5965 })
5966 }
5967
5968 fn generate_notes_to_financial_statements(
5975 &self,
5976 financial_reporting: &mut FinancialReportingSnapshot,
5977 accounting_standards: &AccountingStandardsSnapshot,
5978 tax: &TaxSnapshot,
5979 hr: &HrSnapshot,
5980 audit: &AuditSnapshot,
5981 treasury: &TreasurySnapshot,
5982 ) {
5983 use datasynth_config::schema::AccountingFrameworkConfig;
5984 use datasynth_core::models::StatementType;
5985 use datasynth_generators::period_close::notes_generator::{
5986 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5987 };
5988
5989 let seed = self.seed;
5990 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5991 {
5992 Ok(d) => d,
5993 Err(_) => return,
5994 };
5995
5996 let mut notes_gen = NotesGenerator::new(seed + 4235);
5997
5998 for company in &self.config.companies {
5999 let last_period_end = start_date
6000 + chrono::Months::new(self.config.global.period_months)
6001 - chrono::Days::new(1);
6002 let fiscal_year = last_period_end.year() as u16;
6003
6004 let entity_is = financial_reporting
6006 .standalone_statements
6007 .get(&company.code)
6008 .and_then(|stmts| {
6009 stmts.iter().find(|s| {
6010 s.fiscal_year == fiscal_year
6011 && s.statement_type == StatementType::IncomeStatement
6012 })
6013 });
6014 let entity_bs = financial_reporting
6015 .standalone_statements
6016 .get(&company.code)
6017 .and_then(|stmts| {
6018 stmts.iter().find(|s| {
6019 s.fiscal_year == fiscal_year
6020 && s.statement_type == StatementType::BalanceSheet
6021 })
6022 });
6023
6024 let revenue_amount = entity_is
6026 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6027 .map(|li| li.amount);
6028 let ppe_gross = entity_bs
6029 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6030 .map(|li| li.amount);
6031
6032 let framework = match self
6033 .config
6034 .accounting_standards
6035 .framework
6036 .unwrap_or_default()
6037 {
6038 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6039 "IFRS".to_string()
6040 }
6041 _ => "US GAAP".to_string(),
6042 };
6043
6044 let (entity_dta, entity_dtl) = {
6047 let mut dta = rust_decimal::Decimal::ZERO;
6048 let mut dtl = rust_decimal::Decimal::ZERO;
6049 for rf in &tax.deferred_tax.rollforwards {
6050 if rf.entity_code == company.code {
6051 dta += rf.closing_dta;
6052 dtl += rf.closing_dtl;
6053 }
6054 }
6055 (
6056 if dta > rust_decimal::Decimal::ZERO {
6057 Some(dta)
6058 } else {
6059 None
6060 },
6061 if dtl > rust_decimal::Decimal::ZERO {
6062 Some(dtl)
6063 } else {
6064 None
6065 },
6066 )
6067 };
6068
6069 let entity_provisions: Vec<_> = accounting_standards
6072 .provisions
6073 .iter()
6074 .filter(|p| p.entity_code == company.code)
6075 .collect();
6076 let provision_count = entity_provisions.len();
6077 let total_provisions = if provision_count > 0 {
6078 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6079 } else {
6080 None
6081 };
6082
6083 let entity_pension_plan_count = hr
6085 .pension_plans
6086 .iter()
6087 .filter(|p| p.entity_code == company.code)
6088 .count();
6089 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6090 let sum: rust_decimal::Decimal = hr
6091 .pension_disclosures
6092 .iter()
6093 .filter(|d| {
6094 hr.pension_plans
6095 .iter()
6096 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6097 })
6098 .map(|d| d.net_pension_liability)
6099 .sum();
6100 let plan_assets_sum: rust_decimal::Decimal = hr
6101 .pension_plan_assets
6102 .iter()
6103 .filter(|a| {
6104 hr.pension_plans
6105 .iter()
6106 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6107 })
6108 .map(|a| a.fair_value_closing)
6109 .sum();
6110 if entity_pension_plan_count > 0 {
6111 Some(sum + plan_assets_sum)
6112 } else {
6113 None
6114 }
6115 };
6116 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6117 let sum: rust_decimal::Decimal = hr
6118 .pension_plan_assets
6119 .iter()
6120 .filter(|a| {
6121 hr.pension_plans
6122 .iter()
6123 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6124 })
6125 .map(|a| a.fair_value_closing)
6126 .sum();
6127 if entity_pension_plan_count > 0 {
6128 Some(sum)
6129 } else {
6130 None
6131 }
6132 };
6133
6134 let rp_count = audit.related_party_transactions.len();
6137 let se_count = audit.subsequent_events.len();
6138 let adjusting_count = audit
6139 .subsequent_events
6140 .iter()
6141 .filter(|e| {
6142 matches!(
6143 e.classification,
6144 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6145 )
6146 })
6147 .count();
6148
6149 let ctx = NotesGeneratorContext {
6150 entity_code: company.code.clone(),
6151 framework,
6152 period: format!("FY{}", fiscal_year),
6153 period_end: last_period_end,
6154 currency: company.currency.clone(),
6155 revenue_amount,
6156 total_ppe_gross: ppe_gross,
6157 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6158 deferred_tax_asset: entity_dta,
6160 deferred_tax_liability: entity_dtl,
6161 provision_count,
6163 total_provisions,
6164 pension_plan_count: entity_pension_plan_count,
6166 total_dbo: entity_total_dbo,
6167 total_plan_assets: entity_total_plan_assets,
6168 related_party_transaction_count: rp_count,
6170 subsequent_event_count: se_count,
6171 adjusting_event_count: adjusting_count,
6172 ..NotesGeneratorContext::default()
6173 };
6174
6175 let entity_notes = notes_gen.generate(&ctx);
6176 let standard_note_count = entity_notes.len() as u32;
6177 info!(
6178 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6179 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6180 );
6181 financial_reporting
6182 .notes_to_financial_statements
6183 .extend(entity_notes);
6184
6185 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6187 .debt_instruments
6188 .iter()
6189 .filter(|d| d.entity_id == company.code)
6190 .map(|d| {
6191 (
6192 format!("{:?}", d.instrument_type),
6193 d.principal,
6194 d.maturity_date.to_string(),
6195 )
6196 })
6197 .collect();
6198
6199 let hedge_count = treasury.hedge_relationships.len();
6200 let effective_hedges = treasury
6201 .hedge_relationships
6202 .iter()
6203 .filter(|h| h.is_effective)
6204 .count();
6205 let total_notional: rust_decimal::Decimal = treasury
6206 .hedging_instruments
6207 .iter()
6208 .map(|h| h.notional_amount)
6209 .sum();
6210 let total_fair_value: rust_decimal::Decimal = treasury
6211 .hedging_instruments
6212 .iter()
6213 .map(|h| h.fair_value)
6214 .sum();
6215
6216 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6218 .provisions
6219 .iter()
6220 .filter(|p| p.entity_code == company.code)
6221 .map(|p| p.id.as_str())
6222 .collect();
6223 let provision_movements: Vec<(
6224 String,
6225 rust_decimal::Decimal,
6226 rust_decimal::Decimal,
6227 rust_decimal::Decimal,
6228 )> = accounting_standards
6229 .provision_movements
6230 .iter()
6231 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6232 .map(|m| {
6233 let prov_type = accounting_standards
6234 .provisions
6235 .iter()
6236 .find(|p| p.id == m.provision_id)
6237 .map(|p| format!("{:?}", p.provision_type))
6238 .unwrap_or_else(|| "Unknown".to_string());
6239 (prov_type, m.opening, m.additions, m.closing)
6240 })
6241 .collect();
6242
6243 let enhanced_ctx = EnhancedNotesContext {
6244 entity_code: company.code.clone(),
6245 period: format!("FY{}", fiscal_year),
6246 currency: company.currency.clone(),
6247 finished_goods_value: rust_decimal::Decimal::ZERO,
6249 wip_value: rust_decimal::Decimal::ZERO,
6250 raw_materials_value: rust_decimal::Decimal::ZERO,
6251 debt_instruments,
6252 hedge_count,
6253 effective_hedges,
6254 total_notional,
6255 total_fair_value,
6256 provision_movements,
6257 };
6258
6259 let enhanced_notes =
6260 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6261 if !enhanced_notes.is_empty() {
6262 info!(
6263 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6264 company.code,
6265 enhanced_notes.len(),
6266 enhanced_ctx.debt_instruments.len(),
6267 hedge_count,
6268 enhanced_ctx.provision_movements.len(),
6269 );
6270 financial_reporting
6271 .notes_to_financial_statements
6272 .extend(enhanced_notes);
6273 }
6274 }
6275 }
6276
6277 fn build_trial_balance_from_entries(
6283 journal_entries: &[JournalEntry],
6284 coa: &ChartOfAccounts,
6285 company_code: &str,
6286 fiscal_year: u16,
6287 fiscal_period: u8,
6288 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6289 use rust_decimal::Decimal;
6290
6291 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6293 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6294
6295 for je in journal_entries {
6296 if je.header.company_code != company_code
6298 || je.header.fiscal_year != fiscal_year
6299 || je.header.fiscal_period != fiscal_period
6300 {
6301 continue;
6302 }
6303
6304 for line in &je.lines {
6305 let acct = &line.gl_account;
6306 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6307 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6308 }
6309 }
6310
6311 let mut all_accounts: Vec<&String> = account_debits
6313 .keys()
6314 .chain(account_credits.keys())
6315 .collect::<std::collections::HashSet<_>>()
6316 .into_iter()
6317 .collect();
6318 all_accounts.sort();
6319
6320 let mut entries = Vec::new();
6321
6322 for acct_number in all_accounts {
6323 let debit = account_debits
6324 .get(acct_number)
6325 .copied()
6326 .unwrap_or(Decimal::ZERO);
6327 let credit = account_credits
6328 .get(acct_number)
6329 .copied()
6330 .unwrap_or(Decimal::ZERO);
6331
6332 if debit.is_zero() && credit.is_zero() {
6333 continue;
6334 }
6335
6336 let account_name = coa
6338 .get_account(acct_number)
6339 .map(|gl| gl.short_description.clone())
6340 .unwrap_or_else(|| format!("Account {acct_number}"));
6341
6342 let category = Self::category_from_account_code(acct_number);
6347
6348 entries.push(datasynth_generators::TrialBalanceEntry {
6349 account_code: acct_number.clone(),
6350 account_name,
6351 category,
6352 debit_balance: debit,
6353 credit_balance: credit,
6354 });
6355 }
6356
6357 entries
6358 }
6359
6360 fn build_cumulative_trial_balance(
6367 journal_entries: &[JournalEntry],
6368 coa: &ChartOfAccounts,
6369 company_code: &str,
6370 start_date: NaiveDate,
6371 period_end: NaiveDate,
6372 fiscal_year: u16,
6373 fiscal_period: u8,
6374 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6375 use rust_decimal::Decimal;
6376
6377 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6379 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6380
6381 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6383 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6384
6385 for je in journal_entries {
6386 if je.header.company_code != company_code {
6387 continue;
6388 }
6389
6390 for line in &je.lines {
6391 let acct = &line.gl_account;
6392 let category = Self::category_from_account_code(acct);
6393 let is_bs_account = matches!(
6394 category.as_str(),
6395 "Cash"
6396 | "Receivables"
6397 | "Inventory"
6398 | "FixedAssets"
6399 | "Payables"
6400 | "AccruedLiabilities"
6401 | "LongTermDebt"
6402 | "Equity"
6403 );
6404
6405 if is_bs_account {
6406 if je.header.document_date <= period_end
6408 && je.header.document_date >= start_date
6409 {
6410 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6411 line.debit_amount;
6412 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6413 line.credit_amount;
6414 }
6415 } else {
6416 if je.header.fiscal_year == fiscal_year
6418 && je.header.fiscal_period == fiscal_period
6419 {
6420 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6421 line.debit_amount;
6422 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6423 line.credit_amount;
6424 }
6425 }
6426 }
6427 }
6428
6429 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6431 all_accounts.extend(bs_debits.keys().cloned());
6432 all_accounts.extend(bs_credits.keys().cloned());
6433 all_accounts.extend(is_debits.keys().cloned());
6434 all_accounts.extend(is_credits.keys().cloned());
6435
6436 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6437 sorted_accounts.sort();
6438
6439 let mut entries = Vec::new();
6440
6441 for acct_number in &sorted_accounts {
6442 let category = Self::category_from_account_code(acct_number);
6443 let is_bs_account = matches!(
6444 category.as_str(),
6445 "Cash"
6446 | "Receivables"
6447 | "Inventory"
6448 | "FixedAssets"
6449 | "Payables"
6450 | "AccruedLiabilities"
6451 | "LongTermDebt"
6452 | "Equity"
6453 );
6454
6455 let (debit, credit) = if is_bs_account {
6456 (
6457 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6458 bs_credits
6459 .get(acct_number)
6460 .copied()
6461 .unwrap_or(Decimal::ZERO),
6462 )
6463 } else {
6464 (
6465 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6466 is_credits
6467 .get(acct_number)
6468 .copied()
6469 .unwrap_or(Decimal::ZERO),
6470 )
6471 };
6472
6473 if debit.is_zero() && credit.is_zero() {
6474 continue;
6475 }
6476
6477 let account_name = coa
6478 .get_account(acct_number)
6479 .map(|gl| gl.short_description.clone())
6480 .unwrap_or_else(|| format!("Account {acct_number}"));
6481
6482 entries.push(datasynth_generators::TrialBalanceEntry {
6483 account_code: acct_number.clone(),
6484 account_name,
6485 category,
6486 debit_balance: debit,
6487 credit_balance: credit,
6488 });
6489 }
6490
6491 entries
6492 }
6493
6494 fn build_cash_flow_from_trial_balances(
6499 current_tb: &[datasynth_generators::TrialBalanceEntry],
6500 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6501 net_income: rust_decimal::Decimal,
6502 ) -> Vec<CashFlowItem> {
6503 use rust_decimal::Decimal;
6504
6505 let aggregate =
6507 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6508 let mut map: HashMap<String, Decimal> = HashMap::new();
6509 for entry in tb {
6510 let net = entry.debit_balance - entry.credit_balance;
6511 *map.entry(entry.category.clone()).or_default() += net;
6512 }
6513 map
6514 };
6515
6516 let current = aggregate(current_tb);
6517 let prior = prior_tb.map(aggregate);
6518
6519 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6521 *map.get(key).unwrap_or(&Decimal::ZERO)
6522 };
6523
6524 let change = |key: &str| -> Decimal {
6526 let curr = get(¤t, key);
6527 match &prior {
6528 Some(p) => curr - get(p, key),
6529 None => curr,
6530 }
6531 };
6532
6533 let fixed_asset_change = change("FixedAssets");
6536 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6537 -fixed_asset_change
6538 } else {
6539 Decimal::ZERO
6540 };
6541
6542 let ar_change = change("Receivables");
6544 let inventory_change = change("Inventory");
6545 let ap_change = change("Payables");
6547 let accrued_change = change("AccruedLiabilities");
6548
6549 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6550 + (-ap_change)
6551 + (-accrued_change);
6552
6553 let capex = if fixed_asset_change > Decimal::ZERO {
6555 -fixed_asset_change
6556 } else {
6557 Decimal::ZERO
6558 };
6559 let investing_cf = capex;
6560
6561 let debt_change = -change("LongTermDebt");
6563 let equity_change = -change("Equity");
6564 let financing_cf = debt_change + equity_change;
6565
6566 let net_change = operating_cf + investing_cf + financing_cf;
6567
6568 vec![
6569 CashFlowItem {
6570 item_code: "CF-NI".to_string(),
6571 label: "Net Income".to_string(),
6572 category: CashFlowCategory::Operating,
6573 amount: net_income,
6574 amount_prior: None,
6575 sort_order: 1,
6576 is_total: false,
6577 },
6578 CashFlowItem {
6579 item_code: "CF-DEP".to_string(),
6580 label: "Depreciation & Amortization".to_string(),
6581 category: CashFlowCategory::Operating,
6582 amount: depreciation_addback,
6583 amount_prior: None,
6584 sort_order: 2,
6585 is_total: false,
6586 },
6587 CashFlowItem {
6588 item_code: "CF-AR".to_string(),
6589 label: "Change in Accounts Receivable".to_string(),
6590 category: CashFlowCategory::Operating,
6591 amount: -ar_change,
6592 amount_prior: None,
6593 sort_order: 3,
6594 is_total: false,
6595 },
6596 CashFlowItem {
6597 item_code: "CF-AP".to_string(),
6598 label: "Change in Accounts Payable".to_string(),
6599 category: CashFlowCategory::Operating,
6600 amount: -ap_change,
6601 amount_prior: None,
6602 sort_order: 4,
6603 is_total: false,
6604 },
6605 CashFlowItem {
6606 item_code: "CF-INV".to_string(),
6607 label: "Change in Inventory".to_string(),
6608 category: CashFlowCategory::Operating,
6609 amount: -inventory_change,
6610 amount_prior: None,
6611 sort_order: 5,
6612 is_total: false,
6613 },
6614 CashFlowItem {
6615 item_code: "CF-OP".to_string(),
6616 label: "Net Cash from Operating Activities".to_string(),
6617 category: CashFlowCategory::Operating,
6618 amount: operating_cf,
6619 amount_prior: None,
6620 sort_order: 6,
6621 is_total: true,
6622 },
6623 CashFlowItem {
6624 item_code: "CF-CAPEX".to_string(),
6625 label: "Capital Expenditures".to_string(),
6626 category: CashFlowCategory::Investing,
6627 amount: capex,
6628 amount_prior: None,
6629 sort_order: 7,
6630 is_total: false,
6631 },
6632 CashFlowItem {
6633 item_code: "CF-INV-T".to_string(),
6634 label: "Net Cash from Investing Activities".to_string(),
6635 category: CashFlowCategory::Investing,
6636 amount: investing_cf,
6637 amount_prior: None,
6638 sort_order: 8,
6639 is_total: true,
6640 },
6641 CashFlowItem {
6642 item_code: "CF-DEBT".to_string(),
6643 label: "Net Borrowings / (Repayments)".to_string(),
6644 category: CashFlowCategory::Financing,
6645 amount: debt_change,
6646 amount_prior: None,
6647 sort_order: 9,
6648 is_total: false,
6649 },
6650 CashFlowItem {
6651 item_code: "CF-EQ".to_string(),
6652 label: "Equity Changes".to_string(),
6653 category: CashFlowCategory::Financing,
6654 amount: equity_change,
6655 amount_prior: None,
6656 sort_order: 10,
6657 is_total: false,
6658 },
6659 CashFlowItem {
6660 item_code: "CF-FIN-T".to_string(),
6661 label: "Net Cash from Financing Activities".to_string(),
6662 category: CashFlowCategory::Financing,
6663 amount: financing_cf,
6664 amount_prior: None,
6665 sort_order: 11,
6666 is_total: true,
6667 },
6668 CashFlowItem {
6669 item_code: "CF-NET".to_string(),
6670 label: "Net Change in Cash".to_string(),
6671 category: CashFlowCategory::Operating,
6672 amount: net_change,
6673 amount_prior: None,
6674 sort_order: 12,
6675 is_total: true,
6676 },
6677 ]
6678 }
6679
6680 fn calculate_net_income_from_tb(
6684 tb: &[datasynth_generators::TrialBalanceEntry],
6685 ) -> rust_decimal::Decimal {
6686 use rust_decimal::Decimal;
6687
6688 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6689 for entry in tb {
6690 let net = entry.debit_balance - entry.credit_balance;
6691 *aggregated.entry(entry.category.clone()).or_default() += net;
6692 }
6693
6694 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6695 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6696 let opex = *aggregated
6697 .get("OperatingExpenses")
6698 .unwrap_or(&Decimal::ZERO);
6699 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6700 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6701
6702 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6705 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6707 operating_income - tax
6708 }
6709
/// Maps a GL account code to a reporting category using its first two characters.
///
/// Recognised prefixes: `10` Cash, `11` Receivables, `12`-`14` Inventory, `15`-`19`
/// FixedAssets, `20` Payables, `21`-`24` AccruedLiabilities, `25`-`29` LongTermDebt,
/// `30`-`39` Equity, `40`-`44` Revenue, `50`-`52` CostOfSales, `60`-`69`
/// OperatingExpenses, `70`-`74` OtherIncome, `80`-`89` OtherExpenses.
///
/// Every other code, including codes shorter than two characters, is classified as
/// `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped or too-short codes default to operating expenses.
        _ => "OperatingExpenses",
    };
    label.to_string()
}
6738
6739 fn phase_hr_data(
6741 &mut self,
6742 stats: &mut EnhancedGenerationStatistics,
6743 ) -> SynthResult<HrSnapshot> {
6744 if !self.phase_config.generate_hr {
6745 debug!("Phase 16: Skipped (HR generation disabled)");
6746 return Ok(HrSnapshot::default());
6747 }
6748
6749 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6750
6751 let seed = self.seed;
6752 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6753 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6754 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6755 let company_code = self
6756 .config
6757 .companies
6758 .first()
6759 .map(|c| c.code.as_str())
6760 .unwrap_or("1000");
6761 let currency = self
6762 .config
6763 .companies
6764 .first()
6765 .map(|c| c.currency.as_str())
6766 .unwrap_or("USD");
6767
6768 let employee_ids: Vec<String> = self
6769 .master_data
6770 .employees
6771 .iter()
6772 .map(|e| e.employee_id.clone())
6773 .collect();
6774
6775 if employee_ids.is_empty() {
6776 debug!("Phase 16: Skipped (no employees available)");
6777 return Ok(HrSnapshot::default());
6778 }
6779
6780 let cost_center_ids: Vec<String> = self
6783 .master_data
6784 .employees
6785 .iter()
6786 .filter_map(|e| e.cost_center.clone())
6787 .collect::<std::collections::HashSet<_>>()
6788 .into_iter()
6789 .collect();
6790
6791 let mut snapshot = HrSnapshot::default();
6792
6793 if self.config.hr.payroll.enabled {
6795 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6796 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6797
6798 let payroll_pack = self.primary_pack();
6800
6801 payroll_gen.set_country_pack(payroll_pack.clone());
6804
6805 let employees_with_salary: Vec<(
6806 String,
6807 rust_decimal::Decimal,
6808 Option<String>,
6809 Option<String>,
6810 )> = self
6811 .master_data
6812 .employees
6813 .iter()
6814 .map(|e| {
6815 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6818 e.base_salary
6819 } else {
6820 rust_decimal::Decimal::from(60_000)
6821 };
6822 (
6823 e.employee_id.clone(),
6824 annual, e.cost_center.clone(),
6826 e.department_id.clone(),
6827 )
6828 })
6829 .collect();
6830
6831 let change_history = &self.master_data.employee_change_history;
6834 let has_changes = !change_history.is_empty();
6835 if has_changes {
6836 debug!(
6837 "Payroll will incorporate {} employee change events",
6838 change_history.len()
6839 );
6840 }
6841
6842 for month in 0..self.config.global.period_months {
6843 let period_start = start_date + chrono::Months::new(month);
6844 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6845 let (run, items) = if has_changes {
6846 payroll_gen.generate_with_changes(
6847 company_code,
6848 &employees_with_salary,
6849 period_start,
6850 period_end,
6851 currency,
6852 change_history,
6853 )
6854 } else {
6855 payroll_gen.generate(
6856 company_code,
6857 &employees_with_salary,
6858 period_start,
6859 period_end,
6860 currency,
6861 )
6862 };
6863 snapshot.payroll_runs.push(run);
6864 snapshot.payroll_run_count += 1;
6865 snapshot.payroll_line_item_count += items.len();
6866 snapshot.payroll_line_items.extend(items);
6867 }
6868 }
6869
6870 if self.config.hr.time_attendance.enabled {
6872 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6873 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6874 if let Some(ctx) = &self.temporal_context {
6878 time_gen.set_temporal_context(Arc::clone(ctx));
6879 }
6880 let entries = time_gen.generate(
6881 &employee_ids,
6882 start_date,
6883 end_date,
6884 &self.config.hr.time_attendance,
6885 );
6886 snapshot.time_entry_count = entries.len();
6887 snapshot.time_entries = entries;
6888 }
6889
6890 if self.config.hr.expenses.enabled {
6892 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6893 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6894 expense_gen.set_country_pack(self.primary_pack().clone());
6895 if let Some(ctx) = &self.temporal_context {
6898 expense_gen.set_temporal_context(Arc::clone(ctx));
6899 }
6900 let company_currency = self
6901 .config
6902 .companies
6903 .first()
6904 .map(|c| c.currency.as_str())
6905 .unwrap_or("USD");
6906 let reports = expense_gen.generate_with_currency(
6907 &employee_ids,
6908 start_date,
6909 end_date,
6910 &self.config.hr.expenses,
6911 company_currency,
6912 );
6913 snapshot.expense_report_count = reports.len();
6914 snapshot.expense_reports = reports;
6915 }
6916
6917 if self.config.hr.payroll.enabled {
6919 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6920 let employee_pairs: Vec<(String, String)> = self
6921 .master_data
6922 .employees
6923 .iter()
6924 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6925 .collect();
6926 let enrollments =
6927 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6928 snapshot.benefit_enrollment_count = enrollments.len();
6929 snapshot.benefit_enrollments = enrollments;
6930 }
6931
6932 if self.phase_config.generate_hr {
6934 let entity_name = self
6935 .config
6936 .companies
6937 .first()
6938 .map(|c| c.name.as_str())
6939 .unwrap_or("Entity");
6940 let period_months = self.config.global.period_months;
6941 let period_label = {
6942 let y = start_date.year();
6943 let m = start_date.month();
6944 if period_months >= 12 {
6945 format!("FY{y}")
6946 } else {
6947 format!("{y}-{m:02}")
6948 }
6949 };
6950 let reporting_date =
6951 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6952
6953 let avg_salary: Option<rust_decimal::Decimal> = {
6958 let employee_count = employee_ids.len();
6959 if self.config.hr.payroll.enabled
6960 && employee_count > 0
6961 && !snapshot.payroll_runs.is_empty()
6962 {
6963 let total_gross: rust_decimal::Decimal = snapshot
6965 .payroll_runs
6966 .iter()
6967 .filter(|r| r.company_code == company_code)
6968 .map(|r| r.total_gross)
6969 .sum();
6970 if total_gross > rust_decimal::Decimal::ZERO {
6971 let annual_total = if period_months > 0 && period_months < 12 {
6973 total_gross * rust_decimal::Decimal::from(12u32)
6974 / rust_decimal::Decimal::from(period_months)
6975 } else {
6976 total_gross
6977 };
6978 Some(
6979 (annual_total / rust_decimal::Decimal::from(employee_count))
6980 .round_dp(2),
6981 )
6982 } else {
6983 None
6984 }
6985 } else {
6986 None
6987 }
6988 };
6989
6990 let mut pension_gen =
6991 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6992 let pension_snap = pension_gen.generate(
6993 company_code,
6994 entity_name,
6995 &period_label,
6996 reporting_date,
6997 employee_ids.len(),
6998 currency,
6999 avg_salary,
7000 period_months,
7001 );
7002 snapshot.pension_plan_count = pension_snap.plans.len();
7003 snapshot.pension_plans = pension_snap.plans;
7004 snapshot.pension_obligations = pension_snap.obligations;
7005 snapshot.pension_plan_assets = pension_snap.plan_assets;
7006 snapshot.pension_disclosures = pension_snap.disclosures;
7007 snapshot.pension_journal_entries = pension_snap.journal_entries;
7012 }
7013
7014 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7016 let period_months = self.config.global.period_months;
7017 let period_label = {
7018 let y = start_date.year();
7019 let m = start_date.month();
7020 if period_months >= 12 {
7021 format!("FY{y}")
7022 } else {
7023 format!("{y}-{m:02}")
7024 }
7025 };
7026 let reporting_date =
7027 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7028
7029 let mut stock_comp_gen =
7030 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7031 let stock_snap = stock_comp_gen.generate(
7032 company_code,
7033 &employee_ids,
7034 start_date,
7035 &period_label,
7036 reporting_date,
7037 currency,
7038 );
7039 snapshot.stock_grant_count = stock_snap.grants.len();
7040 snapshot.stock_grants = stock_snap.grants;
7041 snapshot.stock_comp_expenses = stock_snap.expenses;
7042 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7043 }
7044
7045 stats.payroll_run_count = snapshot.payroll_run_count;
7046 stats.time_entry_count = snapshot.time_entry_count;
7047 stats.expense_report_count = snapshot.expense_report_count;
7048 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7049 stats.pension_plan_count = snapshot.pension_plan_count;
7050 stats.stock_grant_count = snapshot.stock_grant_count;
7051
7052 info!(
7053 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7054 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7055 snapshot.time_entry_count, snapshot.expense_report_count,
7056 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7057 snapshot.stock_grant_count
7058 );
7059 self.check_resources_with_log("post-hr")?;
7060
7061 Ok(snapshot)
7062 }
7063
7064 fn phase_accounting_standards(
7066 &mut self,
7067 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7068 journal_entries: &[JournalEntry],
7069 stats: &mut EnhancedGenerationStatistics,
7070 ) -> SynthResult<AccountingStandardsSnapshot> {
7071 if !self.phase_config.generate_accounting_standards {
7072 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7073 return Ok(AccountingStandardsSnapshot::default());
7074 }
7075 info!("Phase 17: Generating Accounting Standards Data");
7076
7077 let seed = self.seed;
7078 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7079 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7080 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7081 let company_code = self
7082 .config
7083 .companies
7084 .first()
7085 .map(|c| c.code.as_str())
7086 .unwrap_or("1000");
7087 let currency = self
7088 .config
7089 .companies
7090 .first()
7091 .map(|c| c.currency.as_str())
7092 .unwrap_or("USD");
7093
7094 let framework = match self.config.accounting_standards.framework {
7099 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7100 datasynth_standards::framework::AccountingFramework::UsGaap
7101 }
7102 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7103 datasynth_standards::framework::AccountingFramework::Ifrs
7104 }
7105 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7106 datasynth_standards::framework::AccountingFramework::DualReporting
7107 }
7108 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7109 datasynth_standards::framework::AccountingFramework::FrenchGaap
7110 }
7111 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7112 datasynth_standards::framework::AccountingFramework::GermanGaap
7113 }
7114 None => {
7115 let pack = self.primary_pack();
7117 let pack_fw = pack.accounting.framework.as_str();
7118 match pack_fw {
7119 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7120 "dual_reporting" => {
7121 datasynth_standards::framework::AccountingFramework::DualReporting
7122 }
7123 "french_gaap" => {
7124 datasynth_standards::framework::AccountingFramework::FrenchGaap
7125 }
7126 "german_gaap" | "hgb" => {
7127 datasynth_standards::framework::AccountingFramework::GermanGaap
7128 }
7129 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7131 }
7132 }
7133 };
7134
7135 let mut snapshot = AccountingStandardsSnapshot::default();
7136
7137 if self.config.accounting_standards.revenue_recognition.enabled {
7139 let customer_ids: Vec<String> = self
7140 .master_data
7141 .customers
7142 .iter()
7143 .map(|c| c.customer_id.clone())
7144 .collect();
7145
7146 if !customer_ids.is_empty() {
7147 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7148 let contracts = rev_gen.generate(
7149 company_code,
7150 &customer_ids,
7151 start_date,
7152 end_date,
7153 currency,
7154 &self.config.accounting_standards.revenue_recognition,
7155 framework,
7156 );
7157 snapshot.revenue_contract_count = contracts.len();
7158 snapshot.contracts = contracts;
7159 }
7160 }
7161
7162 if self.config.accounting_standards.impairment.enabled {
7164 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7165 .master_data
7166 .assets
7167 .iter()
7168 .map(|a| {
7169 (
7170 a.asset_id.clone(),
7171 a.description.clone(),
7172 a.acquisition_cost,
7173 )
7174 })
7175 .collect();
7176
7177 if !asset_data.is_empty() {
7178 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7179 let tests = imp_gen.generate(
7180 company_code,
7181 &asset_data,
7182 end_date,
7183 &self.config.accounting_standards.impairment,
7184 framework,
7185 );
7186 snapshot.impairment_test_count = tests.len();
7187 snapshot.impairment_tests = tests;
7188 }
7189 }
7190
7191 if self
7193 .config
7194 .accounting_standards
7195 .business_combinations
7196 .enabled
7197 {
7198 let bc_config = &self.config.accounting_standards.business_combinations;
7199 let framework_str = match framework {
7200 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7201 _ => "US_GAAP",
7202 };
7203 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7204 let bc_snap = bc_gen.generate(
7205 company_code,
7206 currency,
7207 start_date,
7208 end_date,
7209 bc_config.acquisition_count,
7210 framework_str,
7211 );
7212 snapshot.business_combination_count = bc_snap.combinations.len();
7213 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7214 snapshot.business_combinations = bc_snap.combinations;
7215 }
7216
7217 if self
7219 .config
7220 .accounting_standards
7221 .expected_credit_loss
7222 .enabled
7223 {
7224 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7225 let framework_str = match framework {
7226 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7227 _ => "ASC_326",
7228 };
7229
7230 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7233
7234 let mut ecl_gen = EclGenerator::new(seed + 43);
7235
7236 let bucket_exposures: Vec<(
7238 datasynth_core::models::subledger::ar::AgingBucket,
7239 rust_decimal::Decimal,
7240 )> = if ar_aging_reports.is_empty() {
7241 use datasynth_core::models::subledger::ar::AgingBucket;
7243 vec![
7244 (
7245 AgingBucket::Current,
7246 rust_decimal::Decimal::from(500_000_u32),
7247 ),
7248 (
7249 AgingBucket::Days1To30,
7250 rust_decimal::Decimal::from(120_000_u32),
7251 ),
7252 (
7253 AgingBucket::Days31To60,
7254 rust_decimal::Decimal::from(45_000_u32),
7255 ),
7256 (
7257 AgingBucket::Days61To90,
7258 rust_decimal::Decimal::from(15_000_u32),
7259 ),
7260 (
7261 AgingBucket::Over90Days,
7262 rust_decimal::Decimal::from(8_000_u32),
7263 ),
7264 ]
7265 } else {
7266 use datasynth_core::models::subledger::ar::AgingBucket;
7267 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7269 std::collections::HashMap::new();
7270 for report in ar_aging_reports {
7271 for (bucket, amount) in &report.bucket_totals {
7272 *totals.entry(*bucket).or_default() += amount;
7273 }
7274 }
7275 AgingBucket::all()
7276 .into_iter()
7277 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7278 .collect()
7279 };
7280
7281 let ecl_snap = ecl_gen.generate(
7282 company_code,
7283 end_date,
7284 &bucket_exposures,
7285 ecl_config,
7286 &period_label,
7287 framework_str,
7288 );
7289
7290 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7291 snapshot.ecl_models = ecl_snap.ecl_models;
7292 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7293 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7294 }
7295
7296 {
7298 let framework_str = match framework {
7299 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7300 _ => "US_GAAP",
7301 };
7302
7303 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7308 .max(rust_decimal::Decimal::from(100_000_u32));
7309
7310 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7311
7312 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7313 let prov_snap = prov_gen.generate(
7314 company_code,
7315 currency,
7316 revenue_proxy,
7317 end_date,
7318 &period_label,
7319 framework_str,
7320 None, );
7322
7323 snapshot.provision_count = prov_snap.provisions.len();
7324 snapshot.provisions = prov_snap.provisions;
7325 snapshot.provision_movements = prov_snap.movements;
7326 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7327 snapshot.provision_journal_entries = prov_snap.journal_entries;
7328 }
7329
7330 {
7334 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7335
7336 let presentation_currency = self
7337 .config
7338 .global
7339 .presentation_currency
7340 .clone()
7341 .unwrap_or_else(|| self.config.global.group_currency.clone());
7342
7343 let mut rate_table = FxRateTable::new(&presentation_currency);
7346
7347 let base_rates = base_rates_usd();
7351 for (ccy, rate) in &base_rates {
7352 rate_table.add_rate(FxRate::new(
7353 ccy,
7354 "USD",
7355 RateType::Closing,
7356 end_date,
7357 *rate,
7358 "SYNTHETIC",
7359 ));
7360 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7363 rate_table.add_rate(FxRate::new(
7364 ccy,
7365 "USD",
7366 RateType::Average,
7367 end_date,
7368 avg,
7369 "SYNTHETIC",
7370 ));
7371 }
7372
7373 let mut translation_results = Vec::new();
7374 for company in &self.config.companies {
7375 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7378 .max(rust_decimal::Decimal::from(100_000_u32));
7379
7380 let func_ccy = company
7381 .functional_currency
7382 .clone()
7383 .unwrap_or_else(|| company.currency.clone());
7384
7385 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7386 &company.code,
7387 &func_ccy,
7388 &presentation_currency,
7389 &ias21_period_label,
7390 end_date,
7391 company_revenue,
7392 &rate_table,
7393 );
7394 translation_results.push(result);
7395 }
7396
7397 snapshot.currency_translation_count = translation_results.len();
7398 snapshot.currency_translation_results = translation_results;
7399 }
7400
7401 stats.revenue_contract_count = snapshot.revenue_contract_count;
7402 stats.impairment_test_count = snapshot.impairment_test_count;
7403 stats.business_combination_count = snapshot.business_combination_count;
7404 stats.ecl_model_count = snapshot.ecl_model_count;
7405 stats.provision_count = snapshot.provision_count;
7406
7407 if self.config.accounting_standards.leases.enabled {
7411 use datasynth_generators::standards::LeaseGenerator;
7412 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7413 .unwrap_or_else(|_| {
7414 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7415 });
7416 let framework =
7417 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7418 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7419 for company in &self.config.companies {
7420 let leases = lease_gen.generate(
7421 &company.code,
7422 start_date,
7423 &self.config.accounting_standards.leases,
7424 framework,
7425 );
7426 snapshot.lease_count += leases.len();
7427 snapshot.leases.extend(leases);
7428 }
7429 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7430 }
7431
7432 if self.config.accounting_standards.fair_value.enabled {
7436 use datasynth_generators::standards::FairValueGenerator;
7437 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7438 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7439 + chrono::Months::new(self.config.global.period_months);
7440 let framework =
7441 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7442 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7443 for company in &self.config.companies {
7444 let measurements = fv_gen.generate(
7445 &company.code,
7446 end_date,
7447 &company.currency,
7448 &self.config.accounting_standards.fair_value,
7449 framework,
7450 );
7451 snapshot.fair_value_measurement_count += measurements.len();
7452 snapshot.fair_value_measurements.extend(measurements);
7453 }
7454 info!(
7455 "v3.3.1 fair value measurements: {}",
7456 snapshot.fair_value_measurement_count
7457 );
7458 }
7459
7460 if self.config.accounting_standards.generate_differences
7464 && matches!(
7465 self.config.accounting_standards.framework,
7466 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7467 )
7468 {
7469 use datasynth_generators::standards::FrameworkReconciliationGenerator;
7470 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7471 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7472 + chrono::Months::new(self.config.global.period_months);
7473 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7474 for company in &self.config.companies {
7475 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7476 snapshot.framework_difference_count += records.len();
7477 snapshot.framework_differences.extend(records);
7478 snapshot.framework_reconciliations.push(reconciliation);
7479 }
7480 info!(
7481 "v3.3.1 framework reconciliation: {} differences across {} entities",
7482 snapshot.framework_difference_count,
7483 snapshot.framework_reconciliations.len()
7484 );
7485 }
7486
7487 info!(
7488 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7489 snapshot.revenue_contract_count,
7490 snapshot.impairment_test_count,
7491 snapshot.business_combination_count,
7492 snapshot.ecl_model_count,
7493 snapshot.provision_count,
7494 snapshot.currency_translation_count,
7495 snapshot.lease_count,
7496 snapshot.fair_value_measurement_count,
7497 snapshot.framework_difference_count,
7498 );
7499 self.check_resources_with_log("post-accounting-standards")?;
7500
7501 Ok(snapshot)
7502 }
7503
7504 fn resolve_accounting_framework(
7508 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
7509 ) -> datasynth_standards::framework::AccountingFramework {
7510 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
7511 use datasynth_standards::framework::AccountingFramework as Fw;
7512 match cfg {
7513 Some(Cfg::Ifrs) => Fw::Ifrs,
7514 Some(Cfg::DualReporting) => Fw::DualReporting,
7515 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
7516 Some(Cfg::GermanGaap) => Fw::GermanGaap,
7517 _ => Fw::UsGaap,
7518 }
7519 }
7520
7521 fn phase_manufacturing(
7523 &mut self,
7524 stats: &mut EnhancedGenerationStatistics,
7525 ) -> SynthResult<ManufacturingSnapshot> {
7526 if !self.phase_config.generate_manufacturing {
7527 debug!("Phase 18: Skipped (manufacturing generation disabled)");
7528 return Ok(ManufacturingSnapshot::default());
7529 }
7530 info!("Phase 18: Generating Manufacturing Data");
7531
7532 let seed = self.seed;
7533 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7534 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7535 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7536 let company_code = self
7537 .config
7538 .companies
7539 .first()
7540 .map(|c| c.code.as_str())
7541 .unwrap_or("1000");
7542
7543 let material_data: Vec<(String, String)> = self
7544 .master_data
7545 .materials
7546 .iter()
7547 .map(|m| (m.material_id.clone(), m.description.clone()))
7548 .collect();
7549
7550 if material_data.is_empty() {
7551 debug!("Phase 18: Skipped (no materials available)");
7552 return Ok(ManufacturingSnapshot::default());
7553 }
7554
7555 let mut snapshot = ManufacturingSnapshot::default();
7556
7557 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7559 if let Some(ctx) = &self.temporal_context {
7561 prod_gen.set_temporal_context(Arc::clone(ctx));
7562 }
7563 let production_orders = prod_gen.generate(
7564 company_code,
7565 &material_data,
7566 start_date,
7567 end_date,
7568 &self.config.manufacturing.production_orders,
7569 &self.config.manufacturing.costing,
7570 &self.config.manufacturing.routing,
7571 );
7572 snapshot.production_order_count = production_orders.len();
7573
7574 let inspection_data: Vec<(String, String, String)> = production_orders
7576 .iter()
7577 .map(|po| {
7578 (
7579 po.order_id.clone(),
7580 po.material_id.clone(),
7581 po.material_description.clone(),
7582 )
7583 })
7584 .collect();
7585
7586 snapshot.production_orders = production_orders;
7587
7588 if !inspection_data.is_empty() {
7589 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7590 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7591 snapshot.quality_inspection_count = inspections.len();
7592 snapshot.quality_inspections = inspections;
7593 }
7594
7595 let storage_locations: Vec<(String, String)> = material_data
7597 .iter()
7598 .enumerate()
7599 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7600 .collect();
7601
7602 let employee_ids: Vec<String> = self
7603 .master_data
7604 .employees
7605 .iter()
7606 .map(|e| e.employee_id.clone())
7607 .collect();
7608 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7609 .with_employee_pool(employee_ids);
7610 let mut cycle_count_total = 0usize;
7611 for month in 0..self.config.global.period_months {
7612 let count_date = start_date + chrono::Months::new(month);
7613 let items_per_count = storage_locations.len().clamp(10, 50);
7614 let cc = cc_gen.generate(
7615 company_code,
7616 &storage_locations,
7617 count_date,
7618 items_per_count,
7619 );
7620 snapshot.cycle_counts.push(cc);
7621 cycle_count_total += 1;
7622 }
7623 snapshot.cycle_count_count = cycle_count_total;
7624
7625 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7627 let bom_components = bom_gen.generate(company_code, &material_data);
7628 snapshot.bom_component_count = bom_components.len();
7629 snapshot.bom_components = bom_components;
7630
7631 let currency = self
7633 .config
7634 .companies
7635 .first()
7636 .map(|c| c.currency.as_str())
7637 .unwrap_or("USD");
7638 let production_order_ids: Vec<String> = snapshot
7639 .production_orders
7640 .iter()
7641 .map(|po| po.order_id.clone())
7642 .collect();
7643 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7644 let inventory_movements = inv_mov_gen.generate_with_production_orders(
7645 company_code,
7646 &material_data,
7647 start_date,
7648 end_date,
7649 2,
7650 currency,
7651 &production_order_ids,
7652 );
7653 snapshot.inventory_movement_count = inventory_movements.len();
7654 snapshot.inventory_movements = inventory_movements;
7655
7656 stats.production_order_count = snapshot.production_order_count;
7657 stats.quality_inspection_count = snapshot.quality_inspection_count;
7658 stats.cycle_count_count = snapshot.cycle_count_count;
7659 stats.bom_component_count = snapshot.bom_component_count;
7660 stats.inventory_movement_count = snapshot.inventory_movement_count;
7661
7662 info!(
7663 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7664 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7665 snapshot.bom_component_count, snapshot.inventory_movement_count
7666 );
7667 self.check_resources_with_log("post-manufacturing")?;
7668
7669 Ok(snapshot)
7670 }
7671
7672 fn phase_sales_kpi_budgets(
7674 &mut self,
7675 coa: &Arc<ChartOfAccounts>,
7676 financial_reporting: &FinancialReportingSnapshot,
7677 stats: &mut EnhancedGenerationStatistics,
7678 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7679 if !self.phase_config.generate_sales_kpi_budgets {
7680 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7681 return Ok(SalesKpiBudgetsSnapshot::default());
7682 }
7683 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7684
7685 let seed = self.seed;
7686 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7687 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7688 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7689 let company_code = self
7690 .config
7691 .companies
7692 .first()
7693 .map(|c| c.code.as_str())
7694 .unwrap_or("1000");
7695
7696 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7697
7698 if self.config.sales_quotes.enabled {
7700 let customer_data: Vec<(String, String)> = self
7701 .master_data
7702 .customers
7703 .iter()
7704 .map(|c| (c.customer_id.clone(), c.name.clone()))
7705 .collect();
7706 let material_data: Vec<(String, String)> = self
7707 .master_data
7708 .materials
7709 .iter()
7710 .map(|m| (m.material_id.clone(), m.description.clone()))
7711 .collect();
7712
7713 if !customer_data.is_empty() && !material_data.is_empty() {
7714 let employee_ids: Vec<String> = self
7715 .master_data
7716 .employees
7717 .iter()
7718 .map(|e| e.employee_id.clone())
7719 .collect();
7720 let customer_ids: Vec<String> = self
7721 .master_data
7722 .customers
7723 .iter()
7724 .map(|c| c.customer_id.clone())
7725 .collect();
7726 let company_currency = self
7727 .config
7728 .companies
7729 .first()
7730 .map(|c| c.currency.as_str())
7731 .unwrap_or("USD");
7732
7733 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7734 .with_pools(employee_ids, customer_ids);
7735 let quotes = quote_gen.generate_with_currency(
7736 company_code,
7737 &customer_data,
7738 &material_data,
7739 start_date,
7740 end_date,
7741 &self.config.sales_quotes,
7742 company_currency,
7743 );
7744 snapshot.sales_quote_count = quotes.len();
7745 snapshot.sales_quotes = quotes;
7746 }
7747 }
7748
7749 if self.config.financial_reporting.management_kpis.enabled {
7751 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7752 let mut kpis = kpi_gen.generate(
7753 company_code,
7754 start_date,
7755 end_date,
7756 &self.config.financial_reporting.management_kpis,
7757 );
7758
7759 {
7761 use rust_decimal::Decimal;
7762
7763 if let Some(income_stmt) =
7764 financial_reporting.financial_statements.iter().find(|fs| {
7765 fs.statement_type == StatementType::IncomeStatement
7766 && fs.company_code == company_code
7767 })
7768 {
7769 let total_revenue: Decimal = income_stmt
7771 .line_items
7772 .iter()
7773 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7774 .map(|li| li.amount)
7775 .sum();
7776 let total_cogs: Decimal = income_stmt
7777 .line_items
7778 .iter()
7779 .filter(|li| {
7780 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7781 && !li.is_total
7782 })
7783 .map(|li| li.amount.abs())
7784 .sum();
7785 let total_opex: Decimal = income_stmt
7786 .line_items
7787 .iter()
7788 .filter(|li| {
7789 li.section.contains("Expense")
7790 && !li.is_total
7791 && !li.section.contains("Cost")
7792 })
7793 .map(|li| li.amount.abs())
7794 .sum();
7795
7796 if total_revenue > Decimal::ZERO {
7797 let hundred = Decimal::from(100);
7798 let gross_margin_pct =
7799 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7800 let operating_income = total_revenue - total_cogs - total_opex;
7801 let op_margin_pct =
7802 (operating_income * hundred / total_revenue).round_dp(2);
7803
7804 for kpi in &mut kpis {
7806 if kpi.name == "Gross Margin" {
7807 kpi.value = gross_margin_pct;
7808 } else if kpi.name == "Operating Margin" {
7809 kpi.value = op_margin_pct;
7810 }
7811 }
7812 }
7813 }
7814
7815 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7817 fs.statement_type == StatementType::BalanceSheet
7818 && fs.company_code == company_code
7819 }) {
7820 let current_assets: Decimal = bs
7821 .line_items
7822 .iter()
7823 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7824 .map(|li| li.amount)
7825 .sum();
7826 let current_liabilities: Decimal = bs
7827 .line_items
7828 .iter()
7829 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7830 .map(|li| li.amount.abs())
7831 .sum();
7832
7833 if current_liabilities > Decimal::ZERO {
7834 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7835 for kpi in &mut kpis {
7836 if kpi.name == "Current Ratio" {
7837 kpi.value = current_ratio;
7838 }
7839 }
7840 }
7841 }
7842 }
7843
7844 snapshot.kpi_count = kpis.len();
7845 snapshot.kpis = kpis;
7846 }
7847
7848 if self.config.financial_reporting.budgets.enabled {
7850 let account_data: Vec<(String, String)> = coa
7851 .accounts
7852 .iter()
7853 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7854 .collect();
7855
7856 if !account_data.is_empty() {
7857 let fiscal_year = start_date.year() as u32;
7858 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7859 let budget = budget_gen.generate(
7860 company_code,
7861 fiscal_year,
7862 &account_data,
7863 &self.config.financial_reporting.budgets,
7864 );
7865 snapshot.budget_line_count = budget.line_items.len();
7866 snapshot.budgets.push(budget);
7867 }
7868 }
7869
7870 stats.sales_quote_count = snapshot.sales_quote_count;
7871 stats.kpi_count = snapshot.kpi_count;
7872 stats.budget_line_count = snapshot.budget_line_count;
7873
7874 info!(
7875 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7876 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7877 );
7878 self.check_resources_with_log("post-sales-kpi-budgets")?;
7879
7880 Ok(snapshot)
7881 }
7882
7883 fn compute_pre_tax_income(
7890 company_code: &str,
7891 journal_entries: &[JournalEntry],
7892 ) -> rust_decimal::Decimal {
7893 use datasynth_core::accounts::AccountCategory;
7894 use rust_decimal::Decimal;
7895
7896 let mut total_revenue = Decimal::ZERO;
7897 let mut total_expenses = Decimal::ZERO;
7898
7899 for je in journal_entries {
7900 if je.header.company_code != company_code {
7901 continue;
7902 }
7903 for line in &je.lines {
7904 let cat = AccountCategory::from_account(&line.gl_account);
7905 match cat {
7906 AccountCategory::Revenue => {
7907 total_revenue += line.credit_amount - line.debit_amount;
7908 }
7909 AccountCategory::Cogs
7910 | AccountCategory::OperatingExpense
7911 | AccountCategory::OtherIncomeExpense => {
7912 total_expenses += line.debit_amount - line.credit_amount;
7913 }
7914 _ => {}
7915 }
7916 }
7917 }
7918
7919 let pti = (total_revenue - total_expenses).round_dp(2);
7920 if pti == rust_decimal::Decimal::ZERO {
7921 rust_decimal::Decimal::from(1_000_000u32)
7924 } else {
7925 pti
7926 }
7927 }
7928
7929 fn phase_tax_generation(
7931 &mut self,
7932 document_flows: &DocumentFlowSnapshot,
7933 journal_entries: &[JournalEntry],
7934 stats: &mut EnhancedGenerationStatistics,
7935 ) -> SynthResult<TaxSnapshot> {
7936 if !self.phase_config.generate_tax {
7937 debug!("Phase 20: Skipped (tax generation disabled)");
7938 return Ok(TaxSnapshot::default());
7939 }
7940 info!("Phase 20: Generating Tax Data");
7941
7942 let seed = self.seed;
7943 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7944 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7945 let fiscal_year = start_date.year();
7946 let company_code = self
7947 .config
7948 .companies
7949 .first()
7950 .map(|c| c.code.as_str())
7951 .unwrap_or("1000");
7952
7953 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7954 seed + 370,
7955 self.config.tax.clone(),
7956 );
7957
7958 let pack = self.primary_pack().clone();
7959 let (jurisdictions, codes) =
7960 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7961
7962 let mut provisions = Vec::new();
7964 if self.config.tax.provisions.enabled {
7965 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7966 for company in &self.config.companies {
7967 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7968 let statutory_rate = rust_decimal::Decimal::new(
7969 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7970 2,
7971 );
7972 let provision = provision_gen.generate(
7973 &company.code,
7974 start_date,
7975 pre_tax_income,
7976 statutory_rate,
7977 );
7978 provisions.push(provision);
7979 }
7980 }
7981
7982 let mut tax_lines = Vec::new();
7984 if !codes.is_empty() {
7985 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7986 datasynth_generators::TaxLineGeneratorConfig::default(),
7987 codes.clone(),
7988 seed + 372,
7989 );
7990
7991 let buyer_country = self
7994 .config
7995 .companies
7996 .first()
7997 .map(|c| c.country.as_str())
7998 .unwrap_or("US");
7999 for vi in &document_flows.vendor_invoices {
8000 let lines = tax_line_gen.generate_for_document(
8001 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8002 &vi.header.document_id,
8003 buyer_country, buyer_country,
8005 vi.payable_amount,
8006 vi.header.document_date,
8007 None,
8008 );
8009 tax_lines.extend(lines);
8010 }
8011
8012 for ci in &document_flows.customer_invoices {
8014 let lines = tax_line_gen.generate_for_document(
8015 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8016 &ci.header.document_id,
8017 buyer_country, buyer_country,
8019 ci.total_gross_amount,
8020 ci.header.document_date,
8021 None,
8022 );
8023 tax_lines.extend(lines);
8024 }
8025 }
8026
8027 let deferred_tax = {
8029 let companies: Vec<(&str, &str)> = self
8030 .config
8031 .companies
8032 .iter()
8033 .map(|c| (c.code.as_str(), c.country.as_str()))
8034 .collect();
8035 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8036 deferred_gen.generate(&companies, start_date, journal_entries)
8037 };
8038
8039 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8042 std::collections::HashMap::new();
8043 for vi in &document_flows.vendor_invoices {
8044 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8045 }
8046 for ci in &document_flows.customer_invoices {
8047 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8048 }
8049
8050 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8052 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8053 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8054 &tax_lines,
8055 company_code,
8056 &doc_dates,
8057 end_date,
8058 );
8059 debug!("Generated {} tax posting JEs", jes.len());
8060 jes
8061 } else {
8062 Vec::new()
8063 };
8064
8065 let snapshot = TaxSnapshot {
8066 jurisdiction_count: jurisdictions.len(),
8067 code_count: codes.len(),
8068 jurisdictions,
8069 codes,
8070 tax_provisions: provisions,
8071 tax_lines,
8072 tax_returns: Vec::new(),
8073 withholding_records: Vec::new(),
8074 tax_anomaly_labels: Vec::new(),
8075 deferred_tax,
8076 tax_posting_journal_entries,
8077 };
8078
8079 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8080 stats.tax_code_count = snapshot.code_count;
8081 stats.tax_provision_count = snapshot.tax_provisions.len();
8082 stats.tax_line_count = snapshot.tax_lines.len();
8083
8084 info!(
8085 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8086 snapshot.jurisdiction_count,
8087 snapshot.code_count,
8088 snapshot.tax_provisions.len(),
8089 snapshot.deferred_tax.temporary_differences.len(),
8090 snapshot.deferred_tax.journal_entries.len(),
8091 snapshot.tax_posting_journal_entries.len(),
8092 );
8093 self.check_resources_with_log("post-tax")?;
8094
8095 Ok(snapshot)
8096 }
8097
8098 fn phase_esg_generation(
8100 &mut self,
8101 document_flows: &DocumentFlowSnapshot,
8102 manufacturing: &ManufacturingSnapshot,
8103 stats: &mut EnhancedGenerationStatistics,
8104 ) -> SynthResult<EsgSnapshot> {
8105 if !self.phase_config.generate_esg {
8106 debug!("Phase 21: Skipped (ESG generation disabled)");
8107 return Ok(EsgSnapshot::default());
8108 }
8109 let degradation = self.check_resources()?;
8110 if degradation >= DegradationLevel::Reduced {
8111 debug!(
8112 "Phase skipped due to resource pressure (degradation: {:?})",
8113 degradation
8114 );
8115 return Ok(EsgSnapshot::default());
8116 }
8117 info!("Phase 21: Generating ESG Data");
8118
8119 let seed = self.seed;
8120 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8121 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8122 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8123 let entity_id = self
8124 .config
8125 .companies
8126 .first()
8127 .map(|c| c.code.as_str())
8128 .unwrap_or("1000");
8129
8130 let esg_cfg = &self.config.esg;
8131 let mut snapshot = EsgSnapshot::default();
8132
8133 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8135 esg_cfg.environmental.energy.clone(),
8136 seed + 80,
8137 );
8138 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8139
8140 let facility_count = esg_cfg.environmental.energy.facility_count;
8142 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8143 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8144
8145 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8147 seed + 82,
8148 esg_cfg.environmental.waste.diversion_target,
8149 facility_count,
8150 );
8151 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8152
8153 let mut emission_gen =
8155 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8156
8157 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8159 .iter()
8160 .map(|e| datasynth_generators::EnergyInput {
8161 facility_id: e.facility_id.clone(),
8162 energy_type: match e.energy_source {
8163 EnergySourceType::NaturalGas => {
8164 datasynth_generators::EnergyInputType::NaturalGas
8165 }
8166 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8167 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8168 _ => datasynth_generators::EnergyInputType::Electricity,
8169 },
8170 consumption_kwh: e.consumption_kwh,
8171 period: e.period,
8172 })
8173 .collect();
8174
8175 if !manufacturing.production_orders.is_empty() {
8177 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8178 &manufacturing.production_orders,
8179 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8182 if !mfg_energy.is_empty() {
8183 info!(
8184 "ESG: {} energy inputs derived from {} production orders",
8185 mfg_energy.len(),
8186 manufacturing.production_orders.len(),
8187 );
8188 energy_inputs.extend(mfg_energy);
8189 }
8190 }
8191
8192 let mut emissions = Vec::new();
8193 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8194 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8195
8196 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8198 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8199 for payment in &document_flows.payments {
8200 if payment.is_vendor {
8201 *totals
8202 .entry(payment.business_partner_id.clone())
8203 .or_default() += payment.amount;
8204 }
8205 }
8206 totals
8207 };
8208 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8209 .master_data
8210 .vendors
8211 .iter()
8212 .map(|v| {
8213 let spend = vendor_payment_totals
8214 .get(&v.vendor_id)
8215 .copied()
8216 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8217 datasynth_generators::VendorSpendInput {
8218 vendor_id: v.vendor_id.clone(),
8219 category: format!("{:?}", v.vendor_type).to_lowercase(),
8220 spend,
8221 country: v.country.clone(),
8222 }
8223 })
8224 .collect();
8225 if !vendor_spend.is_empty() {
8226 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8227 entity_id,
8228 &vendor_spend,
8229 start_date,
8230 end_date,
8231 ));
8232 }
8233
8234 let headcount = self.master_data.employees.len() as u32;
8236 if headcount > 0 {
8237 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8238 emissions.extend(emission_gen.generate_scope3_business_travel(
8239 entity_id,
8240 travel_spend,
8241 start_date,
8242 ));
8243 emissions
8244 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8245 }
8246
8247 snapshot.emission_count = emissions.len();
8248 snapshot.emissions = emissions;
8249 snapshot.energy = energy_records;
8250
8251 let mut workforce_gen =
8253 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8254 let total_headcount = headcount.max(100);
8255 snapshot.diversity =
8256 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8257 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8258
8259 if !self.master_data.employees.is_empty() {
8261 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8262 entity_id,
8263 &self.master_data.employees,
8264 end_date,
8265 );
8266 if !hr_diversity.is_empty() {
8267 info!(
8268 "ESG: {} diversity metrics derived from {} actual employees",
8269 hr_diversity.len(),
8270 self.master_data.employees.len(),
8271 );
8272 snapshot.diversity.extend(hr_diversity);
8273 }
8274 }
8275
8276 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8277 entity_id,
8278 facility_count,
8279 start_date,
8280 end_date,
8281 );
8282
8283 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8286 entity_id,
8287 &snapshot.safety_incidents,
8288 total_hours,
8289 start_date,
8290 );
8291 snapshot.safety_metrics = vec![safety_metric];
8292
8293 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8295 seed + 85,
8296 esg_cfg.governance.board_size,
8297 esg_cfg.governance.independence_target,
8298 );
8299 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8300
8301 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8303 esg_cfg.supply_chain_esg.clone(),
8304 seed + 86,
8305 );
8306 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8307 .master_data
8308 .vendors
8309 .iter()
8310 .map(|v| datasynth_generators::VendorInput {
8311 vendor_id: v.vendor_id.clone(),
8312 country: v.country.clone(),
8313 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8314 quality_score: None,
8315 })
8316 .collect();
8317 snapshot.supplier_assessments =
8318 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8319
8320 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8322 seed + 87,
8323 esg_cfg.reporting.clone(),
8324 esg_cfg.climate_scenarios.clone(),
8325 );
8326 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8327 snapshot.disclosures = disclosure_gen.generate_disclosures(
8328 entity_id,
8329 &snapshot.materiality,
8330 start_date,
8331 end_date,
8332 );
8333 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8334 snapshot.disclosure_count = snapshot.disclosures.len();
8335
8336 if esg_cfg.anomaly_rate > 0.0 {
8338 let mut anomaly_injector =
8339 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8340 let mut labels = Vec::new();
8341 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8342 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8343 labels.extend(
8344 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8345 );
8346 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8347 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8348 snapshot.anomaly_labels = labels;
8349 }
8350
8351 stats.esg_emission_count = snapshot.emission_count;
8352 stats.esg_disclosure_count = snapshot.disclosure_count;
8353
8354 info!(
8355 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8356 snapshot.emission_count,
8357 snapshot.disclosure_count,
8358 snapshot.supplier_assessments.len()
8359 );
8360 self.check_resources_with_log("post-esg")?;
8361
8362 Ok(snapshot)
8363 }
8364
8365 fn phase_treasury_data(
8367 &mut self,
8368 document_flows: &DocumentFlowSnapshot,
8369 subledger: &SubledgerSnapshot,
8370 intercompany: &IntercompanySnapshot,
8371 stats: &mut EnhancedGenerationStatistics,
8372 ) -> SynthResult<TreasurySnapshot> {
8373 if !self.phase_config.generate_treasury {
8374 debug!("Phase 22: Skipped (treasury generation disabled)");
8375 return Ok(TreasurySnapshot::default());
8376 }
8377 let degradation = self.check_resources()?;
8378 if degradation >= DegradationLevel::Reduced {
8379 debug!(
8380 "Phase skipped due to resource pressure (degradation: {:?})",
8381 degradation
8382 );
8383 return Ok(TreasurySnapshot::default());
8384 }
8385 info!("Phase 22: Generating Treasury Data");
8386
8387 let seed = self.seed;
8388 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8389 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8390 let currency = self
8391 .config
8392 .companies
8393 .first()
8394 .map(|c| c.currency.as_str())
8395 .unwrap_or("USD");
8396 let entity_id = self
8397 .config
8398 .companies
8399 .first()
8400 .map(|c| c.code.as_str())
8401 .unwrap_or("1000");
8402
8403 let mut snapshot = TreasurySnapshot::default();
8404
8405 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8407 self.config.treasury.debt.clone(),
8408 seed + 90,
8409 );
8410 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8411
8412 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8414 self.config.treasury.hedging.clone(),
8415 seed + 91,
8416 );
8417 for debt in &snapshot.debt_instruments {
8418 if debt.rate_type == InterestRateType::Variable {
8419 let swap = hedge_gen.generate_ir_swap(
8420 currency,
8421 debt.principal,
8422 debt.origination_date,
8423 debt.maturity_date,
8424 );
8425 snapshot.hedging_instruments.push(swap);
8426 }
8427 }
8428
8429 {
8432 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8433 for payment in &document_flows.payments {
8434 if payment.currency != currency {
8435 let entry = fx_map
8436 .entry(payment.currency.clone())
8437 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8438 entry.0 += payment.amount;
8439 if payment.header.document_date > entry.1 {
8441 entry.1 = payment.header.document_date;
8442 }
8443 }
8444 }
8445 if !fx_map.is_empty() {
8446 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8447 .into_iter()
8448 .map(|(foreign_ccy, (net_amount, settlement_date))| {
8449 datasynth_generators::treasury::FxExposure {
8450 currency_pair: format!("{foreign_ccy}/{currency}"),
8451 foreign_currency: foreign_ccy,
8452 net_amount,
8453 settlement_date,
8454 description: "AP payment FX exposure".to_string(),
8455 }
8456 })
8457 .collect();
8458 let (fx_instruments, fx_relationships) =
8459 hedge_gen.generate(start_date, &fx_exposures);
8460 snapshot.hedging_instruments.extend(fx_instruments);
8461 snapshot.hedge_relationships.extend(fx_relationships);
8462 }
8463 }
8464
8465 if self.config.treasury.anomaly_rate > 0.0 {
8467 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8468 seed + 92,
8469 self.config.treasury.anomaly_rate,
8470 );
8471 let mut labels = Vec::new();
8472 labels.extend(
8473 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8474 );
8475 snapshot.treasury_anomaly_labels = labels;
8476 }
8477
8478 if self.config.treasury.cash_positioning.enabled {
8480 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8481
8482 for payment in &document_flows.payments {
8484 cash_flows.push(datasynth_generators::treasury::CashFlow {
8485 date: payment.header.document_date,
8486 account_id: format!("{entity_id}-MAIN"),
8487 amount: payment.amount,
8488 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8489 });
8490 }
8491
8492 for chain in &document_flows.o2c_chains {
8494 if let Some(ref receipt) = chain.customer_receipt {
8495 cash_flows.push(datasynth_generators::treasury::CashFlow {
8496 date: receipt.header.document_date,
8497 account_id: format!("{entity_id}-MAIN"),
8498 amount: receipt.amount,
8499 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8500 });
8501 }
8502 for receipt in &chain.remainder_receipts {
8504 cash_flows.push(datasynth_generators::treasury::CashFlow {
8505 date: receipt.header.document_date,
8506 account_id: format!("{entity_id}-MAIN"),
8507 amount: receipt.amount,
8508 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8509 });
8510 }
8511 }
8512
8513 if !cash_flows.is_empty() {
8514 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8515 self.config.treasury.cash_positioning.clone(),
8516 seed + 93,
8517 );
8518 let account_id = format!("{entity_id}-MAIN");
8519 snapshot.cash_positions = cash_gen.generate(
8520 entity_id,
8521 &account_id,
8522 currency,
8523 &cash_flows,
8524 start_date,
8525 start_date + chrono::Months::new(self.config.global.period_months),
8526 rust_decimal::Decimal::new(1_000_000, 0), );
8528 }
8529 }
8530
8531 if self.config.treasury.cash_forecasting.enabled {
8533 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8534
8535 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8537 .ar_invoices
8538 .iter()
8539 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8540 .map(|inv| {
8541 let days_past_due = if inv.due_date < end_date {
8542 (end_date - inv.due_date).num_days().max(0) as u32
8543 } else {
8544 0
8545 };
8546 datasynth_generators::treasury::ArAgingItem {
8547 expected_date: inv.due_date,
8548 amount: inv.amount_remaining,
8549 days_past_due,
8550 document_id: inv.invoice_number.clone(),
8551 }
8552 })
8553 .collect();
8554
8555 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8557 .ap_invoices
8558 .iter()
8559 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8560 .map(|inv| datasynth_generators::treasury::ApAgingItem {
8561 payment_date: inv.due_date,
8562 amount: inv.amount_remaining,
8563 document_id: inv.invoice_number.clone(),
8564 })
8565 .collect();
8566
8567 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8568 self.config.treasury.cash_forecasting.clone(),
8569 seed + 94,
8570 );
8571 let forecast = forecast_gen.generate(
8572 entity_id,
8573 currency,
8574 end_date,
8575 &ar_items,
8576 &ap_items,
8577 &[], );
8579 snapshot.cash_forecasts.push(forecast);
8580 }
8581
8582 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8584 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8585 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8586 self.config.treasury.cash_pooling.clone(),
8587 seed + 95,
8588 );
8589
8590 let account_ids: Vec<String> = snapshot
8592 .cash_positions
8593 .iter()
8594 .map(|cp| cp.bank_account_id.clone())
8595 .collect::<std::collections::HashSet<_>>()
8596 .into_iter()
8597 .collect();
8598
8599 if let Some(pool) =
8600 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8601 {
8602 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8604 for cp in &snapshot.cash_positions {
8605 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8606 }
8607
8608 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8609 latest_balances
8610 .into_iter()
8611 .filter(|(id, _)| pool.participant_accounts.contains(id))
8612 .map(
8613 |(id, balance)| datasynth_generators::treasury::AccountBalance {
8614 account_id: id,
8615 balance,
8616 },
8617 )
8618 .collect();
8619
8620 let sweeps =
8621 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8622 snapshot.cash_pool_sweeps = sweeps;
8623 snapshot.cash_pools.push(pool);
8624 }
8625 }
8626
8627 if self.config.treasury.bank_guarantees.enabled {
8629 let vendor_names: Vec<String> = self
8630 .master_data
8631 .vendors
8632 .iter()
8633 .map(|v| v.name.clone())
8634 .collect();
8635 if !vendor_names.is_empty() {
8636 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8637 self.config.treasury.bank_guarantees.clone(),
8638 seed + 96,
8639 );
8640 snapshot.bank_guarantees =
8641 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8642 }
8643 }
8644
8645 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8647 let entity_ids: Vec<String> = self
8648 .config
8649 .companies
8650 .iter()
8651 .map(|c| c.code.clone())
8652 .collect();
8653 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8654 .matched_pairs
8655 .iter()
8656 .map(|mp| {
8657 (
8658 mp.seller_company.clone(),
8659 mp.buyer_company.clone(),
8660 mp.amount,
8661 )
8662 })
8663 .collect();
8664 if entity_ids.len() >= 2 {
8665 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8666 self.config.treasury.netting.clone(),
8667 seed + 97,
8668 );
8669 snapshot.netting_runs = netting_gen.generate(
8670 &entity_ids,
8671 currency,
8672 start_date,
8673 self.config.global.period_months,
8674 &ic_amounts,
8675 );
8676 }
8677 }
8678
8679 {
8681 use datasynth_generators::treasury::TreasuryAccounting;
8682
8683 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8684 let mut treasury_jes = Vec::new();
8685
8686 if !snapshot.debt_instruments.is_empty() {
8688 let debt_jes =
8689 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8690 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8691 treasury_jes.extend(debt_jes);
8692 }
8693
8694 if !snapshot.hedging_instruments.is_empty() {
8696 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8697 &snapshot.hedging_instruments,
8698 &snapshot.hedge_relationships,
8699 end_date,
8700 entity_id,
8701 );
8702 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8703 treasury_jes.extend(hedge_jes);
8704 }
8705
8706 if !snapshot.cash_pool_sweeps.is_empty() {
8708 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8709 &snapshot.cash_pool_sweeps,
8710 entity_id,
8711 );
8712 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8713 treasury_jes.extend(sweep_jes);
8714 }
8715
8716 if !treasury_jes.is_empty() {
8717 debug!("Total treasury journal entries: {}", treasury_jes.len());
8718 }
8719 snapshot.journal_entries = treasury_jes;
8720 }
8721
8722 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8723 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8724 stats.cash_position_count = snapshot.cash_positions.len();
8725 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8726 stats.cash_pool_count = snapshot.cash_pools.len();
8727
8728 info!(
8729 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8730 snapshot.debt_instruments.len(),
8731 snapshot.hedging_instruments.len(),
8732 snapshot.cash_positions.len(),
8733 snapshot.cash_forecasts.len(),
8734 snapshot.cash_pools.len(),
8735 snapshot.bank_guarantees.len(),
8736 snapshot.netting_runs.len(),
8737 snapshot.journal_entries.len(),
8738 );
8739 self.check_resources_with_log("post-treasury")?;
8740
8741 Ok(snapshot)
8742 }
8743
8744 fn phase_project_accounting(
8746 &mut self,
8747 document_flows: &DocumentFlowSnapshot,
8748 hr: &HrSnapshot,
8749 stats: &mut EnhancedGenerationStatistics,
8750 ) -> SynthResult<ProjectAccountingSnapshot> {
8751 if !self.phase_config.generate_project_accounting {
8752 debug!("Phase 23: Skipped (project accounting disabled)");
8753 return Ok(ProjectAccountingSnapshot::default());
8754 }
8755 let degradation = self.check_resources()?;
8756 if degradation >= DegradationLevel::Reduced {
8757 debug!(
8758 "Phase skipped due to resource pressure (degradation: {:?})",
8759 degradation
8760 );
8761 return Ok(ProjectAccountingSnapshot::default());
8762 }
8763 info!("Phase 23: Generating Project Accounting Data");
8764
8765 let seed = self.seed;
8766 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8767 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8768 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8769 let company_code = self
8770 .config
8771 .companies
8772 .first()
8773 .map(|c| c.code.as_str())
8774 .unwrap_or("1000");
8775
8776 let mut snapshot = ProjectAccountingSnapshot::default();
8777
8778 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8780 self.config.project_accounting.clone(),
8781 seed + 95,
8782 );
8783 let pool = project_gen.generate(company_code, start_date, end_date);
8784 snapshot.projects = pool.projects.clone();
8785
8786 {
8788 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8789 Vec::new();
8790
8791 for te in &hr.time_entries {
8793 let total_hours = te.hours_regular + te.hours_overtime;
8794 if total_hours > 0.0 {
8795 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8796 id: te.entry_id.clone(),
8797 entity_id: company_code.to_string(),
8798 date: te.date,
8799 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8800 .unwrap_or(rust_decimal::Decimal::ZERO),
8801 source_type: CostSourceType::TimeEntry,
8802 hours: Some(
8803 rust_decimal::Decimal::from_f64_retain(total_hours)
8804 .unwrap_or(rust_decimal::Decimal::ZERO),
8805 ),
8806 });
8807 }
8808 }
8809
8810 for er in &hr.expense_reports {
8812 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8813 id: er.report_id.clone(),
8814 entity_id: company_code.to_string(),
8815 date: er.submission_date,
8816 amount: er.total_amount,
8817 source_type: CostSourceType::ExpenseReport,
8818 hours: None,
8819 });
8820 }
8821
8822 for po in &document_flows.purchase_orders {
8824 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8825 id: po.header.document_id.clone(),
8826 entity_id: company_code.to_string(),
8827 date: po.header.document_date,
8828 amount: po.total_net_amount,
8829 source_type: CostSourceType::PurchaseOrder,
8830 hours: None,
8831 });
8832 }
8833
8834 for vi in &document_flows.vendor_invoices {
8836 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8837 id: vi.header.document_id.clone(),
8838 entity_id: company_code.to_string(),
8839 date: vi.header.document_date,
8840 amount: vi.payable_amount,
8841 source_type: CostSourceType::VendorInvoice,
8842 hours: None,
8843 });
8844 }
8845
8846 if !source_docs.is_empty() && !pool.projects.is_empty() {
8847 let mut cost_gen =
8848 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8849 self.config.project_accounting.cost_allocation.clone(),
8850 seed + 99,
8851 );
8852 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8853 }
8854 }
8855
8856 if self.config.project_accounting.change_orders.enabled {
8858 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8859 self.config.project_accounting.change_orders.clone(),
8860 seed + 96,
8861 );
8862 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8863 }
8864
8865 if self.config.project_accounting.milestones.enabled {
8867 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8868 self.config.project_accounting.milestones.clone(),
8869 seed + 97,
8870 );
8871 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8872 }
8873
8874 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8876 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8877 self.config.project_accounting.earned_value.clone(),
8878 seed + 98,
8879 );
8880 snapshot.earned_value_metrics =
8881 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8882 }
8883
8884 if self.config.project_accounting.revenue_recognition.enabled
8886 && !snapshot.projects.is_empty()
8887 && !snapshot.cost_lines.is_empty()
8888 {
8889 use datasynth_generators::project_accounting::RevenueGenerator;
8890 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8891 let avg_contract_value =
8892 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8893 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8894
8895 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8898 snapshot
8899 .projects
8900 .iter()
8901 .filter(|p| {
8902 matches!(
8903 p.project_type,
8904 datasynth_core::models::ProjectType::Customer
8905 )
8906 })
8907 .map(|p| {
8908 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8909 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8910 } else {
8912 avg_contract_value
8913 };
8914 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8916 })
8917 .collect();
8918
8919 if !contract_values.is_empty() {
8920 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8921 snapshot.revenue_records = rev_gen.generate(
8922 &snapshot.projects,
8923 &snapshot.cost_lines,
8924 &contract_values,
8925 start_date,
8926 end_date,
8927 );
8928 debug!(
8929 "Generated {} revenue recognition records for {} customer projects",
8930 snapshot.revenue_records.len(),
8931 contract_values.len()
8932 );
8933 }
8934 }
8935
8936 stats.project_count = snapshot.projects.len();
8937 stats.project_change_order_count = snapshot.change_orders.len();
8938 stats.project_cost_line_count = snapshot.cost_lines.len();
8939
8940 info!(
8941 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8942 snapshot.projects.len(),
8943 snapshot.change_orders.len(),
8944 snapshot.milestones.len(),
8945 snapshot.earned_value_metrics.len()
8946 );
8947 self.check_resources_with_log("post-project-accounting")?;
8948
8949 Ok(snapshot)
8950 }
8951
8952 fn phase_evolution_events(
8954 &mut self,
8955 stats: &mut EnhancedGenerationStatistics,
8956 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8957 if !self.phase_config.generate_evolution_events {
8958 debug!("Phase 24: Skipped (evolution events disabled)");
8959 return Ok((Vec::new(), Vec::new()));
8960 }
8961 info!("Phase 24: Generating Process Evolution + Organizational Events");
8962
8963 let seed = self.seed;
8964 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8965 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8966 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8967
8968 let mut proc_gen =
8970 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8971 seed + 100,
8972 );
8973 let process_events = proc_gen.generate_events(start_date, end_date);
8974
8975 let company_codes: Vec<String> = self
8977 .config
8978 .companies
8979 .iter()
8980 .map(|c| c.code.clone())
8981 .collect();
8982 let mut org_gen =
8983 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8984 seed + 101,
8985 );
8986 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8987
8988 stats.process_evolution_event_count = process_events.len();
8989 stats.organizational_event_count = org_events.len();
8990
8991 info!(
8992 "Evolution events generated: {} process evolution, {} organizational",
8993 process_events.len(),
8994 org_events.len()
8995 );
8996 self.check_resources_with_log("post-evolution-events")?;
8997
8998 Ok((process_events, org_events))
8999 }
9000
9001 fn phase_disruption_events(
9004 &self,
9005 stats: &mut EnhancedGenerationStatistics,
9006 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9007 if !self.config.organizational_events.enabled {
9008 debug!("Phase 24b: Skipped (organizational events disabled)");
9009 return Ok(Vec::new());
9010 }
9011 info!("Phase 24b: Generating Disruption Events");
9012
9013 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9014 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9015 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9016
9017 let company_codes: Vec<String> = self
9018 .config
9019 .companies
9020 .iter()
9021 .map(|c| c.code.clone())
9022 .collect();
9023
9024 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9025 let events = gen.generate(start_date, end_date, &company_codes);
9026
9027 stats.disruption_event_count = events.len();
9028 info!("Disruption events generated: {} events", events.len());
9029 self.check_resources_with_log("post-disruption-events")?;
9030
9031 Ok(events)
9032 }
9033
9034 fn phase_counterfactuals(
9041 &self,
9042 journal_entries: &[JournalEntry],
9043 stats: &mut EnhancedGenerationStatistics,
9044 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9045 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9046 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9047 return Ok(Vec::new());
9048 }
9049 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9050
9051 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9052
9053 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9054
9055 let specs = [
9057 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9058 CounterfactualSpec::ShiftDate { days: -14 },
9059 CounterfactualSpec::SelfApprove,
9060 CounterfactualSpec::SplitTransaction { split_count: 3 },
9061 ];
9062
9063 let pairs: Vec<_> = journal_entries
9064 .iter()
9065 .enumerate()
9066 .map(|(i, je)| {
9067 let spec = &specs[i % specs.len()];
9068 gen.generate(je, spec)
9069 })
9070 .collect();
9071
9072 stats.counterfactual_pair_count = pairs.len();
9073 info!(
9074 "Counterfactual pairs generated: {} pairs from {} journal entries",
9075 pairs.len(),
9076 journal_entries.len()
9077 );
9078 self.check_resources_with_log("post-counterfactuals")?;
9079
9080 Ok(pairs)
9081 }
9082
9083 fn phase_red_flags(
9090 &self,
9091 anomaly_labels: &AnomalyLabels,
9092 document_flows: &DocumentFlowSnapshot,
9093 stats: &mut EnhancedGenerationStatistics,
9094 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9095 if !self.config.fraud.enabled {
9096 debug!("Phase 26: Skipped (fraud generation disabled)");
9097 return Ok(Vec::new());
9098 }
9099 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9100
9101 use datasynth_generators::fraud::RedFlagGenerator;
9102
9103 let generator = RedFlagGenerator::new();
9104 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9105
9106 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9108 .labels
9109 .iter()
9110 .filter(|label| label.anomaly_type.is_intentional())
9111 .map(|label| label.document_id.as_str())
9112 .collect();
9113
9114 let mut flags = Vec::new();
9115
9116 for chain in &document_flows.p2p_chains {
9118 let doc_id = &chain.purchase_order.header.document_id;
9119 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9120 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9121 }
9122
9123 for chain in &document_flows.o2c_chains {
9125 let doc_id = &chain.sales_order.header.document_id;
9126 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9127 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9128 }
9129
9130 stats.red_flag_count = flags.len();
9131 info!(
9132 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9133 flags.len(),
9134 document_flows.p2p_chains.len(),
9135 document_flows.o2c_chains.len(),
9136 fraud_doc_ids.len()
9137 );
9138 self.check_resources_with_log("post-red-flags")?;
9139
9140 Ok(flags)
9141 }
9142
9143 fn phase_collusion_rings(
9149 &mut self,
9150 stats: &mut EnhancedGenerationStatistics,
9151 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9152 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9153 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9154 return Ok(Vec::new());
9155 }
9156 info!("Phase 26b: Generating Collusion Rings");
9157
9158 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9159 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9160 let months = self.config.global.period_months;
9161
9162 let employee_ids: Vec<String> = self
9163 .master_data
9164 .employees
9165 .iter()
9166 .map(|e| e.employee_id.clone())
9167 .collect();
9168 let vendor_ids: Vec<String> = self
9169 .master_data
9170 .vendors
9171 .iter()
9172 .map(|v| v.vendor_id.clone())
9173 .collect();
9174
9175 let mut generator =
9176 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9177 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9178
9179 stats.collusion_ring_count = rings.len();
9180 info!(
9181 "Collusion rings generated: {} rings, total members: {}",
9182 rings.len(),
9183 rings
9184 .iter()
9185 .map(datasynth_generators::fraud::CollusionRing::size)
9186 .sum::<usize>()
9187 );
9188 self.check_resources_with_log("post-collusion-rings")?;
9189
9190 Ok(rings)
9191 }
9192
9193 fn phase_temporal_attributes(
9198 &mut self,
9199 stats: &mut EnhancedGenerationStatistics,
9200 ) -> SynthResult<
9201 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9202 > {
9203 if !self.config.temporal_attributes.enabled {
9204 debug!("Phase 27: Skipped (temporal attributes disabled)");
9205 return Ok(Vec::new());
9206 }
9207 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9208
9209 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9210 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9211
9212 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9216 || self.config.temporal_attributes.enabled;
9217 let temporal_config = {
9218 let ta = &self.config.temporal_attributes;
9219 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9220 .enabled(ta.enabled)
9221 .closed_probability(ta.valid_time.closed_probability)
9222 .avg_validity_days(ta.valid_time.avg_validity_days)
9223 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9224 .with_version_chains(if generate_version_chains {
9225 ta.avg_versions_per_entity
9226 } else {
9227 1.0
9228 })
9229 .build()
9230 };
9231 let temporal_config = if self
9233 .config
9234 .temporal_attributes
9235 .transaction_time
9236 .allow_backdating
9237 {
9238 let mut c = temporal_config;
9239 c.transaction_time.allow_backdating = true;
9240 c.transaction_time.backdating_probability = self
9241 .config
9242 .temporal_attributes
9243 .transaction_time
9244 .backdating_probability;
9245 c.transaction_time.max_backdate_days = self
9246 .config
9247 .temporal_attributes
9248 .transaction_time
9249 .max_backdate_days;
9250 c
9251 } else {
9252 temporal_config
9253 };
9254 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9255 temporal_config,
9256 self.seed + 130,
9257 start_date,
9258 );
9259
9260 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9261 self.seed + 130,
9262 datasynth_core::GeneratorType::Vendor,
9263 );
9264
9265 let chains: Vec<_> = self
9266 .master_data
9267 .vendors
9268 .iter()
9269 .map(|vendor| {
9270 let id = uuid_factory.next();
9271 gen.generate_version_chain(vendor.clone(), id)
9272 })
9273 .collect();
9274
9275 stats.temporal_version_chain_count = chains.len();
9276 info!("Temporal version chains generated: {} chains", chains.len());
9277 self.check_resources_with_log("post-temporal-attributes")?;
9278
9279 Ok(chains)
9280 }
9281
9282 fn phase_entity_relationships(
9292 &self,
9293 journal_entries: &[JournalEntry],
9294 document_flows: &DocumentFlowSnapshot,
9295 stats: &mut EnhancedGenerationStatistics,
9296 ) -> SynthResult<(
9297 Option<datasynth_core::models::EntityGraph>,
9298 Vec<datasynth_core::models::CrossProcessLink>,
9299 )> {
9300 use datasynth_generators::relationships::{
9301 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9302 TransactionSummary,
9303 };
9304
9305 let rs_enabled = self.config.relationship_strength.enabled;
9306 let cpl_enabled = self.config.cross_process_links.enabled
9307 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9308
9309 if !rs_enabled && !cpl_enabled {
9310 debug!(
9311 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9312 );
9313 return Ok((None, Vec::new()));
9314 }
9315
9316 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9317
9318 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9319 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9320
9321 let company_code = self
9322 .config
9323 .companies
9324 .first()
9325 .map(|c| c.code.as_str())
9326 .unwrap_or("1000");
9327
9328 let gen_config = EntityGraphConfig {
9330 enabled: rs_enabled,
9331 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9332 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9333 enable_return_flows: false,
9334 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9335 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9336 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9338 1.0
9339 } else {
9340 0.30
9341 },
9342 ..Default::default()
9343 },
9344 strength_config: datasynth_generators::relationships::StrengthConfig {
9345 transaction_volume_weight: self
9346 .config
9347 .relationship_strength
9348 .calculation
9349 .transaction_volume_weight,
9350 transaction_count_weight: self
9351 .config
9352 .relationship_strength
9353 .calculation
9354 .transaction_count_weight,
9355 duration_weight: self
9356 .config
9357 .relationship_strength
9358 .calculation
9359 .relationship_duration_weight,
9360 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9361 mutual_connections_weight: self
9362 .config
9363 .relationship_strength
9364 .calculation
9365 .mutual_connections_weight,
9366 recency_half_life_days: self
9367 .config
9368 .relationship_strength
9369 .calculation
9370 .recency_half_life_days,
9371 },
9372 ..Default::default()
9373 };
9374
9375 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9376
9377 let entity_graph = if rs_enabled {
9379 let vendor_summaries: Vec<EntitySummary> = self
9381 .master_data
9382 .vendors
9383 .iter()
9384 .map(|v| {
9385 EntitySummary::new(
9386 &v.vendor_id,
9387 &v.name,
9388 datasynth_core::models::GraphEntityType::Vendor,
9389 start_date,
9390 )
9391 })
9392 .collect();
9393
9394 let customer_summaries: Vec<EntitySummary> = self
9395 .master_data
9396 .customers
9397 .iter()
9398 .map(|c| {
9399 EntitySummary::new(
9400 &c.customer_id,
9401 &c.name,
9402 datasynth_core::models::GraphEntityType::Customer,
9403 start_date,
9404 )
9405 })
9406 .collect();
9407
9408 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9413 std::collections::HashMap::new();
9414
9415 for je in journal_entries {
9416 let cc = je.header.company_code.clone();
9417 let posting_date = je.header.posting_date;
9418 for line in &je.lines {
9419 if let Some(ref tp) = line.trading_partner {
9420 let amount = if line.debit_amount > line.credit_amount {
9421 line.debit_amount
9422 } else {
9423 line.credit_amount
9424 };
9425 let entry = txn_summaries
9426 .entry((cc.clone(), tp.clone()))
9427 .or_insert_with(|| TransactionSummary {
9428 total_volume: rust_decimal::Decimal::ZERO,
9429 transaction_count: 0,
9430 first_transaction_date: posting_date,
9431 last_transaction_date: posting_date,
9432 related_entities: std::collections::HashSet::new(),
9433 });
9434 entry.total_volume += amount;
9435 entry.transaction_count += 1;
9436 if posting_date < entry.first_transaction_date {
9437 entry.first_transaction_date = posting_date;
9438 }
9439 if posting_date > entry.last_transaction_date {
9440 entry.last_transaction_date = posting_date;
9441 }
9442 entry.related_entities.insert(cc.clone());
9443 }
9444 }
9445 }
9446
9447 for chain in &document_flows.p2p_chains {
9450 let cc = chain.purchase_order.header.company_code.clone();
9451 let vendor_id = chain.purchase_order.vendor_id.clone();
9452 let po_date = chain.purchase_order.header.document_date;
9453 let amount = chain.purchase_order.total_net_amount;
9454
9455 let entry = txn_summaries
9456 .entry((cc.clone(), vendor_id))
9457 .or_insert_with(|| TransactionSummary {
9458 total_volume: rust_decimal::Decimal::ZERO,
9459 transaction_count: 0,
9460 first_transaction_date: po_date,
9461 last_transaction_date: po_date,
9462 related_entities: std::collections::HashSet::new(),
9463 });
9464 entry.total_volume += amount;
9465 entry.transaction_count += 1;
9466 if po_date < entry.first_transaction_date {
9467 entry.first_transaction_date = po_date;
9468 }
9469 if po_date > entry.last_transaction_date {
9470 entry.last_transaction_date = po_date;
9471 }
9472 entry.related_entities.insert(cc);
9473 }
9474
9475 for chain in &document_flows.o2c_chains {
9477 let cc = chain.sales_order.header.company_code.clone();
9478 let customer_id = chain.sales_order.customer_id.clone();
9479 let so_date = chain.sales_order.header.document_date;
9480 let amount = chain.sales_order.total_net_amount;
9481
9482 let entry = txn_summaries
9483 .entry((cc.clone(), customer_id))
9484 .or_insert_with(|| TransactionSummary {
9485 total_volume: rust_decimal::Decimal::ZERO,
9486 transaction_count: 0,
9487 first_transaction_date: so_date,
9488 last_transaction_date: so_date,
9489 related_entities: std::collections::HashSet::new(),
9490 });
9491 entry.total_volume += amount;
9492 entry.transaction_count += 1;
9493 if so_date < entry.first_transaction_date {
9494 entry.first_transaction_date = so_date;
9495 }
9496 if so_date > entry.last_transaction_date {
9497 entry.last_transaction_date = so_date;
9498 }
9499 entry.related_entities.insert(cc);
9500 }
9501
9502 let as_of_date = journal_entries
9503 .last()
9504 .map(|je| je.header.posting_date)
9505 .unwrap_or(start_date);
9506
9507 let graph = gen.generate_entity_graph(
9508 company_code,
9509 as_of_date,
9510 &vendor_summaries,
9511 &customer_summaries,
9512 &txn_summaries,
9513 );
9514
9515 info!(
9516 "Entity relationship graph: {} nodes, {} edges",
9517 graph.nodes.len(),
9518 graph.edges.len()
9519 );
9520 stats.entity_relationship_node_count = graph.nodes.len();
9521 stats.entity_relationship_edge_count = graph.edges.len();
9522 Some(graph)
9523 } else {
9524 None
9525 };
9526
9527 let cross_process_links = if cpl_enabled {
9529 let gr_refs: Vec<GoodsReceiptRef> = document_flows
9531 .p2p_chains
9532 .iter()
9533 .flat_map(|chain| {
9534 let vendor_id = chain.purchase_order.vendor_id.clone();
9535 let cc = chain.purchase_order.header.company_code.clone();
9536 chain.goods_receipts.iter().flat_map(move |gr| {
9537 gr.items.iter().filter_map({
9538 let doc_id = gr.header.document_id.clone();
9539 let v_id = vendor_id.clone();
9540 let company = cc.clone();
9541 let receipt_date = gr.header.document_date;
9542 move |item| {
9543 item.base
9544 .material_id
9545 .as_ref()
9546 .map(|mat_id| GoodsReceiptRef {
9547 document_id: doc_id.clone(),
9548 material_id: mat_id.clone(),
9549 quantity: item.base.quantity,
9550 receipt_date,
9551 vendor_id: v_id.clone(),
9552 company_code: company.clone(),
9553 })
9554 }
9555 })
9556 })
9557 })
9558 .collect();
9559
9560 let del_refs: Vec<DeliveryRef> = document_flows
9562 .o2c_chains
9563 .iter()
9564 .flat_map(|chain| {
9565 let customer_id = chain.sales_order.customer_id.clone();
9566 let cc = chain.sales_order.header.company_code.clone();
9567 chain.deliveries.iter().flat_map(move |del| {
9568 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9569 del.items.iter().filter_map({
9570 let doc_id = del.header.document_id.clone();
9571 let c_id = customer_id.clone();
9572 let company = cc.clone();
9573 move |item| {
9574 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9575 document_id: doc_id.clone(),
9576 material_id: mat_id.clone(),
9577 quantity: item.base.quantity,
9578 delivery_date,
9579 customer_id: c_id.clone(),
9580 company_code: company.clone(),
9581 })
9582 }
9583 })
9584 })
9585 })
9586 .collect();
9587
9588 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9589 info!("Cross-process links generated: {} links", links.len());
9590 stats.cross_process_link_count = links.len();
9591 links
9592 } else {
9593 Vec::new()
9594 };
9595
9596 self.check_resources_with_log("post-entity-relationships")?;
9597 Ok((entity_graph, cross_process_links))
9598 }
9599
9600 fn phase_industry_data(
9602 &self,
9603 stats: &mut EnhancedGenerationStatistics,
9604 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9605 if !self.config.industry_specific.enabled {
9606 return None;
9607 }
9608 info!("Phase 29: Generating industry-specific data");
9609 let output = datasynth_generators::industry::factory::generate_industry_output(
9610 self.config.global.industry,
9611 );
9612 stats.industry_gl_account_count = output.gl_accounts.len();
9613 info!(
9614 "Industry data generated: {} GL accounts for {:?}",
9615 output.gl_accounts.len(),
9616 self.config.global.industry
9617 );
9618 Some(output)
9619 }
9620
9621 fn phase_opening_balances(
9623 &mut self,
9624 coa: &Arc<ChartOfAccounts>,
9625 stats: &mut EnhancedGenerationStatistics,
9626 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9627 if !self.config.balance.generate_opening_balances {
9628 debug!("Phase 3b: Skipped (opening balance generation disabled)");
9629 return Ok(Vec::new());
9630 }
9631 info!("Phase 3b: Generating Opening Balances");
9632
9633 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9634 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9635 let fiscal_year = start_date.year();
9636
9637 let industry = match self.config.global.industry {
9638 IndustrySector::Manufacturing => IndustryType::Manufacturing,
9639 IndustrySector::Retail => IndustryType::Retail,
9640 IndustrySector::FinancialServices => IndustryType::Financial,
9641 IndustrySector::Healthcare => IndustryType::Healthcare,
9642 IndustrySector::Technology => IndustryType::Technology,
9643 _ => IndustryType::Manufacturing,
9644 };
9645
9646 let config = datasynth_generators::OpeningBalanceConfig {
9647 industry,
9648 ..Default::default()
9649 };
9650 let mut gen =
9651 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9652
9653 let mut results = Vec::new();
9654 for company in &self.config.companies {
9655 let spec = OpeningBalanceSpec::new(
9656 company.code.clone(),
9657 start_date,
9658 fiscal_year,
9659 company.currency.clone(),
9660 rust_decimal::Decimal::new(10_000_000, 0),
9661 industry,
9662 );
9663 let ob = gen.generate(&spec, coa, start_date, &company.code);
9664 results.push(ob);
9665 }
9666
9667 stats.opening_balance_count = results.len();
9668 info!("Opening balances generated: {} companies", results.len());
9669 self.check_resources_with_log("post-opening-balances")?;
9670
9671 Ok(results)
9672 }
9673
9674 fn phase_subledger_reconciliation(
9676 &mut self,
9677 subledger: &SubledgerSnapshot,
9678 entries: &[JournalEntry],
9679 stats: &mut EnhancedGenerationStatistics,
9680 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9681 if !self.config.balance.reconcile_subledgers {
9682 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9683 return Ok(Vec::new());
9684 }
9685 info!("Phase 9b: Reconciling GL to subledger balances");
9686
9687 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9688 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9689 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9690
9691 let tracker_config = BalanceTrackerConfig {
9693 validate_on_each_entry: false,
9694 track_history: false,
9695 fail_on_validation_error: false,
9696 ..Default::default()
9697 };
9698 let recon_currency = self
9699 .config
9700 .companies
9701 .first()
9702 .map(|c| c.currency.clone())
9703 .unwrap_or_else(|| "USD".to_string());
9704 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9705 let validation_errors = tracker.apply_entries(entries);
9706 if !validation_errors.is_empty() {
9707 warn!(
9708 error_count = validation_errors.len(),
9709 "Balance tracker encountered validation errors during subledger reconciliation"
9710 );
9711 for err in &validation_errors {
9712 debug!("Balance validation error: {:?}", err);
9713 }
9714 }
9715
9716 let mut engine = datasynth_generators::ReconciliationEngine::new(
9717 datasynth_generators::ReconciliationConfig::default(),
9718 );
9719
9720 let mut results = Vec::new();
9721 let company_code = self
9722 .config
9723 .companies
9724 .first()
9725 .map(|c| c.code.as_str())
9726 .unwrap_or("1000");
9727
9728 if !subledger.ar_invoices.is_empty() {
9730 let gl_balance = tracker
9731 .get_account_balance(
9732 company_code,
9733 datasynth_core::accounts::control_accounts::AR_CONTROL,
9734 )
9735 .map(|b| b.closing_balance)
9736 .unwrap_or_default();
9737 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9738 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9739 }
9740
9741 if !subledger.ap_invoices.is_empty() {
9743 let gl_balance = tracker
9744 .get_account_balance(
9745 company_code,
9746 datasynth_core::accounts::control_accounts::AP_CONTROL,
9747 )
9748 .map(|b| b.closing_balance)
9749 .unwrap_or_default();
9750 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9751 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9752 }
9753
9754 if !subledger.fa_records.is_empty() {
9756 let gl_asset_balance = tracker
9757 .get_account_balance(
9758 company_code,
9759 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9760 )
9761 .map(|b| b.closing_balance)
9762 .unwrap_or_default();
9763 let gl_accum_depr_balance = tracker
9764 .get_account_balance(
9765 company_code,
9766 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9767 )
9768 .map(|b| b.closing_balance)
9769 .unwrap_or_default();
9770 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9771 subledger.fa_records.iter().collect();
9772 let (asset_recon, depr_recon) = engine.reconcile_fa(
9773 company_code,
9774 end_date,
9775 gl_asset_balance,
9776 gl_accum_depr_balance,
9777 &fa_refs,
9778 );
9779 results.push(asset_recon);
9780 results.push(depr_recon);
9781 }
9782
9783 if !subledger.inventory_positions.is_empty() {
9785 let gl_balance = tracker
9786 .get_account_balance(
9787 company_code,
9788 datasynth_core::accounts::control_accounts::INVENTORY,
9789 )
9790 .map(|b| b.closing_balance)
9791 .unwrap_or_default();
9792 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9793 subledger.inventory_positions.iter().collect();
9794 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9795 }
9796
9797 stats.subledger_reconciliation_count = results.len();
9798 let passed = results.iter().filter(|r| r.is_balanced()).count();
9799 let failed = results.len() - passed;
9800 info!(
9801 "Subledger reconciliation: {} checks, {} passed, {} failed",
9802 results.len(),
9803 passed,
9804 failed
9805 );
9806 self.check_resources_with_log("post-subledger-reconciliation")?;
9807
9808 Ok(results)
9809 }
9810
9811 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9813 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9814
9815 let coa_framework = self.resolve_coa_framework();
9816
9817 let mut gen = ChartOfAccountsGenerator::new(
9818 self.config.chart_of_accounts.complexity,
9819 self.config.global.industry,
9820 self.seed,
9821 )
9822 .with_coa_framework(coa_framework);
9823
9824 let coa = Arc::new(gen.generate());
9825 self.coa = Some(Arc::clone(&coa));
9826
9827 if let Some(pb) = pb {
9828 pb.finish_with_message("Chart of Accounts complete");
9829 }
9830
9831 Ok(coa)
9832 }
9833
9834 fn generate_master_data(&mut self) -> SynthResult<()> {
9836 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9837 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9838 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9839
9840 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9842
9843 let pack = self.primary_pack().clone();
9845
9846 let vendors_per_company = self.phase_config.vendors_per_company;
9848 let customers_per_company = self.phase_config.customers_per_company;
9849 let materials_per_company = self.phase_config.materials_per_company;
9850 let assets_per_company = self.phase_config.assets_per_company;
9851 let coa_framework = self.resolve_coa_framework();
9852
9853 let per_company_results: Vec<_> = self
9856 .config
9857 .companies
9858 .par_iter()
9859 .enumerate()
9860 .map(|(i, company)| {
9861 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9862 let pack = pack.clone();
9863
9864 let mut vendor_gen = VendorGenerator::new(company_seed);
9866 vendor_gen.set_country_pack(pack.clone());
9867 vendor_gen.set_coa_framework(coa_framework);
9868 vendor_gen.set_counter_offset(i * vendors_per_company);
9869 vendor_gen.set_template_provider(self.template_provider.clone());
9872 if self.config.vendor_network.enabled {
9874 let vn = &self.config.vendor_network;
9875 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9876 enabled: true,
9877 depth: vn.depth,
9878 tier1_count: datasynth_generators::TierCountConfig::new(
9879 vn.tier1.min,
9880 vn.tier1.max,
9881 ),
9882 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9883 vn.tier2_per_parent.min,
9884 vn.tier2_per_parent.max,
9885 ),
9886 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9887 vn.tier3_per_parent.min,
9888 vn.tier3_per_parent.max,
9889 ),
9890 cluster_distribution: datasynth_generators::ClusterDistribution {
9891 reliable_strategic: vn.clusters.reliable_strategic,
9892 standard_operational: vn.clusters.standard_operational,
9893 transactional: vn.clusters.transactional,
9894 problematic: vn.clusters.problematic,
9895 },
9896 concentration_limits: datasynth_generators::ConcentrationLimits {
9897 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9898 max_top5: vn.dependencies.top_5_concentration,
9899 },
9900 ..datasynth_generators::VendorNetworkConfig::default()
9901 });
9902 }
9903 let vendor_pool =
9904 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9905
9906 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9908 customer_gen.set_country_pack(pack.clone());
9909 customer_gen.set_coa_framework(coa_framework);
9910 customer_gen.set_counter_offset(i * customers_per_company);
9911 customer_gen.set_template_provider(self.template_provider.clone());
9913 if self.config.customer_segmentation.enabled {
9915 let cs = &self.config.customer_segmentation;
9916 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9917 enabled: true,
9918 segment_distribution: datasynth_generators::SegmentDistribution {
9919 enterprise: cs.value_segments.enterprise.customer_share,
9920 mid_market: cs.value_segments.mid_market.customer_share,
9921 smb: cs.value_segments.smb.customer_share,
9922 consumer: cs.value_segments.consumer.customer_share,
9923 },
9924 referral_config: datasynth_generators::ReferralConfig {
9925 enabled: cs.networks.referrals.enabled,
9926 referral_rate: cs.networks.referrals.referral_rate,
9927 ..Default::default()
9928 },
9929 hierarchy_config: datasynth_generators::HierarchyConfig {
9930 enabled: cs.networks.corporate_hierarchies.enabled,
9931 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9932 ..Default::default()
9933 },
9934 ..Default::default()
9935 };
9936 customer_gen.set_segmentation_config(seg_cfg);
9937 }
9938 let customer_pool = customer_gen.generate_customer_pool(
9939 customers_per_company,
9940 &company.code,
9941 start_date,
9942 );
9943
9944 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9946 material_gen.set_country_pack(pack.clone());
9947 material_gen.set_counter_offset(i * materials_per_company);
9948 material_gen.set_template_provider(self.template_provider.clone());
9950 let material_pool = material_gen.generate_material_pool(
9951 materials_per_company,
9952 &company.code,
9953 start_date,
9954 );
9955
9956 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9958 asset_gen.set_template_provider(self.template_provider.clone());
9960 let asset_pool = asset_gen.generate_asset_pool(
9961 assets_per_company,
9962 &company.code,
9963 (start_date, end_date),
9964 );
9965
9966 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9968 employee_gen.set_country_pack(pack);
9969 employee_gen.set_template_provider(self.template_provider.clone());
9971 let employee_pool =
9972 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9973
9974 let employee_change_history =
9976 employee_gen.generate_all_change_history(&employee_pool, end_date);
9977
9978 let employee_ids: Vec<String> = employee_pool
9980 .employees
9981 .iter()
9982 .map(|e| e.employee_id.clone())
9983 .collect();
9984 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9985 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9986
9987 (
9988 vendor_pool.vendors,
9989 customer_pool.customers,
9990 material_pool.materials,
9991 asset_pool.assets,
9992 employee_pool.employees,
9993 employee_change_history,
9994 cost_centers,
9995 )
9996 })
9997 .collect();
9998
9999 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
10001 per_company_results
10002 {
10003 self.master_data.vendors.extend(vendors);
10004 self.master_data.customers.extend(customers);
10005 self.master_data.materials.extend(materials);
10006 self.master_data.assets.extend(assets);
10007 self.master_data.employees.extend(employees);
10008 self.master_data.cost_centers.extend(cost_centers);
10009 self.master_data
10010 .employee_change_history
10011 .extend(change_history);
10012 }
10013
10014 {
10018 use datasynth_core::models::IndustrySector;
10019 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10020 let industry = match self.config.global.industry {
10021 IndustrySector::Manufacturing => "manufacturing",
10022 IndustrySector::Retail => "retail",
10023 IndustrySector::FinancialServices => "financial_services",
10024 IndustrySector::Technology => "technology",
10025 IndustrySector::Healthcare => "healthcare",
10026 _ => "other",
10027 };
10028 for (i, company) in self.config.companies.iter().enumerate() {
10029 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10030 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10031 let profile = profile_gen.generate(&company.code, industry);
10032 self.master_data.organizational_profiles.push(profile);
10033 }
10034 }
10035
10036 if let Some(pb) = &pb {
10037 pb.inc(total);
10038 }
10039 if let Some(pb) = pb {
10040 pb.finish_with_message("Master data generation complete");
10041 }
10042
10043 Ok(())
10044 }
10045
10046 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10048 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10049 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10050
10051 let months = (self.config.global.period_months as usize).max(1);
10054 let p2p_count = self
10055 .phase_config
10056 .p2p_chains
10057 .min(self.master_data.vendors.len() * 2 * months);
10058 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10059
10060 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10062 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10063 p2p_gen.set_country_pack(self.primary_pack().clone());
10064 if let Some(ctx) = &self.temporal_context {
10068 p2p_gen.set_temporal_context(Arc::clone(ctx));
10069 }
10070
10071 for i in 0..p2p_count {
10072 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10073 let materials: Vec<&Material> = self
10074 .master_data
10075 .materials
10076 .iter()
10077 .skip(i % self.master_data.materials.len().max(1))
10078 .take(2.min(self.master_data.materials.len()))
10079 .collect();
10080
10081 if materials.is_empty() {
10082 continue;
10083 }
10084
10085 let company = &self.config.companies[i % self.config.companies.len()];
10086 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10087 let fiscal_period = po_date.month() as u8;
10088 let created_by = if self.master_data.employees.is_empty() {
10089 "SYSTEM"
10090 } else {
10091 self.master_data.employees[i % self.master_data.employees.len()]
10092 .user_id
10093 .as_str()
10094 };
10095
10096 let chain = p2p_gen.generate_chain(
10097 &company.code,
10098 vendor,
10099 &materials,
10100 po_date,
10101 start_date.year() as u16,
10102 fiscal_period,
10103 created_by,
10104 );
10105
10106 flows.purchase_orders.push(chain.purchase_order.clone());
10108 flows.goods_receipts.extend(chain.goods_receipts.clone());
10109 if let Some(vi) = &chain.vendor_invoice {
10110 flows.vendor_invoices.push(vi.clone());
10111 }
10112 if let Some(payment) = &chain.payment {
10113 flows.payments.push(payment.clone());
10114 }
10115 for remainder in &chain.remainder_payments {
10116 flows.payments.push(remainder.clone());
10117 }
10118 flows.p2p_chains.push(chain);
10119
10120 if let Some(pb) = &pb {
10121 pb.inc(1);
10122 }
10123 }
10124
10125 if let Some(pb) = pb {
10126 pb.finish_with_message("P2P document flows complete");
10127 }
10128
10129 let o2c_count = self
10132 .phase_config
10133 .o2c_chains
10134 .min(self.master_data.customers.len() * 2 * months);
10135 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10136
10137 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10139 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10140 o2c_gen.set_country_pack(self.primary_pack().clone());
10141 if let Some(ctx) = &self.temporal_context {
10143 o2c_gen.set_temporal_context(Arc::clone(ctx));
10144 }
10145
10146 for i in 0..o2c_count {
10147 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10148 let materials: Vec<&Material> = self
10149 .master_data
10150 .materials
10151 .iter()
10152 .skip(i % self.master_data.materials.len().max(1))
10153 .take(2.min(self.master_data.materials.len()))
10154 .collect();
10155
10156 if materials.is_empty() {
10157 continue;
10158 }
10159
10160 let company = &self.config.companies[i % self.config.companies.len()];
10161 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10162 let fiscal_period = so_date.month() as u8;
10163 let created_by = if self.master_data.employees.is_empty() {
10164 "SYSTEM"
10165 } else {
10166 self.master_data.employees[i % self.master_data.employees.len()]
10167 .user_id
10168 .as_str()
10169 };
10170
10171 let chain = o2c_gen.generate_chain(
10172 &company.code,
10173 customer,
10174 &materials,
10175 so_date,
10176 start_date.year() as u16,
10177 fiscal_period,
10178 created_by,
10179 );
10180
10181 flows.sales_orders.push(chain.sales_order.clone());
10183 flows.deliveries.extend(chain.deliveries.clone());
10184 if let Some(ci) = &chain.customer_invoice {
10185 flows.customer_invoices.push(ci.clone());
10186 }
10187 if let Some(receipt) = &chain.customer_receipt {
10188 flows.payments.push(receipt.clone());
10189 }
10190 for receipt in &chain.remainder_receipts {
10192 flows.payments.push(receipt.clone());
10193 }
10194 flows.o2c_chains.push(chain);
10195
10196 if let Some(pb) = &pb {
10197 pb.inc(1);
10198 }
10199 }
10200
10201 if let Some(pb) = pb {
10202 pb.finish_with_message("O2C document flows complete");
10203 }
10204
10205 {
10209 let mut refs = Vec::new();
10210 for doc in &flows.purchase_orders {
10211 refs.extend(doc.header.document_references.iter().cloned());
10212 }
10213 for doc in &flows.goods_receipts {
10214 refs.extend(doc.header.document_references.iter().cloned());
10215 }
10216 for doc in &flows.vendor_invoices {
10217 refs.extend(doc.header.document_references.iter().cloned());
10218 }
10219 for doc in &flows.sales_orders {
10220 refs.extend(doc.header.document_references.iter().cloned());
10221 }
10222 for doc in &flows.deliveries {
10223 refs.extend(doc.header.document_references.iter().cloned());
10224 }
10225 for doc in &flows.customer_invoices {
10226 refs.extend(doc.header.document_references.iter().cloned());
10227 }
10228 for doc in &flows.payments {
10229 refs.extend(doc.header.document_references.iter().cloned());
10230 }
10231 debug!(
10232 "Collected {} document cross-references from document headers",
10233 refs.len()
10234 );
10235 flows.document_references = refs;
10236 }
10237
10238 Ok(())
10239 }
10240
10241 fn generate_journal_entries(
10243 &mut self,
10244 coa: &Arc<ChartOfAccounts>,
10245 ) -> SynthResult<Vec<JournalEntry>> {
10246 use datasynth_core::traits::ParallelGenerator;
10247
10248 let total = self.calculate_total_transactions();
10249 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10250
10251 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10252 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10253 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10254
10255 let company_codes: Vec<String> = self
10256 .config
10257 .companies
10258 .iter()
10259 .map(|c| c.code.clone())
10260 .collect();
10261
10262 let mut generator = JournalEntryGenerator::new_with_params(
10263 self.config.transactions.clone(),
10264 Arc::clone(coa),
10265 company_codes,
10266 start_date,
10267 end_date,
10268 self.seed,
10269 );
10270 let bp = &self.config.business_processes;
10273 generator.set_business_process_weights(
10274 bp.o2c_weight,
10275 bp.p2p_weight,
10276 bp.r2r_weight,
10277 bp.h2r_weight,
10278 bp.a2r_weight,
10279 );
10280 generator
10285 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10286 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10287 let generator = generator;
10288
10289 let je_pack = self.primary_pack();
10293
10294 let mut generator = generator
10295 .with_master_data(
10296 &self.master_data.vendors,
10297 &self.master_data.customers,
10298 &self.master_data.materials,
10299 )
10300 .with_country_pack_names(je_pack)
10301 .with_country_pack_temporal(
10302 self.config.temporal_patterns.clone(),
10303 self.seed + 200,
10304 je_pack,
10305 )
10306 .with_persona_errors(true)
10307 .with_fraud_config(self.config.fraud.clone());
10308
10309 let temporal_enabled = self.config.temporal.enabled;
10314 let regimes_enabled = self.config.distributions.regime_changes.enabled;
10315 if temporal_enabled || regimes_enabled {
10316 let mut drift_config = if temporal_enabled {
10317 self.config.temporal.to_core_config()
10318 } else {
10319 datasynth_core::distributions::DriftConfig::default()
10322 };
10323 if regimes_enabled {
10324 self.config
10325 .distributions
10326 .regime_changes
10327 .apply_to(&mut drift_config, start_date);
10328 }
10329 generator = generator.with_drift_config(drift_config, self.seed + 100);
10330 }
10331
10332 self.check_memory_limit()?;
10334
10335 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10337
10338 let entries = if total >= 10_000 && num_threads > 1 {
10342 let sub_generators = generator.split(num_threads);
10345 let entries_per_thread = total as usize / num_threads;
10346 let remainder = total as usize % num_threads;
10347
10348 let batches: Vec<Vec<JournalEntry>> = sub_generators
10349 .into_par_iter()
10350 .enumerate()
10351 .map(|(i, mut gen)| {
10352 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10353 gen.generate_batch(count)
10354 })
10355 .collect();
10356
10357 let entries = JournalEntryGenerator::merge_results(batches);
10359
10360 if let Some(pb) = &pb {
10361 pb.inc(total);
10362 }
10363 entries
10364 } else {
10365 let mut entries = Vec::with_capacity(total as usize);
10367 for _ in 0..total {
10368 let entry = generator.generate();
10369 entries.push(entry);
10370 if let Some(pb) = &pb {
10371 pb.inc(1);
10372 }
10373 }
10374 entries
10375 };
10376
10377 if let Some(pb) = pb {
10378 pb.finish_with_message("Journal entries complete");
10379 }
10380
10381 Ok(entries)
10382 }
10383
10384 fn generate_jes_from_document_flows(
10389 &mut self,
10390 flows: &DocumentFlowSnapshot,
10391 ) -> SynthResult<Vec<JournalEntry>> {
10392 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
10393 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
10394
10395 let je_config = match self.resolve_coa_framework() {
10396 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
10397 CoAFramework::GermanSkr04 => {
10398 let fa = datasynth_core::FrameworkAccounts::german_gaap();
10399 DocumentFlowJeConfig::from(&fa)
10400 }
10401 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
10402 };
10403
10404 let populate_fec = je_config.populate_fec_fields;
10405 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
10406
10407 if populate_fec {
10411 let mut aux_lookup = std::collections::HashMap::new();
10412 for vendor in &self.master_data.vendors {
10413 if let Some(ref aux) = vendor.auxiliary_gl_account {
10414 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
10415 }
10416 }
10417 for customer in &self.master_data.customers {
10418 if let Some(ref aux) = customer.auxiliary_gl_account {
10419 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10420 }
10421 }
10422 if !aux_lookup.is_empty() {
10423 generator.set_auxiliary_account_lookup(aux_lookup);
10424 }
10425 }
10426
10427 let mut entries = Vec::new();
10428
10429 for chain in &flows.p2p_chains {
10431 let chain_entries = generator.generate_from_p2p_chain(chain);
10432 entries.extend(chain_entries);
10433 if let Some(pb) = &pb {
10434 pb.inc(1);
10435 }
10436 }
10437
10438 for chain in &flows.o2c_chains {
10440 let chain_entries = generator.generate_from_o2c_chain(chain);
10441 entries.extend(chain_entries);
10442 if let Some(pb) = &pb {
10443 pb.inc(1);
10444 }
10445 }
10446
10447 if let Some(pb) = pb {
10448 pb.finish_with_message(format!(
10449 "Generated {} JEs from document flows",
10450 entries.len()
10451 ));
10452 }
10453
10454 Ok(entries)
10455 }
10456
10457 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10463 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10464
10465 let mut jes = Vec::with_capacity(payroll_runs.len());
10466
10467 for run in payroll_runs {
10468 let mut je = JournalEntry::new_simple(
10469 format!("JE-PAYROLL-{}", run.payroll_id),
10470 run.company_code.clone(),
10471 run.run_date,
10472 format!("Payroll {}", run.payroll_id),
10473 );
10474
10475 je.add_line(JournalEntryLine {
10477 line_number: 1,
10478 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10479 debit_amount: run.total_gross,
10480 reference: Some(run.payroll_id.clone()),
10481 text: Some(format!(
10482 "Payroll {} ({} employees)",
10483 run.payroll_id, run.employee_count
10484 )),
10485 ..Default::default()
10486 });
10487
10488 je.add_line(JournalEntryLine {
10490 line_number: 2,
10491 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10492 credit_amount: run.total_gross,
10493 reference: Some(run.payroll_id.clone()),
10494 ..Default::default()
10495 });
10496
10497 jes.push(je);
10498 }
10499
10500 jes
10501 }
10502
10503 fn link_document_flows_to_subledgers(
10508 &mut self,
10509 flows: &DocumentFlowSnapshot,
10510 ) -> SynthResult<SubledgerSnapshot> {
10511 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10512 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10513
10514 let vendor_names: std::collections::HashMap<String, String> = self
10516 .master_data
10517 .vendors
10518 .iter()
10519 .map(|v| (v.vendor_id.clone(), v.name.clone()))
10520 .collect();
10521 let customer_names: std::collections::HashMap<String, String> = self
10522 .master_data
10523 .customers
10524 .iter()
10525 .map(|c| (c.customer_id.clone(), c.name.clone()))
10526 .collect();
10527
10528 let mut linker = DocumentFlowLinker::new()
10529 .with_vendor_names(vendor_names)
10530 .with_customer_names(customer_names);
10531
10532 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10534 if let Some(pb) = &pb {
10535 pb.inc(flows.vendor_invoices.len() as u64);
10536 }
10537
10538 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10540 if let Some(pb) = &pb {
10541 pb.inc(flows.customer_invoices.len() as u64);
10542 }
10543
10544 if let Some(pb) = pb {
10545 pb.finish_with_message(format!(
10546 "Linked {} AP and {} AR invoices",
10547 ap_invoices.len(),
10548 ar_invoices.len()
10549 ));
10550 }
10551
10552 Ok(SubledgerSnapshot {
10553 ap_invoices,
10554 ar_invoices,
10555 fa_records: Vec::new(),
10556 inventory_positions: Vec::new(),
10557 inventory_movements: Vec::new(),
10558 ar_aging_reports: Vec::new(),
10560 ap_aging_reports: Vec::new(),
10561 depreciation_runs: Vec::new(),
10563 inventory_valuations: Vec::new(),
10564 dunning_runs: Vec::new(),
10566 dunning_letters: Vec::new(),
10567 })
10568 }
10569
    /// Builds an OCPM event log from the already-generated process snapshots.
    ///
    /// One case is generated per P2P chain, O2C chain, sourcing project,
    /// payroll run, production order, banking customer, audit engagement and
    /// bank reconciliation, in that order. Each case's events, objects,
    /// relationships, correlation events and case trace are appended to a
    /// single event log, after which variants are computed and a summary is
    /// taken.
    ///
    /// Returns an [`OcpmSnapshot`] carrying the summary counts and the event
    /// log itself. The visible body has no path that returns `Err`; in
    /// particular an unparsable `global.start_date` silently falls back to
    /// 2024-01-01 instead of raising a config error.
    #[allow(clippy::too_many_arguments)]
    fn generate_ocpm_events(
        &mut self,
        flows: &DocumentFlowSnapshot,
        sourcing: &SourcingSnapshot,
        hr: &HrSnapshot,
        manufacturing: &ManufacturingSnapshot,
        banking: &BankingSnapshot,
        audit: &AuditSnapshot,
        financial_reporting: &FinancialReportingSnapshot,
    ) -> SynthResult<OcpmSnapshot> {
        // The progress bar is sized to the number of cases that will be generated
        // (one tick per loop iteration below).
        let total_chains = flows.p2p_chains.len()
            + flows.o2c_chains.len()
            + sourcing.sourcing_projects.len()
            + hr.payroll_runs.len()
            + manufacturing.production_orders.len()
            + banking.customers.len()
            + audit.engagements.len()
            + financial_reporting.bank_reconciliations.len();
        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");

        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();

        // P2P and O2C are always switched on; every other process flag is set only
        // when its source collection is non-empty.
        let ocpm_config = OcpmGeneratorConfig {
            generate_p2p: true,
            generate_o2c: true,
            generate_s2c: !sourcing.sourcing_projects.is_empty(),
            generate_h2r: !hr.payroll_runs.is_empty(),
            generate_mfg: !manufacturing.production_orders.is_empty(),
            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
            generate_bank: !banking.customers.is_empty(),
            generate_audit: !audit.engagements.is_empty(),
            happy_path_rate: 0.75,
            exception_path_rate: 0.20,
            error_path_rate: 0.05,
            add_duration_variability: true,
            duration_std_dev_factor: 0.3,
        };
        // Distinct seed offsets for the event generator and the UUID factory.
        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);

        // Only the first 20 employees are offered to the generator as users.
        let available_users: Vec<String> = self
            .master_data
            .employees
            .iter()
            .take(20)
            .map(|e| e.user_id.clone())
            .collect();

        // Base timestamp: midnight UTC of the configured start date, or of
        // 2024-01-01 when the configured value does not parse.
        let fallback_date =
            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .unwrap_or(fallback_date);
        let base_midnight = base_date
            .and_hms_opt(0, 0, 0)
            .expect("midnight is always valid");
        let base_datetime =
            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);

        // Moves every part of a generated case into the shared event log.
        let add_result = |event_log: &mut OcpmEventLog,
                          result: datasynth_ocpm::CaseGenerationResult| {
            for event in result.events {
                event_log.add_event(event);
            }
            for object in result.objects {
                event_log.add_object(object);
            }
            for relationship in result.relationships {
                event_log.add_relationship(relationship);
            }
            for corr in result.correlation_events {
                event_log.add_correlation_event(corr);
            }
            event_log.add_case(result.case_trace);
        };

        // --- P2P: one case per chain, anchored on the purchase order. ---
        // Only the first goods receipt is linked; a missing goods receipt,
        // invoice or payment is passed as an empty id string.
        for chain in &flows.p2p_chains {
            let po = &chain.purchase_order;
            let documents = P2pDocuments::new(
                &po.header.document_id,
                &po.vendor_id,
                &po.header.company_code,
                po.total_net_amount,
                &po.header.currency,
                &ocpm_uuid_factory,
            )
            .with_goods_receipt(
                chain
                    .goods_receipts
                    .first()
                    .map(|gr| gr.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_invoice(
                chain
                    .vendor_invoice
                    .as_ref()
                    .map(|vi| vi.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_payment(
                chain
                    .payment
                    .as_ref()
                    .map(|p| p.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            );

            // The case starts at the PO's entry timestamp, interpreted as UTC.
            let start_time =
                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- O2C: one case per chain, anchored on the sales order. ---
        // Only the first delivery and the primary customer receipt are linked;
        // missing documents are passed as an empty id string.
        for chain in &flows.o2c_chains {
            let so = &chain.sales_order;
            let documents = O2cDocuments::new(
                &so.header.document_id,
                &so.customer_id,
                &so.header.company_code,
                so.total_net_amount,
                &so.header.currency,
                &ocpm_uuid_factory,
            )
            .with_delivery(
                chain
                    .deliveries
                    .first()
                    .map(|d| d.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_invoice(
                chain
                    .customer_invoice
                    .as_ref()
                    .map(|ci| ci.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            )
            .with_receipt(
                chain
                    .customer_receipt
                    .as_ref()
                    .map(|r| r.header.document_id.as_str())
                    .unwrap_or(""),
                &ocpm_uuid_factory,
            );

            // The case starts at the SO's entry timestamp, interpreted as UTC.
            let start_time =
                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- S2C: one case per sourcing project. ---
        for project in &sourcing.sourcing_projects {
            // Vendor resolution order: the contract linked to this project, then
            // the first qualification, then the first master-data vendor, and
            // finally the literal placeholder "V000".
            let vendor_id = sourcing
                .contracts
                .iter()
                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
                .map(|c| c.vendor_id.clone())
                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
                .or_else(|| {
                    self.master_data
                        .vendors
                        .first()
                        .map(|v| v.vendor_id.clone())
                })
                .unwrap_or_else(|| "V000".to_string());
            let mut docs = S2cDocuments::new(
                &project.project_id,
                &vendor_id,
                &project.company_code,
                project.estimated_annual_spend,
                &ocpm_uuid_factory,
            );
            // Attach the first RFx event for this project and, if one exists, the
            // first accepted bid on that RFx.
            if let Some(rfx) = sourcing
                .rfx_events
                .iter()
                .find(|r| r.sourcing_project_id == project.project_id)
            {
                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
                if let Some(bid) = sourcing.bids.iter().find(|b| {
                    b.rfx_id == rfx.rfx_id
                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
                }) {
                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
                }
            }
            // Attach the first contract linked to this project, if any.
            if let Some(contract) = sourcing
                .contracts
                .iter()
                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
            {
                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
            }
            // Every S2C case starts 90 days before the base timestamp.
            let start_time = base_datetime - chrono::Duration::days(90);
            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- H2R: one case per payroll run. ---
        for run in &hr.payroll_runs {
            // Uses the employee of the first line item of this run, or the
            // placeholder "EMP000" when the run has no line items.
            let employee_id = hr
                .payroll_line_items
                .iter()
                .find(|li| li.payroll_id == run.payroll_id)
                .map(|li| li.employee_id.as_str())
                .unwrap_or("EMP000");
            let docs = H2rDocuments::new(
                &run.payroll_id,
                employee_id,
                &run.company_code,
                run.total_gross,
                &ocpm_uuid_factory,
            )
            // At most five time entries dated within the run's pay period
            // (inclusive on both ends).
            .with_time_entries(
                hr.time_entries
                    .iter()
                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
                    .take(5)
                    .map(|t| t.entry_id.as_str())
                    .collect(),
            );
            // Every H2R case starts 30 days before the base timestamp.
            let start_time = base_datetime - chrono::Duration::days(30);
            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Manufacturing: one case per production order. ---
        for order in &manufacturing.production_orders {
            let mut docs = MfgDocuments::new(
                &order.order_id,
                &order.material_id,
                &order.company_code,
                order.planned_quantity,
                &ocpm_uuid_factory,
            )
            // Operation ids are synthesized as "OP-" plus the zero-padded
            // four-digit operation number; the owned strings are collected first
            // so that `&str` views can be borrowed from them.
            .with_operations(
                order
                    .operations
                    .iter()
                    .map(|o| format!("OP-{:04}", o.operation_number))
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            );
            // First quality inspection whose reference id equals this order id.
            if let Some(insp) = manufacturing
                .quality_inspections
                .iter()
                .find(|i| i.reference_id == order.order_id)
            {
                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
            }
            // First cycle count containing an item for this order's material.
            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
                cc.items
                    .iter()
                    .any(|item| item.material_id == order.material_id)
            }) {
                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
            }
            // Every manufacturing case starts 60 days before the base timestamp.
            let start_time = base_datetime - chrono::Duration::days(60);
            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Banking: one case per banking customer. ---
        for customer in &banking.customers {
            let customer_id_str = customer.customer_id.to_string();
            // NOTE(review): the hard-coded "1000" looks like a company code;
            // confirm against `BankDocuments::new`.
            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
            // Only the first account whose primary owner is this customer is used.
            if let Some(account) = banking
                .accounts
                .iter()
                .find(|a| a.primary_owner_id == customer.customer_id)
            {
                let account_id_str = account.account_id.to_string();
                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
                // Ids and amounts come from two passes using the same filter and
                // the same limit of ten, so they stay index-aligned.
                let txn_strs: Vec<String> = banking
                    .transactions
                    .iter()
                    .filter(|t| t.account_id == account.account_id)
                    .take(10)
                    .map(|t| t.transaction_id.to_string())
                    .collect();
                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
                let txn_amounts: Vec<rust_decimal::Decimal> = banking
                    .transactions
                    .iter()
                    .filter(|t| t.account_id == account.account_id)
                    .take(10)
                    .map(|t| t.amount)
                    .collect();
                if !txn_ids.is_empty() {
                    docs = docs.with_transactions(txn_ids, txn_amounts);
                }
            }
            // Every banking case starts 180 days before the base timestamp.
            let start_time = base_datetime - chrono::Duration::days(180);
            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Audit: one case per engagement. ---
        // Related artifacts are matched by engagement id and capped: ten
        // workpapers, ten evidence items, five risks, five findings and five
        // judgments.
        for engagement in &audit.engagements {
            let engagement_id_str = engagement.engagement_id.to_string();
            let docs = AuditDocuments::new(
                &engagement_id_str,
                &engagement.client_entity_id,
                &ocpm_uuid_factory,
            )
            .with_workpapers(
                audit
                    .workpapers
                    .iter()
                    .filter(|w| w.engagement_id == engagement.engagement_id)
                    .take(10)
                    .map(|w| w.workpaper_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_evidence(
                audit
                    .evidence
                    .iter()
                    .filter(|e| e.engagement_id == engagement.engagement_id)
                    .take(10)
                    .map(|e| e.evidence_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_risks(
                audit
                    .risk_assessments
                    .iter()
                    .filter(|r| r.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|r| r.risk_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_findings(
                audit
                    .findings
                    .iter()
                    .filter(|f| f.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|f| f.finding_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            )
            .with_judgments(
                audit
                    .judgments
                    .iter()
                    .filter(|j| j.engagement_id == engagement.engagement_id)
                    .take(5)
                    .map(|j| j.judgment_id.to_string())
                    .collect::<Vec<_>>()
                    .iter()
                    .map(std::string::String::as_str)
                    .collect(),
            );
            // Every audit case starts 120 days before the base timestamp.
            let start_time = base_datetime - chrono::Duration::days(120);
            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // --- Bank reconciliation: one case per reconciliation. ---
        // At most 20 statement lines and 10 reconciling items are linked.
        for recon in &financial_reporting.bank_reconciliations {
            let docs = BankReconDocuments::new(
                &recon.reconciliation_id,
                &recon.bank_account_id,
                &recon.company_code,
                recon.bank_ending_balance,
                &ocpm_uuid_factory,
            )
            .with_statement_lines(
                recon
                    .statement_lines
                    .iter()
                    .take(20)
                    .map(|l| l.line_id.as_str())
                    .collect(),
            )
            .with_reconciling_items(
                recon
                    .reconciling_items
                    .iter()
                    .take(10)
                    .map(|i| i.item_id.as_str())
                    .collect(),
            );
            // Every reconciliation case starts 30 days before the base timestamp.
            let start_time = base_datetime - chrono::Duration::days(30);
            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
            add_result(&mut event_log, result);

            if let Some(pb) = &pb {
                pb.inc(1);
            }
        }

        // Variants are computed once, after every case has been added.
        event_log.compute_variants();

        let summary = event_log.summary();

        if let Some(pb) = pb {
            pb.finish_with_message(format!(
                "Generated {} OCPM events, {} objects",
                summary.event_count, summary.object_count
            ));
        }

        Ok(OcpmSnapshot {
            event_count: summary.event_count,
            object_count: summary.object_count,
            case_count: summary.case_count,
            event_log: Some(event_log),
        })
    }
11051
11052 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11054 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11055
11056 let total_rate = if self.config.anomaly_injection.enabled {
11059 self.config.anomaly_injection.rates.total_rate
11060 } else if self.config.fraud.enabled {
11061 self.config.fraud.fraud_rate
11062 } else {
11063 0.02
11064 };
11065
11066 let fraud_rate = if self.config.anomaly_injection.enabled {
11067 self.config.anomaly_injection.rates.fraud_rate
11068 } else {
11069 AnomalyRateConfig::default().fraud_rate
11070 };
11071
11072 let error_rate = if self.config.anomaly_injection.enabled {
11073 self.config.anomaly_injection.rates.error_rate
11074 } else {
11075 AnomalyRateConfig::default().error_rate
11076 };
11077
11078 let process_issue_rate = if self.config.anomaly_injection.enabled {
11079 self.config.anomaly_injection.rates.process_rate
11080 } else {
11081 AnomalyRateConfig::default().process_issue_rate
11082 };
11083
11084 let anomaly_config = AnomalyInjectorConfig {
11085 rates: AnomalyRateConfig {
11086 total_rate,
11087 fraud_rate,
11088 error_rate,
11089 process_issue_rate,
11090 ..Default::default()
11091 },
11092 seed: self.seed + 5000,
11093 ..Default::default()
11094 };
11095
11096 let mut injector = AnomalyInjector::new(anomaly_config);
11097 let result = injector.process_entries(entries);
11098
11099 if let Some(pb) = &pb {
11100 pb.inc(entries.len() as u64);
11101 pb.finish_with_message("Anomaly injection complete");
11102 }
11103
11104 let mut by_type = HashMap::new();
11105 for label in &result.labels {
11106 *by_type
11107 .entry(format!("{:?}", label.anomaly_type))
11108 .or_insert(0) += 1;
11109 }
11110
11111 Ok(AnomalyLabels {
11112 labels: result.labels,
11113 summary: Some(result.summary),
11114 by_type,
11115 })
11116 }
11117
11118 fn validate_journal_entries(
11127 &mut self,
11128 entries: &[JournalEntry],
11129 ) -> SynthResult<BalanceValidationResult> {
11130 let clean_entries: Vec<&JournalEntry> = entries
11132 .iter()
11133 .filter(|e| {
11134 e.header
11135 .header_text
11136 .as_ref()
11137 .map(|t| !t.contains("[HUMAN_ERROR:"))
11138 .unwrap_or(true)
11139 })
11140 .collect();
11141
11142 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11143
11144 let config = BalanceTrackerConfig {
11146 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11150 };
11151 let validation_currency = self
11152 .config
11153 .companies
11154 .first()
11155 .map(|c| c.currency.clone())
11156 .unwrap_or_else(|| "USD".to_string());
11157
11158 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11159
11160 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11162 let errors = tracker.apply_entries(&clean_refs);
11163
11164 if let Some(pb) = &pb {
11165 pb.inc(entries.len() as u64);
11166 }
11167
11168 let has_unbalanced = tracker
11171 .get_validation_errors()
11172 .iter()
11173 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11174
11175 let mut all_errors = errors;
11178 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11179 let company_codes: Vec<String> = self
11180 .config
11181 .companies
11182 .iter()
11183 .map(|c| c.code.clone())
11184 .collect();
11185
11186 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11187 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11188 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11189
11190 for company_code in &company_codes {
11191 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11192 all_errors.push(e);
11193 }
11194 }
11195
11196 let stats = tracker.get_statistics();
11198
11199 let is_balanced = all_errors.is_empty();
11201
11202 if let Some(pb) = pb {
11203 let msg = if is_balanced {
11204 "Balance validation passed"
11205 } else {
11206 "Balance validation completed with errors"
11207 };
11208 pb.finish_with_message(msg);
11209 }
11210
11211 Ok(BalanceValidationResult {
11212 validated: true,
11213 is_balanced,
11214 entries_processed: stats.entries_processed,
11215 total_debits: stats.total_debits,
11216 total_credits: stats.total_credits,
11217 accounts_tracked: stats.accounts_tracked,
11218 companies_tracked: stats.companies_tracked,
11219 validation_errors: all_errors,
11220 has_unbalanced_entries: has_unbalanced,
11221 })
11222 }
11223
11224 fn inject_data_quality(
11229 &mut self,
11230 entries: &mut [JournalEntry],
11231 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11232 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11233
11234 let config = if self.config.data_quality.enabled {
11237 let dq = &self.config.data_quality;
11238 DataQualityConfig {
11239 enable_missing_values: dq.missing_values.enabled,
11240 missing_values: datasynth_generators::MissingValueConfig {
11241 global_rate: dq.effective_missing_rate(),
11242 ..Default::default()
11243 },
11244 enable_format_variations: dq.format_variations.enabled,
11245 format_variations: datasynth_generators::FormatVariationConfig {
11246 date_variation_rate: dq.format_variations.dates.rate,
11247 amount_variation_rate: dq.format_variations.amounts.rate,
11248 identifier_variation_rate: dq.format_variations.identifiers.rate,
11249 ..Default::default()
11250 },
11251 enable_duplicates: dq.duplicates.enabled,
11252 duplicates: datasynth_generators::DuplicateConfig {
11253 duplicate_rate: dq.effective_duplicate_rate(),
11254 ..Default::default()
11255 },
11256 enable_typos: dq.typos.enabled,
11257 typos: datasynth_generators::TypoConfig {
11258 char_error_rate: dq.effective_typo_rate(),
11259 ..Default::default()
11260 },
11261 enable_encoding_issues: dq.encoding_issues.enabled,
11262 encoding_issue_rate: dq.encoding_issues.rate,
11263 seed: self.seed.wrapping_add(77), track_statistics: true,
11265 }
11266 } else {
11267 DataQualityConfig::minimal()
11268 };
11269 let mut injector = DataQualityInjector::new(config);
11270
11271 injector.set_country_pack(self.primary_pack().clone());
11273
11274 let context = HashMap::new();
11276
11277 for entry in entries.iter_mut() {
11278 if let Some(text) = &entry.header.header_text {
11280 let processed = injector.process_text_field(
11281 "header_text",
11282 text,
11283 &entry.header.document_id.to_string(),
11284 &context,
11285 );
11286 match processed {
11287 Some(new_text) if new_text != *text => {
11288 entry.header.header_text = Some(new_text);
11289 }
11290 None => {
11291 entry.header.header_text = None; }
11293 _ => {}
11294 }
11295 }
11296
11297 if let Some(ref_text) = &entry.header.reference {
11299 let processed = injector.process_text_field(
11300 "reference",
11301 ref_text,
11302 &entry.header.document_id.to_string(),
11303 &context,
11304 );
11305 match processed {
11306 Some(new_text) if new_text != *ref_text => {
11307 entry.header.reference = Some(new_text);
11308 }
11309 None => {
11310 entry.header.reference = None;
11311 }
11312 _ => {}
11313 }
11314 }
11315
11316 let user_persona = entry.header.user_persona.clone();
11318 if let Some(processed) = injector.process_text_field(
11319 "user_persona",
11320 &user_persona,
11321 &entry.header.document_id.to_string(),
11322 &context,
11323 ) {
11324 if processed != user_persona {
11325 entry.header.user_persona = processed;
11326 }
11327 }
11328
11329 for line in &mut entry.lines {
11331 if let Some(ref text) = line.line_text {
11333 let processed = injector.process_text_field(
11334 "line_text",
11335 text,
11336 &entry.header.document_id.to_string(),
11337 &context,
11338 );
11339 match processed {
11340 Some(new_text) if new_text != *text => {
11341 line.line_text = Some(new_text);
11342 }
11343 None => {
11344 line.line_text = None;
11345 }
11346 _ => {}
11347 }
11348 }
11349
11350 if let Some(cc) = &line.cost_center {
11352 let processed = injector.process_text_field(
11353 "cost_center",
11354 cc,
11355 &entry.header.document_id.to_string(),
11356 &context,
11357 );
11358 match processed {
11359 Some(new_cc) if new_cc != *cc => {
11360 line.cost_center = Some(new_cc);
11361 }
11362 None => {
11363 line.cost_center = None;
11364 }
11365 _ => {}
11366 }
11367 }
11368 }
11369
11370 if let Some(pb) = &pb {
11371 pb.inc(1);
11372 }
11373 }
11374
11375 if let Some(pb) = pb {
11376 pb.finish_with_message("Data quality injection complete");
11377 }
11378
11379 let quality_issues = injector.issues().to_vec();
11380 Ok((injector.stats().clone(), quality_issues))
11381 }
11382
11383 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
11394 let use_fsm = self
11396 .config
11397 .audit
11398 .fsm
11399 .as_ref()
11400 .map(|f| f.enabled)
11401 .unwrap_or(false);
11402
11403 if use_fsm {
11404 return self.generate_audit_data_with_fsm(entries);
11405 }
11406
11407 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11409 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11410 let fiscal_year = start_date.year() as u16;
11411 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11412
11413 let total_revenue: rust_decimal::Decimal = entries
11415 .iter()
11416 .flat_map(|e| e.lines.iter())
11417 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11418 .map(|l| l.credit_amount)
11419 .sum();
11420
11421 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11423
11424 let mut snapshot = AuditSnapshot::default();
11425
11426 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11428 engagement_gen.set_team_config(&self.config.audit.team);
11431
11432 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11433 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
11437 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11438 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11439 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11440 finding_gen.set_template_provider(self.template_provider.clone());
11442 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11443 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11444 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11445 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11446 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11447 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11448 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11449
11450 let accounts: Vec<String> = self
11452 .coa
11453 .as_ref()
11454 .map(|coa| {
11455 coa.get_postable_accounts()
11456 .iter()
11457 .map(|acc| acc.account_code().to_string())
11458 .collect()
11459 })
11460 .unwrap_or_default();
11461
11462 for (i, company) in self.config.companies.iter().enumerate() {
11464 let company_revenue = total_revenue
11466 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11467
11468 let engagements_for_company =
11470 self.phase_config.audit_engagements / self.config.companies.len().max(1);
11471 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11472 1
11473 } else {
11474 0
11475 };
11476
11477 for _eng_idx in 0..(engagements_for_company + extra) {
11478 let eng_type =
11483 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
11484
11485 let mut engagement = engagement_gen.generate_engagement(
11487 &company.code,
11488 &company.name,
11489 fiscal_year,
11490 period_end,
11491 company_revenue,
11492 Some(eng_type),
11493 );
11494
11495 if !self.master_data.employees.is_empty() {
11497 let emp_count = self.master_data.employees.len();
11498 let base = (i * 10 + _eng_idx) % emp_count;
11500 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11501 .employee_id
11502 .clone();
11503 engagement.engagement_manager_id = self.master_data.employees
11504 [(base + 1) % emp_count]
11505 .employee_id
11506 .clone();
11507 let real_team: Vec<String> = engagement
11508 .team_member_ids
11509 .iter()
11510 .enumerate()
11511 .map(|(j, _)| {
11512 self.master_data.employees[(base + 2 + j) % emp_count]
11513 .employee_id
11514 .clone()
11515 })
11516 .collect();
11517 engagement.team_member_ids = real_team;
11518 }
11519
11520 if let Some(pb) = &pb {
11521 pb.inc(1);
11522 }
11523
11524 let team_members: Vec<String> = engagement.team_member_ids.clone();
11526
11527 let workpapers = if self.config.audit.generate_workpapers {
11533 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
11534 } else {
11535 Vec::new()
11536 };
11537
11538 for wp in &workpapers {
11539 if let Some(pb) = &pb {
11540 pb.inc(1);
11541 }
11542
11543 let evidence = evidence_gen.generate_evidence_for_workpaper(
11545 wp,
11546 &team_members,
11547 wp.preparer_date,
11548 );
11549
11550 for _ in &evidence {
11551 if let Some(pb) = &pb {
11552 pb.inc(1);
11553 }
11554 }
11555
11556 snapshot.evidence.extend(evidence);
11557 }
11558
11559 let risks =
11561 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11562
11563 for _ in &risks {
11564 if let Some(pb) = &pb {
11565 pb.inc(1);
11566 }
11567 }
11568 snapshot.risk_assessments.extend(risks);
11569
11570 let findings = finding_gen.generate_findings_for_engagement(
11572 &engagement,
11573 &workpapers,
11574 &team_members,
11575 );
11576
11577 for _ in &findings {
11578 if let Some(pb) = &pb {
11579 pb.inc(1);
11580 }
11581 }
11582 snapshot.findings.extend(findings);
11583
11584 let judgments =
11586 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11587
11588 for _ in &judgments {
11589 if let Some(pb) = &pb {
11590 pb.inc(1);
11591 }
11592 }
11593 snapshot.judgments.extend(judgments);
11594
11595 let (confs, resps) =
11597 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11598 snapshot.confirmations.extend(confs);
11599 snapshot.confirmation_responses.extend(resps);
11600
11601 let team_pairs: Vec<(String, String)> = team_members
11603 .iter()
11604 .map(|id| {
11605 let name = self
11606 .master_data
11607 .employees
11608 .iter()
11609 .find(|e| e.employee_id == *id)
11610 .map(|e| e.display_name.clone())
11611 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11612 (id.clone(), name)
11613 })
11614 .collect();
11615 for wp in &workpapers {
11616 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11617 snapshot.procedure_steps.extend(steps);
11618 }
11619
11620 for wp in &workpapers {
11622 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11623 snapshot.samples.push(sample);
11624 }
11625 }
11626
11627 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11629 snapshot.analytical_results.extend(analytical);
11630
11631 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11633 snapshot.ia_functions.push(ia_func);
11634 snapshot.ia_reports.extend(ia_reports);
11635
11636 let vendor_names: Vec<String> = self
11638 .master_data
11639 .vendors
11640 .iter()
11641 .map(|v| v.name.clone())
11642 .collect();
11643 let customer_names: Vec<String> = self
11644 .master_data
11645 .customers
11646 .iter()
11647 .map(|c| c.name.clone())
11648 .collect();
11649 let (parties, rp_txns) =
11650 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11651 snapshot.related_parties.extend(parties);
11652 snapshot.related_party_transactions.extend(rp_txns);
11653
11654 snapshot.workpapers.extend(workpapers);
11656
11657 {
11659 let scope_id = format!(
11660 "SCOPE-{}-{}",
11661 engagement.engagement_id.simple(),
11662 &engagement.client_entity_id
11663 );
11664 let scope = datasynth_core::models::audit::AuditScope::new(
11665 scope_id.clone(),
11666 engagement.engagement_id.to_string(),
11667 engagement.client_entity_id.clone(),
11668 engagement.materiality,
11669 );
11670 let mut eng = engagement;
11672 eng.scope_id = Some(scope_id);
11673 snapshot.audit_scopes.push(scope);
11674 snapshot.engagements.push(eng);
11675 }
11676 }
11677 }
11678
11679 if self.config.companies.len() > 1 {
11683 let group_materiality = snapshot
11686 .engagements
11687 .first()
11688 .map(|e| e.materiality)
11689 .unwrap_or_else(|| {
11690 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11691 total_revenue * pct
11692 });
11693
11694 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11695 let group_engagement_id = snapshot
11696 .engagements
11697 .first()
11698 .map(|e| e.engagement_id.to_string())
11699 .unwrap_or_else(|| "GROUP-ENG".to_string());
11700
11701 let component_snapshot = component_gen.generate(
11702 &self.config.companies,
11703 group_materiality,
11704 &group_engagement_id,
11705 period_end,
11706 );
11707
11708 snapshot.component_auditors = component_snapshot.component_auditors;
11709 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11710 snapshot.component_instructions = component_snapshot.component_instructions;
11711 snapshot.component_reports = component_snapshot.component_reports;
11712
11713 info!(
11714 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11715 snapshot.component_auditors.len(),
11716 snapshot.component_instructions.len(),
11717 snapshot.component_reports.len(),
11718 );
11719 }
11720
11721 {
11725 let applicable_framework = self
11726 .config
11727 .accounting_standards
11728 .framework
11729 .as_ref()
11730 .map(|f| format!("{f:?}"))
11731 .unwrap_or_else(|| "IFRS".to_string());
11732
11733 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11734 let entity_count = self.config.companies.len();
11735
11736 for engagement in &snapshot.engagements {
11737 let company = self
11738 .config
11739 .companies
11740 .iter()
11741 .find(|c| c.code == engagement.client_entity_id);
11742 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11743 let letter_date = engagement.planning_start;
11744 let letter = letter_gen.generate(
11745 &engagement.engagement_id.to_string(),
11746 &engagement.client_name,
11747 entity_count,
11748 engagement.period_end_date,
11749 currency,
11750 &applicable_framework,
11751 letter_date,
11752 );
11753 snapshot.engagement_letters.push(letter);
11754 }
11755
11756 info!(
11757 "ISA 210 engagement letters: {} generated",
11758 snapshot.engagement_letters.len()
11759 );
11760 }
11761
11762 if self.phase_config.generate_legal_documents {
11766 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
11767 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
11768 for engagement in &snapshot.engagements {
11769 let employee_names: Vec<String> = self
11773 .master_data
11774 .employees
11775 .iter()
11776 .filter(|e| e.company_code == engagement.client_entity_id)
11777 .map(|e| e.display_name.clone())
11778 .collect();
11779 let names_to_use = if !employee_names.is_empty() {
11780 employee_names
11781 } else {
11782 self.master_data
11783 .employees
11784 .iter()
11785 .take(10)
11786 .map(|e| e.display_name.clone())
11787 .collect()
11788 };
11789 let docs = legal_gen.generate(
11790 &engagement.client_entity_id,
11791 engagement.fiscal_year as i32,
11792 &names_to_use,
11793 );
11794 snapshot.legal_documents.extend(docs);
11795 }
11796 info!(
11797 "v3.3.0 legal documents: {} emitted across {} engagements",
11798 snapshot.legal_documents.len(),
11799 snapshot.engagements.len()
11800 );
11801 }
11802
11803 if self.phase_config.generate_it_controls {
11813 use datasynth_generators::it_controls_generator::ItControlsGenerator;
11814 use std::collections::HashMap;
11815 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
11816
11817 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
11820 HashMap::new();
11821 for engagement in &snapshot.engagements {
11822 let entry = by_company
11823 .entry(engagement.client_entity_id.clone())
11824 .or_insert((engagement.planning_start, engagement.period_end_date));
11825 if engagement.planning_start < entry.0 {
11826 entry.0 = engagement.planning_start;
11827 }
11828 if engagement.period_end_date > entry.1 {
11829 entry.1 = engagement.period_end_date;
11830 }
11831 }
11832
11833 let systems: Vec<String> = vec![
11837 "SAP ECC",
11838 "SAP S/4 HANA",
11839 "Oracle EBS",
11840 "Workday",
11841 "NetSuite",
11842 "Active Directory",
11843 "SharePoint",
11844 "Salesforce",
11845 "ServiceNow",
11846 "Jira",
11847 "GitHub Enterprise",
11848 "AWS Console",
11849 "Okta",
11850 ]
11851 .into_iter()
11852 .map(String::from)
11853 .collect();
11854
11855 for (company_code, (start, end)) in by_company {
11856 let emps: Vec<(String, String)> = self
11857 .master_data
11858 .employees
11859 .iter()
11860 .filter(|e| e.company_code == company_code)
11861 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11862 .collect();
11863 if emps.is_empty() {
11864 continue;
11865 }
11866 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
11869 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
11870 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
11871 snapshot.it_controls_access_logs.extend(access_logs);
11872 snapshot.it_controls_change_records.extend(change_records);
11873 }
11874
11875 info!(
11876 "v3.3.0 IT controls: {} access logs, {} change records",
11877 snapshot.it_controls_access_logs.len(),
11878 snapshot.it_controls_change_records.len()
11879 );
11880 }
11881
11882 {
11886 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11887 let entity_codes: Vec<String> = self
11888 .config
11889 .companies
11890 .iter()
11891 .map(|c| c.code.clone())
11892 .collect();
11893 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11894 info!(
11895 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11896 subsequent.len(),
11897 subsequent
11898 .iter()
11899 .filter(|e| matches!(
11900 e.classification,
11901 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11902 ))
11903 .count(),
11904 subsequent
11905 .iter()
11906 .filter(|e| matches!(
11907 e.classification,
11908 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11909 ))
11910 .count(),
11911 );
11912 snapshot.subsequent_events = subsequent;
11913 }
11914
11915 {
11919 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11920 let entity_codes: Vec<String> = self
11921 .config
11922 .companies
11923 .iter()
11924 .map(|c| c.code.clone())
11925 .collect();
11926 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11927 info!(
11928 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11929 soc_snapshot.service_organizations.len(),
11930 soc_snapshot.soc_reports.len(),
11931 soc_snapshot.user_entity_controls.len(),
11932 );
11933 snapshot.service_organizations = soc_snapshot.service_organizations;
11934 snapshot.soc_reports = soc_snapshot.soc_reports;
11935 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11936 }
11937
11938 {
11942 use datasynth_generators::audit::going_concern_generator::{
11943 GoingConcernGenerator, GoingConcernInput,
11944 };
11945 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11946 let entity_codes: Vec<String> = self
11947 .config
11948 .companies
11949 .iter()
11950 .map(|c| c.code.clone())
11951 .collect();
11952 let assessment_date = period_end + chrono::Duration::days(75);
11954 let period_label = format!("FY{}", period_end.year());
11955
11956 let gc_inputs: Vec<GoingConcernInput> = self
11967 .config
11968 .companies
11969 .iter()
11970 .map(|company| {
11971 let code = &company.code;
11972 let mut revenue = rust_decimal::Decimal::ZERO;
11973 let mut expenses = rust_decimal::Decimal::ZERO;
11974 let mut current_assets = rust_decimal::Decimal::ZERO;
11975 let mut current_liabs = rust_decimal::Decimal::ZERO;
11976 let mut total_debt = rust_decimal::Decimal::ZERO;
11977
11978 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11979 for line in &je.lines {
11980 let acct = line.gl_account.as_str();
11981 let net = line.debit_amount - line.credit_amount;
11982 if acct.starts_with('4') {
11983 revenue -= net;
11985 } else if acct.starts_with('6') {
11986 expenses += net;
11988 }
11989 if acct.starts_with('1') {
11991 if let Ok(n) = acct.parse::<u32>() {
11993 if (1000..=1499).contains(&n) {
11994 current_assets += net;
11995 }
11996 }
11997 } else if acct.starts_with('2') {
11998 if let Ok(n) = acct.parse::<u32>() {
11999 if (2000..=2499).contains(&n) {
12000 current_liabs -= net; } else if (2500..=2999).contains(&n) {
12003 total_debt -= net;
12005 }
12006 }
12007 }
12008 }
12009 }
12010
12011 let net_income = revenue - expenses;
12012 let working_capital = current_assets - current_liabs;
12013 let operating_cash_flow = net_income;
12016
12017 GoingConcernInput {
12018 entity_code: code.clone(),
12019 net_income,
12020 working_capital,
12021 operating_cash_flow,
12022 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12023 assessment_date,
12024 }
12025 })
12026 .collect();
12027
12028 let assessments = if gc_inputs.is_empty() {
12029 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12030 } else {
12031 gc_gen.generate_for_entities_with_inputs(
12032 &entity_codes,
12033 &gc_inputs,
12034 assessment_date,
12035 &period_label,
12036 )
12037 };
12038 info!(
12039 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12040 assessments.len(),
12041 assessments.iter().filter(|a| matches!(
12042 a.auditor_conclusion,
12043 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12044 )).count(),
12045 assessments.iter().filter(|a| matches!(
12046 a.auditor_conclusion,
12047 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12048 )).count(),
12049 assessments.iter().filter(|a| matches!(
12050 a.auditor_conclusion,
12051 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12052 )).count(),
12053 );
12054 snapshot.going_concern_assessments = assessments;
12055 }
12056
12057 {
12061 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12062 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12063 let entity_codes: Vec<String> = self
12064 .config
12065 .companies
12066 .iter()
12067 .map(|c| c.code.clone())
12068 .collect();
12069 let estimates = est_gen.generate_for_entities(&entity_codes);
12070 info!(
12071 "ISA 540 accounting estimates: {} estimates across {} entities \
12072 ({} with retrospective reviews, {} with auditor point estimates)",
12073 estimates.len(),
12074 entity_codes.len(),
12075 estimates
12076 .iter()
12077 .filter(|e| e.retrospective_review.is_some())
12078 .count(),
12079 estimates
12080 .iter()
12081 .filter(|e| e.auditor_point_estimate.is_some())
12082 .count(),
12083 );
12084 snapshot.accounting_estimates = estimates;
12085 }
12086
12087 {
12091 use datasynth_generators::audit::audit_opinion_generator::{
12092 AuditOpinionGenerator, AuditOpinionInput,
12093 };
12094
12095 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12096
12097 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12099 .engagements
12100 .iter()
12101 .map(|eng| {
12102 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12104 .findings
12105 .iter()
12106 .filter(|f| f.engagement_id == eng.engagement_id)
12107 .cloned()
12108 .collect();
12109
12110 let gc = snapshot
12112 .going_concern_assessments
12113 .iter()
12114 .find(|g| g.entity_code == eng.client_entity_id)
12115 .cloned();
12116
12117 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12119 snapshot.component_reports.clone();
12120
12121 let auditor = self
12122 .master_data
12123 .employees
12124 .first()
12125 .map(|e| e.display_name.clone())
12126 .unwrap_or_else(|| "Global Audit LLP".into());
12127
12128 let partner = self
12129 .master_data
12130 .employees
12131 .get(1)
12132 .map(|e| e.display_name.clone())
12133 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12134
12135 AuditOpinionInput {
12136 entity_code: eng.client_entity_id.clone(),
12137 entity_name: eng.client_name.clone(),
12138 engagement_id: eng.engagement_id,
12139 period_end: eng.period_end_date,
12140 findings: eng_findings,
12141 going_concern: gc,
12142 component_reports: comp_reports,
12143 is_us_listed: {
12145 let fw = &self.config.audit_standards.isa_compliance.framework;
12146 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12147 },
12148 auditor_name: auditor,
12149 engagement_partner: partner,
12150 }
12151 })
12152 .collect();
12153
12154 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12155
12156 for go in &generated_opinions {
12157 snapshot
12158 .key_audit_matters
12159 .extend(go.key_audit_matters.clone());
12160 }
12161 snapshot.audit_opinions = generated_opinions
12162 .into_iter()
12163 .map(|go| go.opinion)
12164 .collect();
12165
12166 info!(
12167 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12168 snapshot.audit_opinions.len(),
12169 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12170 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12171 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12172 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12173 );
12174 }
12175
12176 {
12180 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12181
12182 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12183
12184 for (i, company) in self.config.companies.iter().enumerate() {
12185 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12187 .engagements
12188 .iter()
12189 .filter(|e| e.client_entity_id == company.code)
12190 .map(|e| e.engagement_id)
12191 .collect();
12192
12193 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12194 .findings
12195 .iter()
12196 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12197 .cloned()
12198 .collect();
12199
12200 let emp_count = self.master_data.employees.len();
12202 let ceo_name = if emp_count > 0 {
12203 self.master_data.employees[i % emp_count]
12204 .display_name
12205 .clone()
12206 } else {
12207 format!("CEO of {}", company.name)
12208 };
12209 let cfo_name = if emp_count > 1 {
12210 self.master_data.employees[(i + 1) % emp_count]
12211 .display_name
12212 .clone()
12213 } else {
12214 format!("CFO of {}", company.name)
12215 };
12216
12217 let materiality = snapshot
12219 .engagements
12220 .iter()
12221 .find(|e| e.client_entity_id == company.code)
12222 .map(|e| e.materiality)
12223 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12224
12225 let input = SoxGeneratorInput {
12226 company_code: company.code.clone(),
12227 company_name: company.name.clone(),
12228 fiscal_year,
12229 period_end,
12230 findings: company_findings,
12231 ceo_name,
12232 cfo_name,
12233 materiality_threshold: materiality,
12234 revenue_percent: rust_decimal::Decimal::from(100),
12235 assets_percent: rust_decimal::Decimal::from(100),
12236 significant_accounts: vec![
12237 "Revenue".into(),
12238 "Accounts Receivable".into(),
12239 "Inventory".into(),
12240 "Fixed Assets".into(),
12241 "Accounts Payable".into(),
12242 ],
12243 };
12244
12245 let (certs, assessment) = sox_gen.generate(&input);
12246 snapshot.sox_302_certifications.extend(certs);
12247 snapshot.sox_404_assessments.push(assessment);
12248 }
12249
12250 info!(
12251 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12252 snapshot.sox_302_certifications.len(),
12253 snapshot.sox_404_assessments.len(),
12254 snapshot
12255 .sox_404_assessments
12256 .iter()
12257 .filter(|a| a.icfr_effective)
12258 .count(),
12259 snapshot
12260 .sox_404_assessments
12261 .iter()
12262 .filter(|a| !a.icfr_effective)
12263 .count(),
12264 );
12265 }
12266
12267 {
12271 use datasynth_generators::audit::materiality_generator::{
12272 MaterialityGenerator, MaterialityInput,
12273 };
12274
12275 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12276
12277 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12281
12282 for company in &self.config.companies {
12283 let company_code = company.code.clone();
12284
12285 let company_revenue: rust_decimal::Decimal = entries
12287 .iter()
12288 .filter(|e| e.company_code() == company_code)
12289 .flat_map(|e| e.lines.iter())
12290 .filter(|l| l.account_code.starts_with('4'))
12291 .map(|l| l.credit_amount)
12292 .sum();
12293
12294 let total_assets: rust_decimal::Decimal = entries
12296 .iter()
12297 .filter(|e| e.company_code() == company_code)
12298 .flat_map(|e| e.lines.iter())
12299 .filter(|l| l.account_code.starts_with('1'))
12300 .map(|l| l.debit_amount)
12301 .sum();
12302
12303 let total_expenses: rust_decimal::Decimal = entries
12305 .iter()
12306 .filter(|e| e.company_code() == company_code)
12307 .flat_map(|e| e.lines.iter())
12308 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12309 .map(|l| l.debit_amount)
12310 .sum();
12311
12312 let equity: rust_decimal::Decimal = entries
12314 .iter()
12315 .filter(|e| e.company_code() == company_code)
12316 .flat_map(|e| e.lines.iter())
12317 .filter(|l| l.account_code.starts_with('3'))
12318 .map(|l| l.credit_amount)
12319 .sum();
12320
12321 let pretax_income = company_revenue - total_expenses;
12322
12323 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12325 let w = rust_decimal::Decimal::try_from(company.volume_weight)
12326 .unwrap_or(rust_decimal::Decimal::ONE);
12327 (
12328 total_revenue * w,
12329 total_revenue * w * rust_decimal::Decimal::from(3),
12330 total_revenue * w * rust_decimal::Decimal::new(1, 1),
12331 total_revenue * w * rust_decimal::Decimal::from(2),
12332 )
12333 } else {
12334 (company_revenue, total_assets, pretax_income, equity)
12335 };
12336
12337 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
12340 entity_code: company_code,
12341 period: format!("FY{}", fiscal_year),
12342 revenue: rev,
12343 pretax_income: pti,
12344 total_assets: assets,
12345 equity: eq,
12346 gross_profit,
12347 });
12348 }
12349
12350 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12351
12352 info!(
12353 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12354 {} total assets, {} equity benchmarks)",
12355 snapshot.materiality_calculations.len(),
12356 snapshot
12357 .materiality_calculations
12358 .iter()
12359 .filter(|m| matches!(
12360 m.benchmark,
12361 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12362 ))
12363 .count(),
12364 snapshot
12365 .materiality_calculations
12366 .iter()
12367 .filter(|m| matches!(
12368 m.benchmark,
12369 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12370 ))
12371 .count(),
12372 snapshot
12373 .materiality_calculations
12374 .iter()
12375 .filter(|m| matches!(
12376 m.benchmark,
12377 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12378 ))
12379 .count(),
12380 snapshot
12381 .materiality_calculations
12382 .iter()
12383 .filter(|m| matches!(
12384 m.benchmark,
12385 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
12386 ))
12387 .count(),
12388 );
12389 }
12390
12391 {
12395 use datasynth_generators::audit::cra_generator::CraGenerator;
12396
12397 let mut cra_gen = CraGenerator::new(self.seed + 8315);
12398
12399 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
12401 .audit_scopes
12402 .iter()
12403 .map(|s| (s.entity_code.clone(), s.id.clone()))
12404 .collect();
12405
12406 for company in &self.config.companies {
12407 let cras = cra_gen.generate_for_entity(&company.code, None);
12408 let scope_id = entity_scope_map.get(&company.code).cloned();
12409 let cras_with_scope: Vec<_> = cras
12410 .into_iter()
12411 .map(|mut cra| {
12412 cra.scope_id = scope_id.clone();
12413 cra
12414 })
12415 .collect();
12416 snapshot.combined_risk_assessments.extend(cras_with_scope);
12417 }
12418
12419 let significant_count = snapshot
12420 .combined_risk_assessments
12421 .iter()
12422 .filter(|c| c.significant_risk)
12423 .count();
12424 let high_cra_count = snapshot
12425 .combined_risk_assessments
12426 .iter()
12427 .filter(|c| {
12428 matches!(
12429 c.combined_risk,
12430 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
12431 )
12432 })
12433 .count();
12434
12435 info!(
12436 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
12437 snapshot.combined_risk_assessments.len(),
12438 significant_count,
12439 high_cra_count,
12440 );
12441 }
12442
12443 {
12447 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
12448
12449 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
12450
12451 for company in &self.config.companies {
12453 let entity_code = company.code.clone();
12454
12455 let tolerable_error = snapshot
12457 .materiality_calculations
12458 .iter()
12459 .find(|m| m.entity_code == entity_code)
12460 .map(|m| m.tolerable_error);
12461
12462 let entity_cras: Vec<_> = snapshot
12464 .combined_risk_assessments
12465 .iter()
12466 .filter(|c| c.entity_code == entity_code)
12467 .cloned()
12468 .collect();
12469
12470 if !entity_cras.is_empty() {
12471 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
12472 snapshot.sampling_plans.extend(plans);
12473 snapshot.sampled_items.extend(items);
12474 }
12475 }
12476
12477 let misstatement_count = snapshot
12478 .sampled_items
12479 .iter()
12480 .filter(|i| i.misstatement_found)
12481 .count();
12482
12483 info!(
12484 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
12485 snapshot.sampling_plans.len(),
12486 snapshot.sampled_items.len(),
12487 misstatement_count,
12488 );
12489 }
12490
12491 {
12495 use datasynth_generators::audit::scots_generator::{
12496 ScotsGenerator, ScotsGeneratorConfig,
12497 };
12498
12499 let ic_enabled = self.config.intercompany.enabled;
12500
12501 let config = ScotsGeneratorConfig {
12502 intercompany_enabled: ic_enabled,
12503 ..ScotsGeneratorConfig::default()
12504 };
12505 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
12506
12507 for company in &self.config.companies {
12508 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
12509 snapshot
12510 .significant_transaction_classes
12511 .extend(entity_scots);
12512 }
12513
12514 let estimation_count = snapshot
12515 .significant_transaction_classes
12516 .iter()
12517 .filter(|s| {
12518 matches!(
12519 s.transaction_type,
12520 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
12521 )
12522 })
12523 .count();
12524
12525 info!(
12526 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
12527 snapshot.significant_transaction_classes.len(),
12528 estimation_count,
12529 );
12530 }
12531
12532 {
12536 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
12537
12538 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
12539 let entity_codes: Vec<String> = self
12540 .config
12541 .companies
12542 .iter()
12543 .map(|c| c.code.clone())
12544 .collect();
12545 let unusual_flags =
12546 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
12547 info!(
12548 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
12549 unusual_flags.len(),
12550 unusual_flags
12551 .iter()
12552 .filter(|f| matches!(
12553 f.severity,
12554 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
12555 ))
12556 .count(),
12557 unusual_flags
12558 .iter()
12559 .filter(|f| matches!(
12560 f.severity,
12561 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12562 ))
12563 .count(),
12564 unusual_flags
12565 .iter()
12566 .filter(|f| matches!(
12567 f.severity,
12568 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12569 ))
12570 .count(),
12571 );
12572 snapshot.unusual_items = unusual_flags;
12573 }
12574
12575 {
12579 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12580
12581 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12582 let entity_codes: Vec<String> = self
12583 .config
12584 .companies
12585 .iter()
12586 .map(|c| c.code.clone())
12587 .collect();
12588 let current_period_label = format!("FY{fiscal_year}");
12589 let prior_period_label = format!("FY{}", fiscal_year - 1);
12590 let analytical_rels = ar_gen.generate_for_entities(
12591 &entity_codes,
12592 entries,
12593 ¤t_period_label,
12594 &prior_period_label,
12595 );
12596 let out_of_range = analytical_rels
12597 .iter()
12598 .filter(|r| !r.within_expected_range)
12599 .count();
12600 info!(
12601 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
12602 analytical_rels.len(),
12603 out_of_range,
12604 );
12605 snapshot.analytical_relationships = analytical_rels;
12606 }
12607
12608 if let Some(pb) = pb {
12609 pb.finish_with_message(format!(
12610 "Audit data: {} engagements, {} workpapers, {} evidence, \
12611 {} confirmations, {} procedure steps, {} samples, \
12612 {} analytical, {} IA funcs, {} related parties, \
12613 {} component auditors, {} letters, {} subsequent events, \
12614 {} service orgs, {} going concern, {} accounting estimates, \
12615 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12616 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12617 {} unusual items, {} analytical relationships",
12618 snapshot.engagements.len(),
12619 snapshot.workpapers.len(),
12620 snapshot.evidence.len(),
12621 snapshot.confirmations.len(),
12622 snapshot.procedure_steps.len(),
12623 snapshot.samples.len(),
12624 snapshot.analytical_results.len(),
12625 snapshot.ia_functions.len(),
12626 snapshot.related_parties.len(),
12627 snapshot.component_auditors.len(),
12628 snapshot.engagement_letters.len(),
12629 snapshot.subsequent_events.len(),
12630 snapshot.service_organizations.len(),
12631 snapshot.going_concern_assessments.len(),
12632 snapshot.accounting_estimates.len(),
12633 snapshot.audit_opinions.len(),
12634 snapshot.key_audit_matters.len(),
12635 snapshot.sox_302_certifications.len(),
12636 snapshot.sox_404_assessments.len(),
12637 snapshot.materiality_calculations.len(),
12638 snapshot.combined_risk_assessments.len(),
12639 snapshot.sampling_plans.len(),
12640 snapshot.significant_transaction_classes.len(),
12641 snapshot.unusual_items.len(),
12642 snapshot.analytical_relationships.len(),
12643 ));
12644 }
12645
12646 {
12653 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12654 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12655 debug!(
12656 "PCAOB-ISA mappings generated: {} mappings",
12657 snapshot.isa_pcaob_mappings.len()
12658 );
12659 }
12660
12661 {
12668 use datasynth_standards::audit::isa_reference::IsaStandard;
12669 snapshot.isa_mappings = IsaStandard::standard_entries();
12670 debug!(
12671 "ISA standard entries generated: {} standards",
12672 snapshot.isa_mappings.len()
12673 );
12674 }
12675
12676 {
12679 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12680 .engagements
12681 .iter()
12682 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12683 .collect();
12684
12685 for rpt in &mut snapshot.related_party_transactions {
12686 if rpt.journal_entry_id.is_some() {
12687 continue; }
12689 let entity = engagement_by_id
12690 .get(&rpt.engagement_id.to_string())
12691 .copied()
12692 .unwrap_or("");
12693
12694 let best_je = entries
12696 .iter()
12697 .filter(|je| je.header.company_code == entity)
12698 .min_by_key(|je| {
12699 (je.header.posting_date - rpt.transaction_date)
12700 .num_days()
12701 .abs()
12702 });
12703
12704 if let Some(je) = best_je {
12705 rpt.journal_entry_id = Some(je.header.document_id.to_string());
12706 }
12707 }
12708
12709 let linked = snapshot
12710 .related_party_transactions
12711 .iter()
12712 .filter(|t| t.journal_entry_id.is_some())
12713 .count();
12714 debug!(
12715 "Linked {}/{} related party transactions to journal entries",
12716 linked,
12717 snapshot.related_party_transactions.len()
12718 );
12719 }
12720
12721 if !snapshot.engagements.is_empty() {
12727 use datasynth_generators::audit_opinion_generator::{
12728 AuditOpinionGenerator, AuditOpinionInput,
12729 };
12730
12731 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12732 let inputs: Vec<AuditOpinionInput> = snapshot
12733 .engagements
12734 .iter()
12735 .map(|eng| {
12736 let findings = snapshot
12737 .findings
12738 .iter()
12739 .filter(|f| f.engagement_id == eng.engagement_id)
12740 .cloned()
12741 .collect();
12742 let going_concern = snapshot
12743 .going_concern_assessments
12744 .iter()
12745 .find(|gc| gc.entity_code == eng.client_entity_id)
12746 .cloned();
12747 let component_reports = snapshot
12750 .component_reports
12751 .iter()
12752 .filter(|r| r.entity_code == eng.client_entity_id)
12753 .cloned()
12754 .collect();
12755
12756 AuditOpinionInput {
12757 entity_code: eng.client_entity_id.clone(),
12758 entity_name: eng.client_name.clone(),
12759 engagement_id: eng.engagement_id,
12760 period_end: eng.period_end_date,
12761 findings,
12762 going_concern,
12763 component_reports,
12764 is_us_listed: matches!(
12765 eng.engagement_type,
12766 datasynth_core::audit::EngagementType::IntegratedAudit
12767 | datasynth_core::audit::EngagementType::Sox404
12768 ),
12769 auditor_name: "DataSynth Audit LLP".to_string(),
12770 engagement_partner: "Engagement Partner".to_string(),
12771 }
12772 })
12773 .collect();
12774
12775 let generated = opinion_gen.generate_batch(&inputs);
12776 for g in generated {
12777 snapshot.key_audit_matters.extend(g.key_audit_matters);
12778 snapshot.audit_opinions.push(g.opinion);
12779 }
12780 debug!(
12781 "Generated {} audit opinions with {} key audit matters",
12782 snapshot.audit_opinions.len(),
12783 snapshot.key_audit_matters.len()
12784 );
12785 }
12786
12787 Ok(snapshot)
12788 }
12789
12790 fn generate_audit_data_with_fsm(
12797 &mut self,
12798 entries: &[JournalEntry],
12799 ) -> SynthResult<AuditSnapshot> {
12800 use datasynth_audit_fsm::{
12801 context::EngagementContext,
12802 engine::AuditFsmEngine,
12803 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
12804 };
12805 use rand::SeedableRng;
12806 use rand_chacha::ChaCha8Rng;
12807
12808 info!("Audit FSM: generating audit data via FSM engine");
12809
12810 let fsm_config = self
12811 .config
12812 .audit
12813 .fsm
12814 .as_ref()
12815 .expect("FSM config must be present when FSM is enabled");
12816
12817 let bwp = match fsm_config.blueprint.as_str() {
12819 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
12820 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
12821 _ => {
12822 warn!(
12823 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
12824 fsm_config.blueprint
12825 );
12826 BlueprintWithPreconditions::load_builtin_fsa()
12827 }
12828 }
12829 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
12830
12831 let overlay = match fsm_config.overlay.as_str() {
12833 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
12834 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
12835 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
12836 _ => {
12837 warn!(
12838 "Unknown FSM overlay '{}', falling back to builtin:default",
12839 fsm_config.overlay
12840 );
12841 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
12842 }
12843 }
12844 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
12845
12846 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12848 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12849 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12850
12851 let company = self.config.companies.first();
12853 let company_code = company
12854 .map(|c| c.code.clone())
12855 .unwrap_or_else(|| "UNKNOWN".to_string());
12856 let company_name = company
12857 .map(|c| c.name.clone())
12858 .unwrap_or_else(|| "Unknown Company".to_string());
12859 let currency = company
12860 .map(|c| c.currency.clone())
12861 .unwrap_or_else(|| "USD".to_string());
12862
12863 let entity_entries: Vec<_> = entries
12865 .iter()
12866 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
12867 .cloned()
12868 .collect();
12869 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
12873 .iter()
12874 .flat_map(|e| e.lines.iter())
12875 .filter(|l| l.account_code.starts_with('4'))
12876 .map(|l| l.credit_amount - l.debit_amount)
12877 .sum();
12878
12879 let total_assets: rust_decimal::Decimal = entries
12880 .iter()
12881 .flat_map(|e| e.lines.iter())
12882 .filter(|l| l.account_code.starts_with('1'))
12883 .map(|l| l.debit_amount - l.credit_amount)
12884 .sum();
12885
12886 let total_expenses: rust_decimal::Decimal = entries
12887 .iter()
12888 .flat_map(|e| e.lines.iter())
12889 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12890 .map(|l| l.debit_amount)
12891 .sum();
12892
12893 let equity: rust_decimal::Decimal = entries
12894 .iter()
12895 .flat_map(|e| e.lines.iter())
12896 .filter(|l| l.account_code.starts_with('3'))
12897 .map(|l| l.credit_amount - l.debit_amount)
12898 .sum();
12899
12900 let total_debt: rust_decimal::Decimal = entries
12901 .iter()
12902 .flat_map(|e| e.lines.iter())
12903 .filter(|l| l.account_code.starts_with('2'))
12904 .map(|l| l.credit_amount - l.debit_amount)
12905 .sum();
12906
12907 let pretax_income = total_revenue - total_expenses;
12908
12909 let cogs: rust_decimal::Decimal = entries
12910 .iter()
12911 .flat_map(|e| e.lines.iter())
12912 .filter(|l| l.account_code.starts_with('5'))
12913 .map(|l| l.debit_amount)
12914 .sum();
12915 let gross_profit = total_revenue - cogs;
12916
12917 let current_assets: rust_decimal::Decimal = entries
12918 .iter()
12919 .flat_map(|e| e.lines.iter())
12920 .filter(|l| {
12921 l.account_code.starts_with("10")
12922 || l.account_code.starts_with("11")
12923 || l.account_code.starts_with("12")
12924 || l.account_code.starts_with("13")
12925 })
12926 .map(|l| l.debit_amount - l.credit_amount)
12927 .sum();
12928 let current_liabilities: rust_decimal::Decimal = entries
12929 .iter()
12930 .flat_map(|e| e.lines.iter())
12931 .filter(|l| {
12932 l.account_code.starts_with("20")
12933 || l.account_code.starts_with("21")
12934 || l.account_code.starts_with("22")
12935 })
12936 .map(|l| l.credit_amount - l.debit_amount)
12937 .sum();
12938 let working_capital = current_assets - current_liabilities;
12939
12940 let depreciation: rust_decimal::Decimal = entries
12941 .iter()
12942 .flat_map(|e| e.lines.iter())
12943 .filter(|l| l.account_code.starts_with("60"))
12944 .map(|l| l.debit_amount)
12945 .sum();
12946 let operating_cash_flow = pretax_income + depreciation;
12947
12948 let accounts: Vec<String> = self
12950 .coa
12951 .as_ref()
12952 .map(|coa| {
12953 coa.get_postable_accounts()
12954 .iter()
12955 .map(|acc| acc.account_code().to_string())
12956 .collect()
12957 })
12958 .unwrap_or_default();
12959
12960 let team_member_ids: Vec<String> = self
12962 .master_data
12963 .employees
12964 .iter()
12965 .take(8) .map(|e| e.employee_id.clone())
12967 .collect();
12968 let team_member_pairs: Vec<(String, String)> = self
12969 .master_data
12970 .employees
12971 .iter()
12972 .take(8)
12973 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12974 .collect();
12975
12976 let vendor_names: Vec<String> = self
12977 .master_data
12978 .vendors
12979 .iter()
12980 .map(|v| v.name.clone())
12981 .collect();
12982 let customer_names: Vec<String> = self
12983 .master_data
12984 .customers
12985 .iter()
12986 .map(|c| c.name.clone())
12987 .collect();
12988
12989 let entity_codes: Vec<String> = self
12990 .config
12991 .companies
12992 .iter()
12993 .map(|c| c.code.clone())
12994 .collect();
12995
12996 let journal_entry_ids: Vec<String> = entries
12998 .iter()
12999 .take(50)
13000 .map(|e| e.header.document_id.to_string())
13001 .collect();
13002
13003 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13005 for entry in entries {
13006 for line in &entry.lines {
13007 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13008 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13009 *account_balances
13010 .entry(line.account_code.clone())
13011 .or_insert(0.0) += debit_f64 - credit_f64;
13012 }
13013 }
13014
13015 let control_ids: Vec<String> = Vec::new();
13020 let anomaly_refs: Vec<String> = Vec::new();
13021
13022 let mut context = EngagementContext {
13023 company_code,
13024 company_name,
13025 fiscal_year: start_date.year(),
13026 currency,
13027 total_revenue,
13028 total_assets,
13029 engagement_start: start_date,
13030 report_date: period_end,
13031 pretax_income,
13032 equity,
13033 gross_profit,
13034 working_capital,
13035 operating_cash_flow,
13036 total_debt,
13037 team_member_ids,
13038 team_member_pairs,
13039 accounts,
13040 vendor_names,
13041 customer_names,
13042 journal_entry_ids,
13043 account_balances,
13044 control_ids,
13045 anomaly_refs,
13046 journal_entries: entries.to_vec(),
13047 is_us_listed: false,
13048 entity_codes,
13049 auditor_firm_name: "DataSynth Audit LLP".into(),
13050 accounting_framework: self
13051 .config
13052 .accounting_standards
13053 .framework
13054 .map(|f| match f {
13055 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13056 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13057 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13058 "French GAAP"
13059 }
13060 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13061 "German GAAP"
13062 }
13063 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13064 "Dual Reporting"
13065 }
13066 })
13067 .unwrap_or("IFRS")
13068 .into(),
13069 };
13070
13071 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13073 let rng = ChaCha8Rng::seed_from_u64(seed);
13074 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13075
13076 let mut result = engine
13077 .run_engagement(&context)
13078 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13079
13080 info!(
13081 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13082 {} phases completed, duration {:.1}h",
13083 result.event_log.len(),
13084 result.artifacts.total_artifacts(),
13085 result.anomalies.len(),
13086 result.phases_completed.len(),
13087 result.total_duration_hours,
13088 );
13089
13090 let tb_entity = context.company_code.clone();
13092 let tb_fy = context.fiscal_year;
13093 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13094 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13095 entries,
13096 &tb_entity,
13097 tb_fy,
13098 self.coa.as_ref().map(|c| c.as_ref()),
13099 );
13100
13101 let bag = result.artifacts;
13103 let mut snapshot = AuditSnapshot {
13104 engagements: bag.engagements,
13105 engagement_letters: bag.engagement_letters,
13106 materiality_calculations: bag.materiality_calculations,
13107 risk_assessments: bag.risk_assessments,
13108 combined_risk_assessments: bag.combined_risk_assessments,
13109 workpapers: bag.workpapers,
13110 evidence: bag.evidence,
13111 findings: bag.findings,
13112 judgments: bag.judgments,
13113 sampling_plans: bag.sampling_plans,
13114 sampled_items: bag.sampled_items,
13115 analytical_results: bag.analytical_results,
13116 going_concern_assessments: bag.going_concern_assessments,
13117 subsequent_events: bag.subsequent_events,
13118 audit_opinions: bag.audit_opinions,
13119 key_audit_matters: bag.key_audit_matters,
13120 procedure_steps: bag.procedure_steps,
13121 samples: bag.samples,
13122 confirmations: bag.confirmations,
13123 confirmation_responses: bag.confirmation_responses,
13124 fsm_event_trail: Some(result.event_log),
13126 ..Default::default()
13128 };
13129
13130 {
13132 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13133 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13134 }
13135 {
13136 use datasynth_standards::audit::isa_reference::IsaStandard;
13137 snapshot.isa_mappings = IsaStandard::standard_entries();
13138 }
13139
13140 info!(
13141 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13142 {} risk assessments, {} findings, {} materiality calcs",
13143 snapshot.engagements.len(),
13144 snapshot.workpapers.len(),
13145 snapshot.evidence.len(),
13146 snapshot.risk_assessments.len(),
13147 snapshot.findings.len(),
13148 snapshot.materiality_calculations.len(),
13149 );
13150
13151 Ok(snapshot)
13152 }
13153
13154 fn export_graphs(
13161 &mut self,
13162 entries: &[JournalEntry],
13163 _coa: &Arc<ChartOfAccounts>,
13164 stats: &mut EnhancedGenerationStatistics,
13165 ) -> SynthResult<GraphExportSnapshot> {
13166 let pb = self.create_progress_bar(100, "Exporting Graphs");
13167
13168 let mut snapshot = GraphExportSnapshot::default();
13169
13170 let output_dir = self
13172 .output_path
13173 .clone()
13174 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13175 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13176
13177 for graph_type in &self.config.graph_export.graph_types {
13179 if let Some(pb) = &pb {
13180 pb.inc(10);
13181 }
13182
13183 let graph_config = TransactionGraphConfig {
13185 include_vendors: false,
13186 include_customers: false,
13187 create_debit_credit_edges: true,
13188 include_document_nodes: graph_type.include_document_nodes,
13189 min_edge_weight: graph_type.min_edge_weight,
13190 aggregate_parallel_edges: graph_type.aggregate_edges,
13191 framework: None,
13192 };
13193
13194 let mut builder = TransactionGraphBuilder::new(graph_config);
13195 builder.add_journal_entries(entries);
13196 let graph = builder.build();
13197
13198 stats.graph_node_count += graph.node_count();
13200 stats.graph_edge_count += graph.edge_count();
13201
13202 if let Some(pb) = &pb {
13203 pb.inc(40);
13204 }
13205
13206 for format in &self.config.graph_export.formats {
13208 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13209
13210 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13212 warn!("Failed to create graph output directory: {}", e);
13213 continue;
13214 }
13215
13216 match format {
13217 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13218 let pyg_config = PyGExportConfig {
13219 common: datasynth_graph::CommonExportConfig {
13220 export_node_features: true,
13221 export_edge_features: true,
13222 export_node_labels: true,
13223 export_edge_labels: true,
13224 export_masks: true,
13225 train_ratio: self.config.graph_export.train_ratio,
13226 val_ratio: self.config.graph_export.validation_ratio,
13227 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13228 },
13229 one_hot_categoricals: false,
13230 };
13231
13232 let exporter = PyGExporter::new(pyg_config);
13233 match exporter.export(&graph, &format_dir) {
13234 Ok(metadata) => {
13235 snapshot.exports.insert(
13236 format!("{}_{}", graph_type.name, "pytorch_geometric"),
13237 GraphExportInfo {
13238 name: graph_type.name.clone(),
13239 format: "pytorch_geometric".to_string(),
13240 output_path: format_dir.clone(),
13241 node_count: metadata.num_nodes,
13242 edge_count: metadata.num_edges,
13243 },
13244 );
13245 snapshot.graph_count += 1;
13246 }
13247 Err(e) => {
13248 warn!("Failed to export PyTorch Geometric graph: {}", e);
13249 }
13250 }
13251 }
13252 datasynth_config::schema::GraphExportFormat::Neo4j => {
13253 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13254
13255 let neo4j_config = Neo4jExportConfig {
13256 export_node_properties: true,
13257 export_edge_properties: true,
13258 export_features: true,
13259 generate_cypher: true,
13260 generate_admin_import: true,
13261 database_name: "synth".to_string(),
13262 cypher_batch_size: 1000,
13263 };
13264
13265 let exporter = Neo4jExporter::new(neo4j_config);
13266 match exporter.export(&graph, &format_dir) {
13267 Ok(metadata) => {
13268 snapshot.exports.insert(
13269 format!("{}_{}", graph_type.name, "neo4j"),
13270 GraphExportInfo {
13271 name: graph_type.name.clone(),
13272 format: "neo4j".to_string(),
13273 output_path: format_dir.clone(),
13274 node_count: metadata.num_nodes,
13275 edge_count: metadata.num_edges,
13276 },
13277 );
13278 snapshot.graph_count += 1;
13279 }
13280 Err(e) => {
13281 warn!("Failed to export Neo4j graph: {}", e);
13282 }
13283 }
13284 }
13285 datasynth_config::schema::GraphExportFormat::Dgl => {
13286 use datasynth_graph::{DGLExportConfig, DGLExporter};
13287
13288 let dgl_config = DGLExportConfig {
13289 common: datasynth_graph::CommonExportConfig {
13290 export_node_features: true,
13291 export_edge_features: true,
13292 export_node_labels: true,
13293 export_edge_labels: true,
13294 export_masks: true,
13295 train_ratio: self.config.graph_export.train_ratio,
13296 val_ratio: self.config.graph_export.validation_ratio,
13297 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13298 },
13299 heterogeneous: self.config.graph_export.dgl.heterogeneous,
13300 include_pickle_script: true, };
13302
13303 let exporter = DGLExporter::new(dgl_config);
13304 match exporter.export(&graph, &format_dir) {
13305 Ok(metadata) => {
13306 snapshot.exports.insert(
13307 format!("{}_{}", graph_type.name, "dgl"),
13308 GraphExportInfo {
13309 name: graph_type.name.clone(),
13310 format: "dgl".to_string(),
13311 output_path: format_dir.clone(),
13312 node_count: metadata.common.num_nodes,
13313 edge_count: metadata.common.num_edges,
13314 },
13315 );
13316 snapshot.graph_count += 1;
13317 }
13318 Err(e) => {
13319 warn!("Failed to export DGL graph: {}", e);
13320 }
13321 }
13322 }
13323 datasynth_config::schema::GraphExportFormat::RustGraph => {
13324 use datasynth_graph::{
13325 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13326 };
13327
13328 let rustgraph_config = RustGraphExportConfig {
13329 include_features: true,
13330 include_temporal: true,
13331 include_labels: true,
13332 source_name: "datasynth".to_string(),
13333 batch_id: None,
13334 output_format: RustGraphOutputFormat::JsonLines,
13335 export_node_properties: true,
13336 export_edge_properties: true,
13337 pretty_print: false,
13338 };
13339
13340 let exporter = RustGraphExporter::new(rustgraph_config);
13341 match exporter.export(&graph, &format_dir) {
13342 Ok(metadata) => {
13343 snapshot.exports.insert(
13344 format!("{}_{}", graph_type.name, "rustgraph"),
13345 GraphExportInfo {
13346 name: graph_type.name.clone(),
13347 format: "rustgraph".to_string(),
13348 output_path: format_dir.clone(),
13349 node_count: metadata.num_nodes,
13350 edge_count: metadata.num_edges,
13351 },
13352 );
13353 snapshot.graph_count += 1;
13354 }
13355 Err(e) => {
13356 warn!("Failed to export RustGraph: {}", e);
13357 }
13358 }
13359 }
13360 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13361 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13363 }
13364 }
13365 }
13366
13367 if let Some(pb) = &pb {
13368 pb.inc(40);
13369 }
13370 }
13371
13372 stats.graph_export_count = snapshot.graph_count;
13373 snapshot.exported = snapshot.graph_count > 0;
13374
13375 if let Some(pb) = pb {
13376 pb.finish_with_message(format!(
13377 "Graphs exported: {} graphs ({} nodes, {} edges)",
13378 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13379 ));
13380 }
13381
13382 Ok(snapshot)
13383 }
13384
13385 fn build_additional_graphs(
13390 &self,
13391 banking: &BankingSnapshot,
13392 intercompany: &IntercompanySnapshot,
13393 entries: &[JournalEntry],
13394 stats: &mut EnhancedGenerationStatistics,
13395 ) {
13396 let output_dir = self
13397 .output_path
13398 .clone()
13399 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13400 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13401
13402 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
13404 info!("Phase 10c: Building banking network graph");
13405 let config = BankingGraphConfig::default();
13406 let mut builder = BankingGraphBuilder::new(config);
13407 builder.add_customers(&banking.customers);
13408 builder.add_accounts(&banking.accounts, &banking.customers);
13409 builder.add_transactions(&banking.transactions);
13410 let graph = builder.build();
13411
13412 let node_count = graph.node_count();
13413 let edge_count = graph.edge_count();
13414 stats.graph_node_count += node_count;
13415 stats.graph_edge_count += edge_count;
13416
13417 for format in &self.config.graph_export.formats {
13419 if matches!(
13420 format,
13421 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13422 ) {
13423 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
13424 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13425 warn!("Failed to create banking graph output dir: {}", e);
13426 continue;
13427 }
13428 let pyg_config = PyGExportConfig::default();
13429 let exporter = PyGExporter::new(pyg_config);
13430 if let Err(e) = exporter.export(&graph, &format_dir) {
13431 warn!("Failed to export banking graph as PyG: {}", e);
13432 } else {
13433 info!(
13434 "Banking network graph exported: {} nodes, {} edges",
13435 node_count, edge_count
13436 );
13437 }
13438 }
13439 }
13440 }
13441
13442 let approval_entries: Vec<_> = entries
13444 .iter()
13445 .filter(|je| je.header.approval_workflow.is_some())
13446 .collect();
13447
13448 if !approval_entries.is_empty() {
13449 info!(
13450 "Phase 10c: Building approval network graph ({} entries with approvals)",
13451 approval_entries.len()
13452 );
13453 let config = ApprovalGraphConfig::default();
13454 let mut builder = ApprovalGraphBuilder::new(config);
13455
13456 for je in &approval_entries {
13457 if let Some(ref wf) = je.header.approval_workflow {
13458 for action in &wf.actions {
13459 let record = datasynth_core::models::ApprovalRecord {
13460 approval_id: format!(
13461 "APR-{}-{}",
13462 je.header.document_id, action.approval_level
13463 ),
13464 document_number: je.header.document_id.to_string(),
13465 document_type: "JE".to_string(),
13466 company_code: je.company_code().to_string(),
13467 requester_id: wf.preparer_id.clone(),
13468 requester_name: Some(wf.preparer_name.clone()),
13469 approver_id: action.actor_id.clone(),
13470 approver_name: action.actor_name.clone(),
13471 approval_date: je.posting_date(),
13472 action: format!("{:?}", action.action),
13473 amount: wf.amount,
13474 approval_limit: None,
13475 comments: action.comments.clone(),
13476 delegation_from: None,
13477 is_auto_approved: false,
13478 };
13479 builder.add_approval(&record);
13480 }
13481 }
13482 }
13483
13484 let graph = builder.build();
13485 let node_count = graph.node_count();
13486 let edge_count = graph.edge_count();
13487 stats.graph_node_count += node_count;
13488 stats.graph_edge_count += edge_count;
13489
13490 for format in &self.config.graph_export.formats {
13492 if matches!(
13493 format,
13494 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13495 ) {
13496 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
13497 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13498 warn!("Failed to create approval graph output dir: {}", e);
13499 continue;
13500 }
13501 let pyg_config = PyGExportConfig::default();
13502 let exporter = PyGExporter::new(pyg_config);
13503 if let Err(e) = exporter.export(&graph, &format_dir) {
13504 warn!("Failed to export approval graph as PyG: {}", e);
13505 } else {
13506 info!(
13507 "Approval network graph exported: {} nodes, {} edges",
13508 node_count, edge_count
13509 );
13510 }
13511 }
13512 }
13513 }
13514
13515 if self.config.companies.len() >= 2 {
13517 info!(
13518 "Phase 10c: Building entity relationship graph ({} companies)",
13519 self.config.companies.len()
13520 );
13521
13522 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13523 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
13524
13525 let parent_code = &self.config.companies[0].code;
13527 let mut companies: Vec<datasynth_core::models::Company> =
13528 Vec::with_capacity(self.config.companies.len());
13529
13530 let first = &self.config.companies[0];
13532 companies.push(datasynth_core::models::Company::parent(
13533 &first.code,
13534 &first.name,
13535 &first.country,
13536 &first.currency,
13537 ));
13538
13539 for cc in self.config.companies.iter().skip(1) {
13541 companies.push(datasynth_core::models::Company::subsidiary(
13542 &cc.code,
13543 &cc.name,
13544 &cc.country,
13545 &cc.currency,
13546 parent_code,
13547 rust_decimal::Decimal::from(100),
13548 ));
13549 }
13550
13551 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
13553 self.config
13554 .companies
13555 .iter()
13556 .skip(1)
13557 .enumerate()
13558 .map(|(i, cc)| {
13559 let mut rel =
13560 datasynth_core::models::intercompany::IntercompanyRelationship::new(
13561 format!("REL{:03}", i + 1),
13562 parent_code.clone(),
13563 cc.code.clone(),
13564 rust_decimal::Decimal::from(100),
13565 start_date,
13566 );
13567 rel.functional_currency = cc.currency.clone();
13568 rel
13569 })
13570 .collect();
13571
13572 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13573 builder.add_companies(&companies);
13574 builder.add_ownership_relationships(&relationships);
13575
13576 for pair in &intercompany.matched_pairs {
13578 builder.add_intercompany_edge(
13579 &pair.seller_company,
13580 &pair.buyer_company,
13581 pair.amount,
13582 &format!("{:?}", pair.transaction_type),
13583 );
13584 }
13585
13586 let graph = builder.build();
13587 let node_count = graph.node_count();
13588 let edge_count = graph.edge_count();
13589 stats.graph_node_count += node_count;
13590 stats.graph_edge_count += edge_count;
13591
13592 for format in &self.config.graph_export.formats {
13594 if matches!(
13595 format,
13596 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13597 ) {
13598 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
13599 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13600 warn!("Failed to create entity graph output dir: {}", e);
13601 continue;
13602 }
13603 let pyg_config = PyGExportConfig::default();
13604 let exporter = PyGExporter::new(pyg_config);
13605 if let Err(e) = exporter.export(&graph, &format_dir) {
13606 warn!("Failed to export entity graph as PyG: {}", e);
13607 } else {
13608 info!(
13609 "Entity relationship graph exported: {} nodes, {} edges",
13610 node_count, edge_count
13611 );
13612 }
13613 }
13614 }
13615 } else {
13616 debug!(
13617 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13618 self.config.companies.len()
13619 );
13620 }
13621 }
13622
13623 #[allow(clippy::too_many_arguments)]
13630 fn export_hypergraph(
13631 &self,
13632 coa: &Arc<ChartOfAccounts>,
13633 entries: &[JournalEntry],
13634 document_flows: &DocumentFlowSnapshot,
13635 sourcing: &SourcingSnapshot,
13636 hr: &HrSnapshot,
13637 manufacturing: &ManufacturingSnapshot,
13638 banking: &BankingSnapshot,
13639 audit: &AuditSnapshot,
13640 financial_reporting: &FinancialReportingSnapshot,
13641 ocpm: &OcpmSnapshot,
13642 compliance: &ComplianceRegulationsSnapshot,
13643 stats: &mut EnhancedGenerationStatistics,
13644 ) -> SynthResult<HypergraphExportInfo> {
13645 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13646 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13647 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13648 use datasynth_graph::models::hypergraph::AggregationStrategy;
13649
13650 let hg_settings = &self.config.graph_export.hypergraph;
13651
13652 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13654 "truncate" => AggregationStrategy::Truncate,
13655 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13656 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13657 "importance_sample" => AggregationStrategy::ImportanceSample,
13658 _ => AggregationStrategy::PoolByCounterparty,
13659 };
13660
13661 let builder_config = HypergraphConfig {
13662 max_nodes: hg_settings.max_nodes,
13663 aggregation_strategy,
13664 include_coso: hg_settings.governance_layer.include_coso,
13665 include_controls: hg_settings.governance_layer.include_controls,
13666 include_sox: hg_settings.governance_layer.include_sox,
13667 include_vendors: hg_settings.governance_layer.include_vendors,
13668 include_customers: hg_settings.governance_layer.include_customers,
13669 include_employees: hg_settings.governance_layer.include_employees,
13670 include_p2p: hg_settings.process_layer.include_p2p,
13671 include_o2c: hg_settings.process_layer.include_o2c,
13672 include_s2c: hg_settings.process_layer.include_s2c,
13673 include_h2r: hg_settings.process_layer.include_h2r,
13674 include_mfg: hg_settings.process_layer.include_mfg,
13675 include_bank: hg_settings.process_layer.include_bank,
13676 include_audit: hg_settings.process_layer.include_audit,
13677 include_r2r: hg_settings.process_layer.include_r2r,
13678 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13679 docs_per_counterparty_threshold: hg_settings
13680 .process_layer
13681 .docs_per_counterparty_threshold,
13682 include_accounts: hg_settings.accounting_layer.include_accounts,
13683 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13684 include_cross_layer_edges: hg_settings.cross_layer.enabled,
13685 include_compliance: self.config.compliance_regulations.enabled,
13686 include_tax: true,
13687 include_treasury: true,
13688 include_esg: true,
13689 include_project: true,
13690 include_intercompany: true,
13691 include_temporal_events: true,
13692 };
13693
13694 let mut builder = HypergraphBuilder::new(builder_config);
13695
13696 builder.add_coso_framework();
13698
13699 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13702 let controls = InternalControl::standard_controls();
13703 builder.add_controls(&controls);
13704 }
13705
13706 builder.add_vendors(&self.master_data.vendors);
13708 builder.add_customers(&self.master_data.customers);
13709 builder.add_employees(&self.master_data.employees);
13710
13711 builder.add_p2p_documents(
13713 &document_flows.purchase_orders,
13714 &document_flows.goods_receipts,
13715 &document_flows.vendor_invoices,
13716 &document_flows.payments,
13717 );
13718 builder.add_o2c_documents(
13719 &document_flows.sales_orders,
13720 &document_flows.deliveries,
13721 &document_flows.customer_invoices,
13722 );
13723 builder.add_s2c_documents(
13724 &sourcing.sourcing_projects,
13725 &sourcing.qualifications,
13726 &sourcing.rfx_events,
13727 &sourcing.bids,
13728 &sourcing.bid_evaluations,
13729 &sourcing.contracts,
13730 );
13731 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13732 builder.add_mfg_documents(
13733 &manufacturing.production_orders,
13734 &manufacturing.quality_inspections,
13735 &manufacturing.cycle_counts,
13736 );
13737 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13738 builder.add_audit_documents(
13739 &audit.engagements,
13740 &audit.workpapers,
13741 &audit.findings,
13742 &audit.evidence,
13743 &audit.risk_assessments,
13744 &audit.judgments,
13745 &audit.materiality_calculations,
13746 &audit.audit_opinions,
13747 &audit.going_concern_assessments,
13748 );
13749 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13750
13751 if let Some(ref event_log) = ocpm.event_log {
13753 builder.add_ocpm_events(event_log);
13754 }
13755
13756 if self.config.compliance_regulations.enabled
13758 && hg_settings.governance_layer.include_controls
13759 {
13760 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13762 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13763 .standard_records
13764 .iter()
13765 .filter_map(|r| {
13766 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
13767 registry.get(&sid).cloned()
13768 })
13769 .collect();
13770
13771 builder.add_compliance_regulations(
13772 &standards,
13773 &compliance.findings,
13774 &compliance.filings,
13775 );
13776 }
13777
13778 builder.add_accounts(coa);
13780 builder.add_journal_entries_as_hyperedges(entries);
13781
13782 let hypergraph = builder.build();
13784
13785 let output_dir = self
13787 .output_path
13788 .clone()
13789 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13790 let hg_dir = output_dir
13791 .join(&self.config.graph_export.output_subdirectory)
13792 .join(&hg_settings.output_subdirectory);
13793
13794 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
13796 "unified" => {
13797 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13798 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13799 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
13800 })?;
13801 (
13802 metadata.num_nodes,
13803 metadata.num_edges,
13804 metadata.num_hyperedges,
13805 )
13806 }
13807 _ => {
13808 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
13810 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13811 SynthError::generation(format!("Hypergraph export failed: {e}"))
13812 })?;
13813 (
13814 metadata.num_nodes,
13815 metadata.num_edges,
13816 metadata.num_hyperedges,
13817 )
13818 }
13819 };
13820
13821 #[cfg(feature = "streaming")]
13823 if let Some(ref target_url) = hg_settings.stream_target {
13824 use crate::stream_client::{StreamClient, StreamConfig};
13825 use std::io::Write as _;
13826
13827 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
13828 let stream_config = StreamConfig {
13829 target_url: target_url.clone(),
13830 batch_size: hg_settings.stream_batch_size,
13831 api_key,
13832 ..StreamConfig::default()
13833 };
13834
13835 match StreamClient::new(stream_config) {
13836 Ok(mut client) => {
13837 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13838 match exporter.export_to_writer(&hypergraph, &mut client) {
13839 Ok(_) => {
13840 if let Err(e) = client.flush() {
13841 warn!("Failed to flush stream client: {}", e);
13842 } else {
13843 info!("Streamed {} records to {}", client.total_sent(), target_url);
13844 }
13845 }
13846 Err(e) => {
13847 warn!("Streaming export failed: {}", e);
13848 }
13849 }
13850 }
13851 Err(e) => {
13852 warn!("Failed to create stream client: {}", e);
13853 }
13854 }
13855 }
13856
13857 stats.graph_node_count += num_nodes;
13859 stats.graph_edge_count += num_edges;
13860 stats.graph_export_count += 1;
13861
13862 Ok(HypergraphExportInfo {
13863 node_count: num_nodes,
13864 edge_count: num_edges,
13865 hyperedge_count: num_hyperedges,
13866 output_path: hg_dir,
13867 })
13868 }
13869
13870 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13875 let pb = self.create_progress_bar(100, "Generating Banking Data");
13876
13877 let orchestrator = BankingOrchestratorBuilder::new()
13879 .config(self.config.banking.clone())
13880 .seed(self.seed + 9000)
13881 .country_pack(self.primary_pack().clone())
13882 .build();
13883
13884 if let Some(pb) = &pb {
13885 pb.inc(10);
13886 }
13887
13888 let result = orchestrator.generate();
13890
13891 if let Some(pb) = &pb {
13892 pb.inc(90);
13893 pb.finish_with_message(format!(
13894 "Banking: {} customers, {} transactions",
13895 result.customers.len(),
13896 result.transactions.len()
13897 ));
13898 }
13899
13900 let mut banking_customers = result.customers;
13905 let core_customers = &self.master_data.customers;
13906 if !core_customers.is_empty() {
13907 for (i, bc) in banking_customers.iter_mut().enumerate() {
13908 let core = &core_customers[i % core_customers.len()];
13909 bc.name = CustomerName::business(&core.name);
13910 bc.residence_country = core.country.clone();
13911 bc.enterprise_customer_id = Some(core.customer_id.clone());
13912 }
13913 debug!(
13914 "Cross-referenced {} banking customers with {} core customers",
13915 banking_customers.len(),
13916 core_customers.len()
13917 );
13918 }
13919
13920 Ok(BankingSnapshot {
13921 customers: banking_customers,
13922 accounts: result.accounts,
13923 transactions: result.transactions,
13924 transaction_labels: result.transaction_labels,
13925 customer_labels: result.customer_labels,
13926 account_labels: result.account_labels,
13927 relationship_labels: result.relationship_labels,
13928 narratives: result.narratives,
13929 suspicious_count: result.stats.suspicious_count,
13930 scenario_count: result.scenarios.len(),
13931 })
13932 }
13933
13934 fn calculate_total_transactions(&self) -> u64 {
13936 let months = self.config.global.period_months as f64;
13937 self.config
13938 .companies
13939 .iter()
13940 .map(|c| {
13941 let annual = c.annual_transaction_volume.count() as f64;
13942 let weighted = annual * c.volume_weight;
13943 (weighted * months / 12.0) as u64
13944 })
13945 .sum()
13946 }
13947
13948 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13950 if !self.phase_config.show_progress {
13951 return None;
13952 }
13953
13954 let pb = if let Some(mp) = &self.multi_progress {
13955 mp.add(ProgressBar::new(total))
13956 } else {
13957 ProgressBar::new(total)
13958 };
13959
13960 pb.set_style(
13961 ProgressStyle::default_bar()
13962 .template(&format!(
13963 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13964 ))
13965 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13966 .progress_chars("#>-"),
13967 );
13968
13969 Some(pb)
13970 }
13971
13972 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13974 self.coa.clone()
13975 }
13976
/// Returns a shared reference to the master data snapshot stored on `self`.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
13981
13982 fn phase_compliance_regulations(
13984 &mut self,
13985 _stats: &mut EnhancedGenerationStatistics,
13986 ) -> SynthResult<ComplianceRegulationsSnapshot> {
13987 if !self.phase_config.generate_compliance_regulations {
13988 return Ok(ComplianceRegulationsSnapshot::default());
13989 }
13990
13991 info!("Phase: Generating Compliance Regulations Data");
13992
13993 let cr_config = &self.config.compliance_regulations;
13994
13995 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13997 self.config
13998 .companies
13999 .iter()
14000 .map(|c| c.country.clone())
14001 .collect::<std::collections::HashSet<_>>()
14002 .into_iter()
14003 .collect()
14004 } else {
14005 cr_config.jurisdictions.clone()
14006 };
14007
14008 let fallback_date =
14010 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14011 let reference_date = cr_config
14012 .reference_date
14013 .as_ref()
14014 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14015 .unwrap_or_else(|| {
14016 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14017 .unwrap_or(fallback_date)
14018 });
14019
14020 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14022 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14023 let cross_reference_records = reg_gen.generate_cross_reference_records();
14024 let jurisdiction_records =
14025 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14026
14027 info!(
14028 " Standards: {} records, {} cross-references, {} jurisdictions",
14029 standard_records.len(),
14030 cross_reference_records.len(),
14031 jurisdiction_records.len()
14032 );
14033
14034 let audit_procedures = if cr_config.audit_procedures.enabled {
14036 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14037 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14038 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14039 confidence_level: cr_config.audit_procedures.confidence_level,
14040 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14041 };
14042 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14043 self.seed + 9000,
14044 proc_config,
14045 );
14046 let registry = reg_gen.registry();
14047 let mut all_procs = Vec::new();
14048 for jurisdiction in &jurisdictions {
14049 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14050 all_procs.extend(procs);
14051 }
14052 info!(" Audit procedures: {}", all_procs.len());
14053 all_procs
14054 } else {
14055 Vec::new()
14056 };
14057
14058 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14060 let finding_config =
14061 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14062 finding_rate: cr_config.findings.finding_rate,
14063 material_weakness_rate: cr_config.findings.material_weakness_rate,
14064 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14065 generate_remediation: cr_config.findings.generate_remediation,
14066 };
14067 let mut finding_gen =
14068 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14069 self.seed + 9100,
14070 finding_config,
14071 );
14072 let mut all_findings = Vec::new();
14073 for company in &self.config.companies {
14074 let company_findings =
14075 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14076 all_findings.extend(company_findings);
14077 }
14078 info!(" Compliance findings: {}", all_findings.len());
14079 all_findings
14080 } else {
14081 Vec::new()
14082 };
14083
14084 let filings = if cr_config.filings.enabled {
14086 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14087 filing_types: cr_config.filings.filing_types.clone(),
14088 generate_status_progression: cr_config.filings.generate_status_progression,
14089 };
14090 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14091 self.seed + 9200,
14092 filing_config,
14093 );
14094 let company_codes: Vec<String> = self
14095 .config
14096 .companies
14097 .iter()
14098 .map(|c| c.code.clone())
14099 .collect();
14100 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14101 .unwrap_or(fallback_date);
14102 let filings = filing_gen.generate_filings(
14103 &company_codes,
14104 &jurisdictions,
14105 start_date,
14106 self.config.global.period_months,
14107 );
14108 info!(" Regulatory filings: {}", filings.len());
14109 filings
14110 } else {
14111 Vec::new()
14112 };
14113
14114 let compliance_graph = if cr_config.graph.enabled {
14116 let graph_config = datasynth_graph::ComplianceGraphConfig {
14117 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14118 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14119 include_cross_references: cr_config.graph.include_cross_references,
14120 include_supersession_edges: cr_config.graph.include_supersession_edges,
14121 include_account_links: cr_config.graph.include_account_links,
14122 include_control_links: cr_config.graph.include_control_links,
14123 include_company_links: cr_config.graph.include_company_links,
14124 };
14125 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14126
14127 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14129 .iter()
14130 .map(|r| datasynth_graph::StandardNodeInput {
14131 standard_id: r.standard_id.clone(),
14132 title: r.title.clone(),
14133 category: r.category.clone(),
14134 domain: r.domain.clone(),
14135 is_active: r.is_active,
14136 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14137 applicable_account_types: r.applicable_account_types.clone(),
14138 applicable_processes: r.applicable_processes.clone(),
14139 })
14140 .collect();
14141 builder.add_standards(&standard_inputs);
14142
14143 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14145 jurisdiction_records
14146 .iter()
14147 .map(|r| datasynth_graph::JurisdictionNodeInput {
14148 country_code: r.country_code.clone(),
14149 country_name: r.country_name.clone(),
14150 framework: r.accounting_framework.clone(),
14151 standard_count: r.standard_count,
14152 tax_rate: r.statutory_tax_rate,
14153 })
14154 .collect();
14155 builder.add_jurisdictions(&jurisdiction_inputs);
14156
14157 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14159 cross_reference_records
14160 .iter()
14161 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14162 from_standard: r.from_standard.clone(),
14163 to_standard: r.to_standard.clone(),
14164 relationship: r.relationship.clone(),
14165 convergence_level: r.convergence_level,
14166 })
14167 .collect();
14168 builder.add_cross_references(&xref_inputs);
14169
14170 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14172 .iter()
14173 .map(|r| datasynth_graph::JurisdictionMappingInput {
14174 country_code: r.jurisdiction.clone(),
14175 standard_id: r.standard_id.clone(),
14176 })
14177 .collect();
14178 builder.add_jurisdiction_mappings(&mapping_inputs);
14179
14180 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14182 .iter()
14183 .map(|p| datasynth_graph::ProcedureNodeInput {
14184 procedure_id: p.procedure_id.clone(),
14185 standard_id: p.standard_id.clone(),
14186 procedure_type: p.procedure_type.clone(),
14187 sample_size: p.sample_size,
14188 confidence_level: p.confidence_level,
14189 })
14190 .collect();
14191 builder.add_procedures(&proc_inputs);
14192
14193 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14195 .iter()
14196 .map(|f| datasynth_graph::FindingNodeInput {
14197 finding_id: f.finding_id.to_string(),
14198 standard_id: f
14199 .related_standards
14200 .first()
14201 .map(|s| s.as_str().to_string())
14202 .unwrap_or_default(),
14203 severity: f.severity.to_string(),
14204 deficiency_level: f.deficiency_level.to_string(),
14205 severity_score: f.deficiency_level.severity_score(),
14206 control_id: f.control_id.clone(),
14207 affected_accounts: f.affected_accounts.clone(),
14208 })
14209 .collect();
14210 builder.add_findings(&finding_inputs);
14211
14212 if cr_config.graph.include_account_links {
14214 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14215 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14216 for std_record in &standard_records {
14217 if let Some(std_obj) =
14218 registry.get(&datasynth_core::models::compliance::StandardId::parse(
14219 &std_record.standard_id,
14220 ))
14221 {
14222 for acct_type in &std_obj.applicable_account_types {
14223 account_links.push(datasynth_graph::AccountLinkInput {
14224 standard_id: std_record.standard_id.clone(),
14225 account_code: acct_type.clone(),
14226 account_name: acct_type.clone(),
14227 });
14228 }
14229 }
14230 }
14231 builder.add_account_links(&account_links);
14232 }
14233
14234 if cr_config.graph.include_control_links {
14236 let mut control_links = Vec::new();
14237 let sox_like_ids: Vec<String> = standard_records
14239 .iter()
14240 .filter(|r| {
14241 r.standard_id.starts_with("SOX")
14242 || r.standard_id.starts_with("PCAOB-AS-2201")
14243 })
14244 .map(|r| r.standard_id.clone())
14245 .collect();
14246 let control_ids = [
14248 ("C001", "Cash Controls"),
14249 ("C002", "Large Transaction Approval"),
14250 ("C010", "PO Approval"),
14251 ("C011", "Three-Way Match"),
14252 ("C020", "Revenue Recognition"),
14253 ("C021", "Credit Check"),
14254 ("C030", "Manual JE Approval"),
14255 ("C031", "Period Close Review"),
14256 ("C032", "Account Reconciliation"),
14257 ("C040", "Payroll Processing"),
14258 ("C050", "Fixed Asset Capitalization"),
14259 ("C060", "Intercompany Elimination"),
14260 ];
14261 for sox_id in &sox_like_ids {
14262 for (ctrl_id, ctrl_name) in &control_ids {
14263 control_links.push(datasynth_graph::ControlLinkInput {
14264 standard_id: sox_id.clone(),
14265 control_id: ctrl_id.to_string(),
14266 control_name: ctrl_name.to_string(),
14267 });
14268 }
14269 }
14270 builder.add_control_links(&control_links);
14271 }
14272
14273 if cr_config.graph.include_company_links {
14275 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14276 .iter()
14277 .enumerate()
14278 .map(|(i, f)| datasynth_graph::FilingNodeInput {
14279 filing_id: format!("F{:04}", i + 1),
14280 filing_type: f.filing_type.to_string(),
14281 company_code: f.company_code.clone(),
14282 jurisdiction: f.jurisdiction.clone(),
14283 status: format!("{:?}", f.status),
14284 })
14285 .collect();
14286 builder.add_filings(&filing_inputs);
14287 }
14288
14289 let graph = builder.build();
14290 info!(
14291 " Compliance graph: {} nodes, {} edges",
14292 graph.nodes.len(),
14293 graph.edges.len()
14294 );
14295 Some(graph)
14296 } else {
14297 None
14298 };
14299
14300 self.check_resources_with_log("post-compliance-regulations")?;
14301
14302 Ok(ComplianceRegulationsSnapshot {
14303 standard_records,
14304 cross_reference_records,
14305 jurisdiction_records,
14306 audit_procedures,
14307 findings,
14308 filings,
14309 compliance_graph,
14310 })
14311 }
14312
14313 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14315 use super::lineage::LineageGraphBuilder;
14316
14317 let mut builder = LineageGraphBuilder::new();
14318
14319 builder.add_config_section("config:global", "Global Config");
14321 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14322 builder.add_config_section("config:transactions", "Transaction Config");
14323
14324 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14326 builder.add_generator_phase("phase:je", "Journal Entry Generation");
14327
14328 builder.configured_by("phase:coa", "config:chart_of_accounts");
14330 builder.configured_by("phase:je", "config:transactions");
14331
14332 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14334 builder.produced_by("output:je", "phase:je");
14335
14336 if self.phase_config.generate_master_data {
14338 builder.add_config_section("config:master_data", "Master Data Config");
14339 builder.add_generator_phase("phase:master_data", "Master Data Generation");
14340 builder.configured_by("phase:master_data", "config:master_data");
14341 builder.input_to("phase:master_data", "phase:je");
14342 }
14343
14344 if self.phase_config.generate_document_flows {
14345 builder.add_config_section("config:document_flows", "Document Flow Config");
14346 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14347 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14348 builder.configured_by("phase:p2p", "config:document_flows");
14349 builder.configured_by("phase:o2c", "config:document_flows");
14350
14351 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14352 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14353 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14354 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14355 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14356
14357 builder.produced_by("output:po", "phase:p2p");
14358 builder.produced_by("output:gr", "phase:p2p");
14359 builder.produced_by("output:vi", "phase:p2p");
14360 builder.produced_by("output:so", "phase:o2c");
14361 builder.produced_by("output:ci", "phase:o2c");
14362 }
14363
14364 if self.phase_config.inject_anomalies {
14365 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14366 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14367 builder.configured_by("phase:anomaly", "config:fraud");
14368 builder.add_output_file(
14369 "output:labels",
14370 "Anomaly Labels",
14371 "labels/anomaly_labels.csv",
14372 );
14373 builder.produced_by("output:labels", "phase:anomaly");
14374 }
14375
14376 if self.phase_config.generate_audit {
14377 builder.add_config_section("config:audit", "Audit Config");
14378 builder.add_generator_phase("phase:audit", "Audit Data Generation");
14379 builder.configured_by("phase:audit", "config:audit");
14380 }
14381
14382 if self.phase_config.generate_banking {
14383 builder.add_config_section("config:banking", "Banking Config");
14384 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
14385 builder.configured_by("phase:banking", "config:banking");
14386 }
14387
14388 if self.config.llm.enabled {
14389 builder.add_config_section("config:llm", "LLM Enrichment Config");
14390 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
14391 builder.configured_by("phase:llm_enrichment", "config:llm");
14392 }
14393
14394 if self.config.diffusion.enabled {
14395 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
14396 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
14397 builder.configured_by("phase:diffusion", "config:diffusion");
14398 }
14399
14400 if self.config.causal.enabled {
14401 builder.add_config_section("config:causal", "Causal Generation Config");
14402 builder.add_generator_phase("phase:causal", "Causal Overlay");
14403 builder.configured_by("phase:causal", "config:causal");
14404 }
14405
14406 builder.build()
14407 }
14408
14409 fn compute_company_revenue(
14418 entries: &[JournalEntry],
14419 company_code: &str,
14420 ) -> rust_decimal::Decimal {
14421 use rust_decimal::Decimal;
14422 let mut revenue = Decimal::ZERO;
14423 for je in entries {
14424 if je.header.company_code != company_code {
14425 continue;
14426 }
14427 for line in &je.lines {
14428 if line.gl_account.starts_with('4') {
14429 revenue += line.credit_amount - line.debit_amount;
14431 }
14432 }
14433 }
14434 revenue.max(Decimal::ZERO)
14435 }
14436
14437 fn compute_entity_net_assets(
14441 entries: &[JournalEntry],
14442 entity_code: &str,
14443 ) -> rust_decimal::Decimal {
14444 use rust_decimal::Decimal;
14445 let mut asset_net = Decimal::ZERO;
14446 let mut liability_net = Decimal::ZERO;
14447 for je in entries {
14448 if je.header.company_code != entity_code {
14449 continue;
14450 }
14451 for line in &je.lines {
14452 if line.gl_account.starts_with('1') {
14453 asset_net += line.debit_amount - line.credit_amount;
14454 } else if line.gl_account.starts_with('2') {
14455 liability_net += line.credit_amount - line.debit_amount;
14456 }
14457 }
14458 }
14459 asset_net - liability_net
14460 }
14461
14462 fn phase_statistical_validation(
14473 &self,
14474 entries: &[JournalEntry],
14475 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
14476 use datasynth_config::schema::StatisticalTestConfig;
14477 use datasynth_core::distributions::{
14478 run_benford_first_digit, run_chi_squared, run_ks_uniform_log, StatisticalTestResult,
14479 StatisticalValidationReport, TestOutcome,
14480 };
14481
14482 let cfg = &self.config.distributions.validation;
14483 if !cfg.enabled {
14484 return Ok(None);
14485 }
14486
14487 let amounts: Vec<rust_decimal::Decimal> = entries
14490 .iter()
14491 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
14492 .filter(|a| *a > rust_decimal::Decimal::ZERO)
14493 .collect();
14494
14495 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
14496 for test_cfg in &cfg.tests {
14497 match test_cfg {
14498 StatisticalTestConfig::BenfordFirstDigit {
14499 threshold_mad,
14500 warning_mad,
14501 } => {
14502 results.push(run_benford_first_digit(
14503 &amounts,
14504 *threshold_mad,
14505 *warning_mad,
14506 ));
14507 }
14508 StatisticalTestConfig::ChiSquared { bins, significance } => {
14509 results.push(run_chi_squared(&amounts, *bins, *significance));
14510 }
14511 StatisticalTestConfig::DistributionFit {
14512 target: _,
14513 ks_significance,
14514 method: _,
14515 } => {
14516 results.push(run_ks_uniform_log(&amounts, *ks_significance));
14519 }
14520 StatisticalTestConfig::CorrelationCheck { .. }
14521 | StatisticalTestConfig::AndersonDarling { .. } => {
14522 results.push(StatisticalTestResult {
14523 name: match test_cfg {
14524 StatisticalTestConfig::CorrelationCheck { .. } => "correlation_check",
14525 StatisticalTestConfig::AndersonDarling { .. } => "anderson_darling",
14526 _ => "unknown",
14527 }
14528 .to_string(),
14529 outcome: TestOutcome::Skipped,
14530 statistic: 0.0,
14531 threshold: 0.0,
14532 message: "not implemented in v3.5.1; scheduled for follow-up".to_string(),
14533 });
14534 }
14535 }
14536 }
14537
14538 let report = StatisticalValidationReport {
14539 sample_count: amounts.len(),
14540 results,
14541 };
14542
14543 if cfg.reporting.fail_on_error && !report.all_passed() {
14544 let failed = report.failed_names().join(", ");
14545 return Err(SynthError::validation(format!(
14546 "statistical validation failed: {failed}"
14547 )));
14548 }
14549
14550 Ok(Some(report))
14551 }
14552
14553 fn phase_analytics_metadata(
14566 &mut self,
14567 entries: &[JournalEntry],
14568 ) -> SynthResult<AnalyticsMetadataSnapshot> {
14569 use datasynth_generators::drift_event_generator::DriftEventGenerator;
14570 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
14571 use datasynth_generators::management_report_generator::ManagementReportGenerator;
14572 use datasynth_generators::prior_year_generator::PriorYearGenerator;
14573 use std::collections::BTreeMap;
14574
14575 let mut snap = AnalyticsMetadataSnapshot::default();
14576
14577 if !self.phase_config.generate_analytics_metadata {
14578 return Ok(snap);
14579 }
14580
14581 let cfg = &self.config.analytics_metadata;
14582 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14583 .map(|d| d.year())
14584 .unwrap_or(2025);
14585
14586 if cfg.prior_year {
14588 let mut gen = PriorYearGenerator::new(self.seed + 9100);
14589 for company in &self.config.companies {
14590 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
14593 BTreeMap::new();
14594 for je in entries {
14595 if je.header.company_code != company.code {
14596 continue;
14597 }
14598 for line in &je.lines {
14599 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
14600 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
14601 });
14602 entry.1 += line.debit_amount - line.credit_amount;
14603 }
14604 }
14605 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
14606 .into_iter()
14607 .filter(|(_, (_, bal))| !bal.is_zero())
14608 .map(|(code, (name, bal))| (code, name, bal))
14609 .collect();
14610 if !current.is_empty() {
14611 let comparatives =
14612 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
14613 snap.prior_year_comparatives.extend(comparatives);
14614 }
14615 }
14616 info!(
14617 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
14618 snap.prior_year_comparatives.len(),
14619 self.config.companies.len()
14620 );
14621 }
14622
14623 if cfg.industry_benchmark {
14625 use datasynth_core::models::IndustrySector;
14626 let industry = match self.config.global.industry {
14627 IndustrySector::Manufacturing => "manufacturing",
14628 IndustrySector::Retail => "retail",
14629 IndustrySector::FinancialServices => "financial_services",
14630 IndustrySector::Technology => "technology",
14631 IndustrySector::Healthcare => "healthcare",
14632 _ => "other",
14633 };
14634 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
14635 let benchmarks = gen.generate(industry, fiscal_year);
14636 info!(
14637 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
14638 benchmarks.len()
14639 );
14640 snap.industry_benchmarks = benchmarks;
14641 }
14642
14643 if cfg.management_reports {
14645 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
14646 let period_months = self.config.global.period_months;
14647 for company in &self.config.companies {
14648 let reports =
14649 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
14650 snap.management_reports.extend(reports);
14651 }
14652 info!(
14653 "v3.3.0 analytics: {} management reports across {} companies",
14654 snap.management_reports.len(),
14655 self.config.companies.len()
14656 );
14657 }
14658
14659 if cfg.drift_events {
14661 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
14662 .expect("hardcoded NaiveDate 2025-01-01 is valid");
14663 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14664 .unwrap_or(fallback_start);
14665 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
14666 let mut gen = DriftEventGenerator::new(self.seed + 9400);
14667 let drifts = gen.generate_standalone_drifts(start_date, end_date);
14668 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
14669 snap.drift_events = drifts;
14670 }
14671 let _ = entries;
14673
14674 Ok(snap)
14675 }
14676}
14677
14678fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
14680 match format {
14681 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
14682 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
14683 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
14684 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
14685 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
14686 }
14687}
14688
14689fn compute_trial_balance_entries(
14694 entries: &[JournalEntry],
14695 entity_code: &str,
14696 fiscal_year: i32,
14697 coa: Option<&ChartOfAccounts>,
14698) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
14699 use std::collections::BTreeMap;
14700
14701 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
14702 BTreeMap::new();
14703
14704 for je in entries {
14705 for line in &je.lines {
14706 let entry = balances.entry(line.account_code.clone()).or_default();
14707 entry.0 += line.debit_amount;
14708 entry.1 += line.credit_amount;
14709 }
14710 }
14711
14712 balances
14713 .into_iter()
14714 .map(
14715 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
14716 account_description: coa
14717 .and_then(|c| c.get_account(&account_code))
14718 .map(|a| a.description().to_string())
14719 .unwrap_or_else(|| account_code.clone()),
14720 account_code,
14721 debit_balance: debit,
14722 credit_balance: credit,
14723 net_balance: debit - credit,
14724 entity_code: entity_code.to_string(),
14725 period: format!("FY{}", fiscal_year),
14726 },
14727 )
14728 .collect()
14729}
14730
14731#[cfg(test)]
14732#[allow(clippy::unwrap_used)]
14733mod tests {
14734 use super::*;
14735 use datasynth_config::schema::*;
14736
14737 fn create_test_config() -> GeneratorConfig {
14738 GeneratorConfig {
14739 global: GlobalConfig {
14740 industry: IndustrySector::Manufacturing,
14741 start_date: "2024-01-01".to_string(),
14742 period_months: 1,
14743 seed: Some(42),
14744 parallel: false,
14745 group_currency: "USD".to_string(),
14746 presentation_currency: None,
14747 worker_threads: 0,
14748 memory_limit_mb: 0,
14749 fiscal_year_months: None,
14750 },
14751 companies: vec![CompanyConfig {
14752 code: "1000".to_string(),
14753 name: "Test Company".to_string(),
14754 currency: "USD".to_string(),
14755 functional_currency: None,
14756 country: "US".to_string(),
14757 annual_transaction_volume: TransactionVolume::TenK,
14758 volume_weight: 1.0,
14759 fiscal_year_variant: "K4".to_string(),
14760 }],
14761 chart_of_accounts: ChartOfAccountsConfig {
14762 complexity: CoAComplexity::Small,
14763 industry_specific: true,
14764 custom_accounts: None,
14765 min_hierarchy_depth: 2,
14766 max_hierarchy_depth: 4,
14767 },
14768 transactions: TransactionConfig::default(),
14769 output: OutputConfig::default(),
14770 fraud: FraudConfig::default(),
14771 internal_controls: InternalControlsConfig::default(),
14772 business_processes: BusinessProcessConfig::default(),
14773 user_personas: UserPersonaConfig::default(),
14774 templates: TemplateConfig::default(),
14775 approval: ApprovalConfig::default(),
14776 departments: DepartmentConfig::default(),
14777 master_data: MasterDataConfig::default(),
14778 document_flows: DocumentFlowConfig::default(),
14779 intercompany: IntercompanyConfig::default(),
14780 balance: BalanceConfig::default(),
14781 ocpm: OcpmConfig::default(),
14782 audit: AuditGenerationConfig::default(),
14783 banking: datasynth_banking::BankingConfig::default(),
14784 data_quality: DataQualitySchemaConfig::default(),
14785 scenario: ScenarioConfig::default(),
14786 temporal: TemporalDriftConfig::default(),
14787 graph_export: GraphExportConfig::default(),
14788 streaming: StreamingSchemaConfig::default(),
14789 rate_limit: RateLimitSchemaConfig::default(),
14790 temporal_attributes: TemporalAttributeSchemaConfig::default(),
14791 relationships: RelationshipSchemaConfig::default(),
14792 accounting_standards: AccountingStandardsConfig::default(),
14793 audit_standards: AuditStandardsConfig::default(),
14794 distributions: Default::default(),
14795 temporal_patterns: Default::default(),
14796 vendor_network: VendorNetworkSchemaConfig::default(),
14797 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
14798 relationship_strength: RelationshipStrengthSchemaConfig::default(),
14799 cross_process_links: CrossProcessLinksSchemaConfig::default(),
14800 organizational_events: OrganizationalEventsSchemaConfig::default(),
14801 behavioral_drift: BehavioralDriftSchemaConfig::default(),
14802 market_drift: MarketDriftSchemaConfig::default(),
14803 drift_labeling: DriftLabelingSchemaConfig::default(),
14804 anomaly_injection: Default::default(),
14805 industry_specific: Default::default(),
14806 fingerprint_privacy: Default::default(),
14807 quality_gates: Default::default(),
14808 compliance: Default::default(),
14809 webhooks: Default::default(),
14810 llm: Default::default(),
14811 diffusion: Default::default(),
14812 causal: Default::default(),
14813 source_to_pay: Default::default(),
14814 financial_reporting: Default::default(),
14815 hr: Default::default(),
14816 manufacturing: Default::default(),
14817 sales_quotes: Default::default(),
14818 tax: Default::default(),
14819 treasury: Default::default(),
14820 project_accounting: Default::default(),
14821 esg: Default::default(),
14822 country_packs: None,
14823 scenarios: Default::default(),
14824 session: Default::default(),
14825 compliance_regulations: Default::default(),
14826 analytics_metadata: Default::default(),
14827 }
14828 }
14829
14830 #[test]
14831 fn test_enhanced_orchestrator_creation() {
14832 let config = create_test_config();
14833 let orchestrator = EnhancedOrchestrator::with_defaults(config);
14834 assert!(orchestrator.is_ok());
14835 }
14836
14837 #[test]
14838 fn test_minimal_generation() {
14839 let config = create_test_config();
14840 let phase_config = PhaseConfig {
14841 generate_master_data: false,
14842 generate_document_flows: false,
14843 generate_journal_entries: true,
14844 inject_anomalies: false,
14845 show_progress: false,
14846 ..Default::default()
14847 };
14848
14849 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14850 let result = orchestrator.generate();
14851
14852 assert!(result.is_ok());
14853 let result = result.unwrap();
14854 assert!(!result.journal_entries.is_empty());
14855 }
14856
14857 #[test]
14858 fn test_master_data_generation() {
14859 let config = create_test_config();
14860 let phase_config = PhaseConfig {
14861 generate_master_data: true,
14862 generate_document_flows: false,
14863 generate_journal_entries: false,
14864 inject_anomalies: false,
14865 show_progress: false,
14866 vendors_per_company: 5,
14867 customers_per_company: 5,
14868 materials_per_company: 10,
14869 assets_per_company: 5,
14870 employees_per_company: 10,
14871 ..Default::default()
14872 };
14873
14874 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14875 let result = orchestrator.generate().unwrap();
14876
14877 assert!(!result.master_data.vendors.is_empty());
14878 assert!(!result.master_data.customers.is_empty());
14879 assert!(!result.master_data.materials.is_empty());
14880 }
14881
14882 #[test]
14883 fn test_document_flow_generation() {
14884 let config = create_test_config();
14885 let phase_config = PhaseConfig {
14886 generate_master_data: true,
14887 generate_document_flows: true,
14888 generate_journal_entries: false,
14889 inject_anomalies: false,
14890 inject_data_quality: false,
14891 validate_balances: false,
14892 generate_ocpm_events: false,
14893 show_progress: false,
14894 vendors_per_company: 5,
14895 customers_per_company: 5,
14896 materials_per_company: 10,
14897 assets_per_company: 5,
14898 employees_per_company: 10,
14899 p2p_chains: 5,
14900 o2c_chains: 5,
14901 ..Default::default()
14902 };
14903
14904 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14905 let result = orchestrator.generate().unwrap();
14906
14907 assert!(!result.document_flows.p2p_chains.is_empty());
14909 assert!(!result.document_flows.o2c_chains.is_empty());
14910
14911 assert!(!result.document_flows.purchase_orders.is_empty());
14913 assert!(!result.document_flows.sales_orders.is_empty());
14914 }
14915
14916 #[test]
14917 fn test_anomaly_injection() {
14918 let config = create_test_config();
14919 let phase_config = PhaseConfig {
14920 generate_master_data: false,
14921 generate_document_flows: false,
14922 generate_journal_entries: true,
14923 inject_anomalies: true,
14924 show_progress: false,
14925 ..Default::default()
14926 };
14927
14928 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14929 let result = orchestrator.generate().unwrap();
14930
14931 assert!(!result.journal_entries.is_empty());
14933
14934 assert!(result.anomaly_labels.summary.is_some());
14937 }
14938
14939 #[test]
14940 fn test_full_generation_pipeline() {
14941 let config = create_test_config();
14942 let phase_config = PhaseConfig {
14943 generate_master_data: true,
14944 generate_document_flows: true,
14945 generate_journal_entries: true,
14946 inject_anomalies: false,
14947 inject_data_quality: false,
14948 validate_balances: true,
14949 generate_ocpm_events: false,
14950 show_progress: false,
14951 vendors_per_company: 3,
14952 customers_per_company: 3,
14953 materials_per_company: 5,
14954 assets_per_company: 3,
14955 employees_per_company: 5,
14956 p2p_chains: 3,
14957 o2c_chains: 3,
14958 ..Default::default()
14959 };
14960
14961 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14962 let result = orchestrator.generate().unwrap();
14963
14964 assert!(!result.master_data.vendors.is_empty());
14966 assert!(!result.master_data.customers.is_empty());
14967 assert!(!result.document_flows.p2p_chains.is_empty());
14968 assert!(!result.document_flows.o2c_chains.is_empty());
14969 assert!(!result.journal_entries.is_empty());
14970 assert!(result.statistics.accounts_count > 0);
14971
14972 assert!(!result.subledger.ap_invoices.is_empty());
14974 assert!(!result.subledger.ar_invoices.is_empty());
14975
14976 assert!(result.balance_validation.validated);
14978 assert!(result.balance_validation.entries_processed > 0);
14979 }
14980
14981 #[test]
14982 fn test_subledger_linking() {
14983 let config = create_test_config();
14984 let phase_config = PhaseConfig {
14985 generate_master_data: true,
14986 generate_document_flows: true,
14987 generate_journal_entries: false,
14988 inject_anomalies: false,
14989 inject_data_quality: false,
14990 validate_balances: false,
14991 generate_ocpm_events: false,
14992 show_progress: false,
14993 vendors_per_company: 5,
14994 customers_per_company: 5,
14995 materials_per_company: 10,
14996 assets_per_company: 3,
14997 employees_per_company: 5,
14998 p2p_chains: 5,
14999 o2c_chains: 5,
15000 ..Default::default()
15001 };
15002
15003 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15004 let result = orchestrator.generate().unwrap();
15005
15006 assert!(!result.document_flows.vendor_invoices.is_empty());
15008 assert!(!result.document_flows.customer_invoices.is_empty());
15009
15010 assert!(!result.subledger.ap_invoices.is_empty());
15012 assert!(!result.subledger.ar_invoices.is_empty());
15013
15014 assert_eq!(
15016 result.subledger.ap_invoices.len(),
15017 result.document_flows.vendor_invoices.len()
15018 );
15019
15020 assert_eq!(
15022 result.subledger.ar_invoices.len(),
15023 result.document_flows.customer_invoices.len()
15024 );
15025
15026 assert_eq!(
15028 result.statistics.ap_invoice_count,
15029 result.subledger.ap_invoices.len()
15030 );
15031 assert_eq!(
15032 result.statistics.ar_invoice_count,
15033 result.subledger.ar_invoices.len()
15034 );
15035 }
15036
15037 #[test]
15038 fn test_balance_validation() {
15039 let config = create_test_config();
15040 let phase_config = PhaseConfig {
15041 generate_master_data: false,
15042 generate_document_flows: false,
15043 generate_journal_entries: true,
15044 inject_anomalies: false,
15045 validate_balances: true,
15046 show_progress: false,
15047 ..Default::default()
15048 };
15049
15050 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15051 let result = orchestrator.generate().unwrap();
15052
15053 assert!(result.balance_validation.validated);
15055 assert!(result.balance_validation.entries_processed > 0);
15056
15057 assert!(!result.balance_validation.has_unbalanced_entries);
15059
15060 assert_eq!(
15062 result.balance_validation.total_debits,
15063 result.balance_validation.total_credits
15064 );
15065 }
15066
15067 #[test]
15068 fn test_statistics_accuracy() {
15069 let config = create_test_config();
15070 let phase_config = PhaseConfig {
15071 generate_master_data: true,
15072 generate_document_flows: false,
15073 generate_journal_entries: true,
15074 inject_anomalies: false,
15075 show_progress: false,
15076 vendors_per_company: 10,
15077 customers_per_company: 20,
15078 materials_per_company: 15,
15079 assets_per_company: 5,
15080 employees_per_company: 8,
15081 ..Default::default()
15082 };
15083
15084 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15085 let result = orchestrator.generate().unwrap();
15086
15087 assert_eq!(
15089 result.statistics.vendor_count,
15090 result.master_data.vendors.len()
15091 );
15092 assert_eq!(
15093 result.statistics.customer_count,
15094 result.master_data.customers.len()
15095 );
15096 assert_eq!(
15097 result.statistics.material_count,
15098 result.master_data.materials.len()
15099 );
15100 assert_eq!(
15101 result.statistics.total_entries as usize,
15102 result.journal_entries.len()
15103 );
15104 }
15105
15106 #[test]
15107 fn test_phase_config_defaults() {
15108 let config = PhaseConfig::default();
15109 assert!(config.generate_master_data);
15110 assert!(config.generate_document_flows);
15111 assert!(config.generate_journal_entries);
15112 assert!(!config.inject_anomalies);
15113 assert!(config.validate_balances);
15114 assert!(config.show_progress);
15115 assert!(config.vendors_per_company > 0);
15116 assert!(config.customers_per_company > 0);
15117 }
15118
15119 #[test]
15120 fn test_get_coa_before_generation() {
15121 let config = create_test_config();
15122 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15123
15124 assert!(orchestrator.get_coa().is_none());
15126 }
15127
15128 #[test]
15129 fn test_get_coa_after_generation() {
15130 let config = create_test_config();
15131 let phase_config = PhaseConfig {
15132 generate_master_data: false,
15133 generate_document_flows: false,
15134 generate_journal_entries: true,
15135 inject_anomalies: false,
15136 show_progress: false,
15137 ..Default::default()
15138 };
15139
15140 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15141 let _ = orchestrator.generate().unwrap();
15142
15143 assert!(orchestrator.get_coa().is_some());
15145 }
15146
15147 #[test]
15148 fn test_get_master_data() {
15149 let config = create_test_config();
15150 let phase_config = PhaseConfig {
15151 generate_master_data: true,
15152 generate_document_flows: false,
15153 generate_journal_entries: false,
15154 inject_anomalies: false,
15155 show_progress: false,
15156 vendors_per_company: 5,
15157 customers_per_company: 5,
15158 materials_per_company: 5,
15159 assets_per_company: 5,
15160 employees_per_company: 5,
15161 ..Default::default()
15162 };
15163
15164 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15165 let result = orchestrator.generate().unwrap();
15166
15167 assert!(!result.master_data.vendors.is_empty());
15169 }
15170
15171 #[test]
15172 fn test_with_progress_builder() {
15173 let config = create_test_config();
15174 let orchestrator = EnhancedOrchestrator::with_defaults(config)
15175 .unwrap()
15176 .with_progress(false);
15177
15178 assert!(!orchestrator.phase_config.show_progress);
15180 }
15181
15182 #[test]
15183 fn test_multi_company_generation() {
15184 let mut config = create_test_config();
15185 config.companies.push(CompanyConfig {
15186 code: "2000".to_string(),
15187 name: "Subsidiary".to_string(),
15188 currency: "EUR".to_string(),
15189 functional_currency: None,
15190 country: "DE".to_string(),
15191 annual_transaction_volume: TransactionVolume::TenK,
15192 volume_weight: 0.5,
15193 fiscal_year_variant: "K4".to_string(),
15194 });
15195
15196 let phase_config = PhaseConfig {
15197 generate_master_data: true,
15198 generate_document_flows: false,
15199 generate_journal_entries: true,
15200 inject_anomalies: false,
15201 show_progress: false,
15202 vendors_per_company: 5,
15203 customers_per_company: 5,
15204 materials_per_company: 5,
15205 assets_per_company: 5,
15206 employees_per_company: 5,
15207 ..Default::default()
15208 };
15209
15210 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15211 let result = orchestrator.generate().unwrap();
15212
15213 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
15216 assert!(result.statistics.companies_count == 2);
15217 }
15218
15219 #[test]
15220 fn test_empty_master_data_skips_document_flows() {
15221 let config = create_test_config();
15222 let phase_config = PhaseConfig {
15223 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
15226 inject_anomalies: false,
15227 show_progress: false,
15228 ..Default::default()
15229 };
15230
15231 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15232 let result = orchestrator.generate().unwrap();
15233
15234 assert!(result.document_flows.p2p_chains.is_empty());
15236 assert!(result.document_flows.o2c_chains.is_empty());
15237 }
15238
15239 #[test]
15240 fn test_journal_entry_line_item_count() {
15241 let config = create_test_config();
15242 let phase_config = PhaseConfig {
15243 generate_master_data: false,
15244 generate_document_flows: false,
15245 generate_journal_entries: true,
15246 inject_anomalies: false,
15247 show_progress: false,
15248 ..Default::default()
15249 };
15250
15251 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15252 let result = orchestrator.generate().unwrap();
15253
15254 let calculated_line_items: u64 = result
15256 .journal_entries
15257 .iter()
15258 .map(|e| e.line_count() as u64)
15259 .sum();
15260 assert_eq!(result.statistics.total_line_items, calculated_line_items);
15261 }
15262
15263 #[test]
15264 fn test_audit_generation() {
15265 let config = create_test_config();
15266 let phase_config = PhaseConfig {
15267 generate_master_data: false,
15268 generate_document_flows: false,
15269 generate_journal_entries: true,
15270 inject_anomalies: false,
15271 show_progress: false,
15272 generate_audit: true,
15273 audit_engagements: 2,
15274 workpapers_per_engagement: 5,
15275 evidence_per_workpaper: 2,
15276 risks_per_engagement: 3,
15277 findings_per_engagement: 2,
15278 judgments_per_engagement: 2,
15279 ..Default::default()
15280 };
15281
15282 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15283 let result = orchestrator.generate().unwrap();
15284
15285 assert_eq!(result.audit.engagements.len(), 2);
15287 assert!(!result.audit.workpapers.is_empty());
15288 assert!(!result.audit.evidence.is_empty());
15289 assert!(!result.audit.risk_assessments.is_empty());
15290 assert!(!result.audit.findings.is_empty());
15291 assert!(!result.audit.judgments.is_empty());
15292
15293 assert!(
15295 !result.audit.confirmations.is_empty(),
15296 "ISA 505 confirmations should be generated"
15297 );
15298 assert!(
15299 !result.audit.confirmation_responses.is_empty(),
15300 "ISA 505 confirmation responses should be generated"
15301 );
15302 assert!(
15303 !result.audit.procedure_steps.is_empty(),
15304 "ISA 330 procedure steps should be generated"
15305 );
15306 assert!(
15308 !result.audit.analytical_results.is_empty(),
15309 "ISA 520 analytical procedures should be generated"
15310 );
15311 assert!(
15312 !result.audit.ia_functions.is_empty(),
15313 "ISA 610 IA functions should be generated (one per engagement)"
15314 );
15315 assert!(
15316 !result.audit.related_parties.is_empty(),
15317 "ISA 550 related parties should be generated"
15318 );
15319
15320 assert_eq!(
15322 result.statistics.audit_engagement_count,
15323 result.audit.engagements.len()
15324 );
15325 assert_eq!(
15326 result.statistics.audit_workpaper_count,
15327 result.audit.workpapers.len()
15328 );
15329 assert_eq!(
15330 result.statistics.audit_evidence_count,
15331 result.audit.evidence.len()
15332 );
15333 assert_eq!(
15334 result.statistics.audit_risk_count,
15335 result.audit.risk_assessments.len()
15336 );
15337 assert_eq!(
15338 result.statistics.audit_finding_count,
15339 result.audit.findings.len()
15340 );
15341 assert_eq!(
15342 result.statistics.audit_judgment_count,
15343 result.audit.judgments.len()
15344 );
15345 assert_eq!(
15346 result.statistics.audit_confirmation_count,
15347 result.audit.confirmations.len()
15348 );
15349 assert_eq!(
15350 result.statistics.audit_confirmation_response_count,
15351 result.audit.confirmation_responses.len()
15352 );
15353 assert_eq!(
15354 result.statistics.audit_procedure_step_count,
15355 result.audit.procedure_steps.len()
15356 );
15357 assert_eq!(
15358 result.statistics.audit_sample_count,
15359 result.audit.samples.len()
15360 );
15361 assert_eq!(
15362 result.statistics.audit_analytical_result_count,
15363 result.audit.analytical_results.len()
15364 );
15365 assert_eq!(
15366 result.statistics.audit_ia_function_count,
15367 result.audit.ia_functions.len()
15368 );
15369 assert_eq!(
15370 result.statistics.audit_ia_report_count,
15371 result.audit.ia_reports.len()
15372 );
15373 assert_eq!(
15374 result.statistics.audit_related_party_count,
15375 result.audit.related_parties.len()
15376 );
15377 assert_eq!(
15378 result.statistics.audit_related_party_transaction_count,
15379 result.audit.related_party_transactions.len()
15380 );
15381 }
15382
15383 #[test]
15384 fn test_new_phases_disabled_by_default() {
15385 let config = create_test_config();
15386 assert!(!config.llm.enabled);
15388 assert!(!config.diffusion.enabled);
15389 assert!(!config.causal.enabled);
15390
15391 let phase_config = PhaseConfig {
15392 generate_master_data: false,
15393 generate_document_flows: false,
15394 generate_journal_entries: true,
15395 inject_anomalies: false,
15396 show_progress: false,
15397 ..Default::default()
15398 };
15399
15400 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15401 let result = orchestrator.generate().unwrap();
15402
15403 assert_eq!(result.statistics.llm_enrichment_ms, 0);
15405 assert_eq!(result.statistics.llm_vendors_enriched, 0);
15406 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
15407 assert_eq!(result.statistics.diffusion_samples_generated, 0);
15408 assert_eq!(result.statistics.causal_generation_ms, 0);
15409 assert_eq!(result.statistics.causal_samples_generated, 0);
15410 assert!(result.statistics.causal_validation_passed.is_none());
15411 assert_eq!(result.statistics.counterfactual_pair_count, 0);
15412 assert!(result.counterfactual_pairs.is_empty());
15413 }
15414
15415 #[test]
15416 fn test_counterfactual_generation_enabled() {
15417 let config = create_test_config();
15418 let phase_config = PhaseConfig {
15419 generate_master_data: false,
15420 generate_document_flows: false,
15421 generate_journal_entries: true,
15422 inject_anomalies: false,
15423 show_progress: false,
15424 generate_counterfactuals: true,
15425 generate_period_close: false, ..Default::default()
15427 };
15428
15429 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15430 let result = orchestrator.generate().unwrap();
15431
15432 if !result.journal_entries.is_empty() {
15434 assert_eq!(
15435 result.counterfactual_pairs.len(),
15436 result.journal_entries.len()
15437 );
15438 assert_eq!(
15439 result.statistics.counterfactual_pair_count,
15440 result.journal_entries.len()
15441 );
15442 let ids: std::collections::HashSet<_> = result
15444 .counterfactual_pairs
15445 .iter()
15446 .map(|p| p.pair_id.clone())
15447 .collect();
15448 assert_eq!(ids.len(), result.counterfactual_pairs.len());
15449 }
15450 }
15451
15452 #[test]
15453 fn test_llm_enrichment_enabled() {
15454 let mut config = create_test_config();
15455 config.llm.enabled = true;
15456 config.llm.max_vendor_enrichments = 3;
15457
15458 let phase_config = PhaseConfig {
15459 generate_master_data: true,
15460 generate_document_flows: false,
15461 generate_journal_entries: false,
15462 inject_anomalies: false,
15463 show_progress: false,
15464 vendors_per_company: 5,
15465 customers_per_company: 3,
15466 materials_per_company: 3,
15467 assets_per_company: 3,
15468 employees_per_company: 3,
15469 ..Default::default()
15470 };
15471
15472 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15473 let result = orchestrator.generate().unwrap();
15474
15475 assert!(result.statistics.llm_vendors_enriched > 0);
15477 assert!(result.statistics.llm_vendors_enriched <= 3);
15478 }
15479
15480 #[test]
15481 fn test_diffusion_enhancement_enabled() {
15482 let mut config = create_test_config();
15483 config.diffusion.enabled = true;
15484 config.diffusion.n_steps = 50;
15485 config.diffusion.sample_size = 20;
15486
15487 let phase_config = PhaseConfig {
15488 generate_master_data: false,
15489 generate_document_flows: false,
15490 generate_journal_entries: true,
15491 inject_anomalies: false,
15492 show_progress: false,
15493 ..Default::default()
15494 };
15495
15496 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15497 let result = orchestrator.generate().unwrap();
15498
15499 assert_eq!(result.statistics.diffusion_samples_generated, 20);
15501 }
15502
15503 #[test]
15504 fn test_causal_overlay_enabled() {
15505 let mut config = create_test_config();
15506 config.causal.enabled = true;
15507 config.causal.template = "fraud_detection".to_string();
15508 config.causal.sample_size = 100;
15509 config.causal.validate = true;
15510
15511 let phase_config = PhaseConfig {
15512 generate_master_data: false,
15513 generate_document_flows: false,
15514 generate_journal_entries: true,
15515 inject_anomalies: false,
15516 show_progress: false,
15517 ..Default::default()
15518 };
15519
15520 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15521 let result = orchestrator.generate().unwrap();
15522
15523 assert_eq!(result.statistics.causal_samples_generated, 100);
15525 assert!(result.statistics.causal_validation_passed.is_some());
15527 }
15528
15529 #[test]
15530 fn test_causal_overlay_revenue_cycle_template() {
15531 let mut config = create_test_config();
15532 config.causal.enabled = true;
15533 config.causal.template = "revenue_cycle".to_string();
15534 config.causal.sample_size = 50;
15535 config.causal.validate = false;
15536
15537 let phase_config = PhaseConfig {
15538 generate_master_data: false,
15539 generate_document_flows: false,
15540 generate_journal_entries: true,
15541 inject_anomalies: false,
15542 show_progress: false,
15543 ..Default::default()
15544 };
15545
15546 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15547 let result = orchestrator.generate().unwrap();
15548
15549 assert_eq!(result.statistics.causal_samples_generated, 50);
15551 assert!(result.statistics.causal_validation_passed.is_none());
15553 }
15554
15555 #[test]
15556 fn test_all_new_phases_enabled_together() {
15557 let mut config = create_test_config();
15558 config.llm.enabled = true;
15559 config.llm.max_vendor_enrichments = 2;
15560 config.diffusion.enabled = true;
15561 config.diffusion.n_steps = 20;
15562 config.diffusion.sample_size = 10;
15563 config.causal.enabled = true;
15564 config.causal.sample_size = 50;
15565 config.causal.validate = true;
15566
15567 let phase_config = PhaseConfig {
15568 generate_master_data: true,
15569 generate_document_flows: false,
15570 generate_journal_entries: true,
15571 inject_anomalies: false,
15572 show_progress: false,
15573 vendors_per_company: 5,
15574 customers_per_company: 3,
15575 materials_per_company: 3,
15576 assets_per_company: 3,
15577 employees_per_company: 3,
15578 ..Default::default()
15579 };
15580
15581 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15582 let result = orchestrator.generate().unwrap();
15583
15584 assert!(result.statistics.llm_vendors_enriched > 0);
15586 assert_eq!(result.statistics.diffusion_samples_generated, 10);
15587 assert_eq!(result.statistics.causal_samples_generated, 50);
15588 assert!(result.statistics.causal_validation_passed.is_some());
15589 }
15590
/// Serializes an `EnhancedGenerationStatistics` value to JSON, reads it back, and
/// checks that the LLM, diffusion and causal fields keep their values.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // JSON round trip: struct -> string -> struct.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    // LLM fields.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    // Diffusion fields.
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    // Causal fields.
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
15617
/// Deserializes a statistics payload that contains none of the LLM, diffusion or
/// causal fields and checks two things:
///
/// * the values that *are* present in the payload are read back unchanged, and
/// * every field absent from the payload takes its zero / `None` value.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any `llm_*`, `diffusion_*` or `causal_*` keys.
    let old_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json)
        .expect("statistics JSON lacking the newer fields must still deserialize");

    // Values present in the payload must survive parsing; otherwise the checks
    // below could pass on a struct that was silently zeroed out.
    assert_eq!(stats.total_entries, 100);
    assert_eq!(stats.total_line_items, 500);

    // Fields missing from the payload fall back to zero / `None`.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
15667}