1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Switches and volume settings describing which generation phases run.
///
/// The `bool` fields are plain on/off flags, one per phase or post-processing step; the
/// `usize` fields are plain counts. How each value is consumed is decided by code outside
/// this definition.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Flag for the master data phase.
    pub generate_master_data: bool,
    /// Flag for the document flow phase.
    pub generate_document_flows: bool,
    /// Flag for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Flag for journal entry generation.
    pub generate_journal_entries: bool,
    /// Flag for anomaly injection.
    pub inject_anomalies: bool,
    /// Flag for data quality issue injection.
    pub inject_data_quality: bool,
    /// Flag for balance validation.
    pub validate_balances: bool,
    /// Flag for progress display.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Flag for audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Flag for banking data generation.
    pub generate_banking: bool,
    /// Flag for graph export.
    pub generate_graph_export: bool,
    /// Flag for sourcing data generation.
    pub generate_sourcing: bool,
    /// Flag for bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Flag for financial statement generation.
    pub generate_financial_statements: bool,
    /// Flag for accounting standards data generation.
    pub generate_accounting_standards: bool,
    /// Flag for manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Flag for sales quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Flag for tax data generation.
    pub generate_tax: bool,
    /// Flag for ESG data generation.
    pub generate_esg: bool,
    /// Flag for intercompany data generation.
    pub generate_intercompany: bool,
    /// Flag for evolution event generation.
    pub generate_evolution_events: bool,
    /// Flag for counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Flag for compliance regulations data generation.
    pub generate_compliance_regulations: bool,
    /// Flag for period close generation.
    pub generate_period_close: bool,
    /// Flag for HR data generation.
    pub generate_hr: bool,
    /// Flag for treasury data generation.
    pub generate_treasury: bool,
    /// Flag for project accounting data generation.
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338 fn default() -> Self {
339 Self {
340 generate_master_data: true,
341 generate_document_flows: true,
342 generate_ocpm_events: false, generate_journal_entries: true,
344 inject_anomalies: false,
345 inject_data_quality: false, validate_balances: true,
347 show_progress: true,
348 vendors_per_company: 50,
349 customers_per_company: 100,
350 materials_per_company: 200,
351 assets_per_company: 50,
352 employees_per_company: 100,
353 p2p_chains: 100,
354 o2c_chains: 100,
355 generate_audit: false, audit_engagements: 5,
357 workpapers_per_engagement: 20,
358 evidence_per_workpaper: 5,
359 risks_per_engagement: 15,
360 findings_per_engagement: 8,
361 judgments_per_engagement: 10,
362 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, }
381 }
382}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
/// Master data records grouped by entity type.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Size figures and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
474
/// Document flow data: P2P and O2C chains plus per-type document lists.
///
/// NOTE(review): the per-type lists presumably hold the same documents that appear inside
/// the chains — confirm against the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// References between documents.
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Subledger data: AP, AR, fixed assets and inventory, plus derived reports and runs.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements (subledger type).
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// OCPM event log together with summary counts.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
540
/// Audit-related data, grouped by artifact type.
///
/// Most fields are plain lists; `group_audit_plan` and `fsm_event_trail` are optional.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    // --- Engagement core artifacts ---
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    // --- Confirmations, procedures, sampling, analytics ---
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    // --- Internal audit and related parties ---
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    // --- Group / component audit ---
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    // --- Letters, subsequent events, service organizations ---
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    // --- Assessments, opinions and certifications ---
    /// Going concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    // --- Planning: materiality, risk, sampling, analytics ---
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    // --- Standards mappings, scopes, event trail ---
    /// PCAOB-to-ISA mappings.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, if present.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
/// Banking data: customers, accounts, transactions, labels, and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Count of suspicious items (NOTE(review): presumably transactions — confirm).
    pub suspicious_count: usize,
    /// Count of scenarios.
    pub scenario_count: usize,
}
678
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string (NOTE(review): presumably the export name — confirm).
    pub exports: HashMap<String, GraphExportInfo>,
}
689
/// Serializable details of a single graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Format identifier, stored as free-form text.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
704
/// Sourcing data, grouped by artifact type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
727
/// Trial balance entries for one fiscal period, with the period's identifying dates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
742
/// Financial reporting data: statements, consolidation, reconciliations, trial balances,
/// segments and notes.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements keyed by a string (NOTE(review): presumably an entity or
    /// company code — confirm against the producer).
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segment reports.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
767
/// HR data: payroll, time, expenses, benefits, pensions and stock compensation.
///
/// NOTE(review): the trailing `*_count` fields presumably mirror the lengths of the
/// matching lists; nothing in this definition enforces that — confirm against the producer.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries related to pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries related to stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
812
/// Accounting standards data: revenue contracts, impairment, business combinations,
/// expected credit loss, provisions and currency translation.
///
/// NOTE(review): the trailing `*_count` fields presumably mirror the lengths of the
/// matching lists; nothing in this definition enforces that — confirm against the producer.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries related to business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected credit loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// Expected credit loss provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries related to expected credit loss.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries related to provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
}
856
/// Compliance and regulations data: reference records, procedures, findings, filings and
/// an optional graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
875
/// Manufacturing data with summary counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the matching lists;
/// nothing in this definition enforces that — confirm against the producer.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements (the unqualified `InventoryMovement` type in scope here).
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Cycle count count.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
900
/// Sales quotes, management KPIs and budgets, with summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count (named for lines, not budgets — NOTE(review): confirm how it is computed).
    pub budget_line_count: usize,
}
917
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, if present.
    pub summary: Option<AnomalySummary>,
    /// Count per anomaly type, keyed by a string (NOTE(review): key format not visible here).
    pub by_type: HashMap<String, usize>,
}
928
/// Outcome of balance validation: status flags, totals, tracking counts and errors.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were found.
    pub has_unbalanced_entries: bool,
}
951
/// Tax data: jurisdictions, codes, lines, returns, provisions, withholding, labels,
/// deferred tax and posting journal entries.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred tax data.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
978
/// Intercompany data: group structure, matched pairs, journal entries, eliminations and
/// NCI measurements. Serializable except for `ic_document_chains`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains, if present. Skipped by serde in both directions, so
    /// it deserializes to its default (`None`).
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate (NOTE(review): scale — fraction vs. percent — is not visible here).
    pub match_rate: f64,
}
1004
/// ESG data: environmental, social and governance records, plus labels and counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission record count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1041
/// Treasury data: cash, hedging, debt, guarantees, netting, labels and journal entries.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Treasury-related journal entries.
    pub journal_entries: Vec<JournalEntry>,
}
1069
/// Project accounting data, grouped by record type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1086
/// Aggregate result that bundles the chart of accounts, every domain snapshot, the
/// journal entries and the run's validation, quality and statistics data.
///
/// Derives `Debug` and `Default` only; unlike the individual snapshots it is not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    // --- Core data and per-domain snapshots ---
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data.
    pub master_data: MasterDataSnapshot,
    /// Document flows.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger data.
    pub subledger: SubledgerSnapshot,
    /// OCPM data.
    pub ocpm: OcpmSnapshot,
    /// Audit data.
    pub audit: AuditSnapshot,
    /// Banking data.
    pub banking: BankingSnapshot,
    /// Graph export summary.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing data.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting data.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data.
    pub hr: HrSnapshot,
    /// Accounting standards data.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing data.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data.
    pub tax: TaxSnapshot,
    /// ESG data.
    pub esg: EsgSnapshot,
    /// Treasury data.
    pub treasury: TreasurySnapshot,
    /// Project accounting data.
    pub project_accounting: ProjectAccountingSnapshot,
    // --- Event streams ---
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany data.
    pub intercompany: IntercompanySnapshot,
    // --- Journal entries, labels, validation and quality ---
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Individual quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Run statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, if present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    // --- Controls, balances and reconciliation ---
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    // --- Counterfactuals and fraud artifacts ---
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    // --- Temporal, relationship and industry data ---
    /// Temporal version chains for vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, if present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Links across processes.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance and regulations data.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1178
1179#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182 pub total_entries: u64,
1184 pub total_line_items: u64,
1186 pub accounts_count: usize,
1188 pub companies_count: usize,
1190 pub period_months: u32,
1192 pub vendor_count: usize,
1194 pub customer_count: usize,
1195 pub material_count: usize,
1196 pub asset_count: usize,
1197 pub employee_count: usize,
1198 pub p2p_chain_count: usize,
1200 pub o2c_chain_count: usize,
1201 pub ap_invoice_count: usize,
1203 pub ar_invoice_count: usize,
1204 pub ocpm_event_count: usize,
1206 pub ocpm_object_count: usize,
1207 pub ocpm_case_count: usize,
1208 pub audit_engagement_count: usize,
1210 pub audit_workpaper_count: usize,
1211 pub audit_evidence_count: usize,
1212 pub audit_risk_count: usize,
1213 pub audit_finding_count: usize,
1214 pub audit_judgment_count: usize,
1215 #[serde(default)]
1217 pub audit_confirmation_count: usize,
1218 #[serde(default)]
1219 pub audit_confirmation_response_count: usize,
1220 #[serde(default)]
1222 pub audit_procedure_step_count: usize,
1223 #[serde(default)]
1224 pub audit_sample_count: usize,
1225 #[serde(default)]
1227 pub audit_analytical_result_count: usize,
1228 #[serde(default)]
1230 pub audit_ia_function_count: usize,
1231 #[serde(default)]
1232 pub audit_ia_report_count: usize,
1233 #[serde(default)]
1235 pub audit_related_party_count: usize,
1236 #[serde(default)]
1237 pub audit_related_party_transaction_count: usize,
1238 pub anomalies_injected: usize,
1240 pub data_quality_issues: usize,
1242 pub banking_customer_count: usize,
1244 pub banking_account_count: usize,
1245 pub banking_transaction_count: usize,
1246 pub banking_suspicious_count: usize,
1247 pub graph_export_count: usize,
1249 pub graph_node_count: usize,
1250 pub graph_edge_count: usize,
1251 #[serde(default)]
1253 pub llm_enrichment_ms: u64,
1254 #[serde(default)]
1256 pub llm_vendors_enriched: usize,
1257 #[serde(default)]
1259 pub diffusion_enhancement_ms: u64,
1260 #[serde(default)]
1262 pub diffusion_samples_generated: usize,
1263 #[serde(default)]
1265 pub causal_generation_ms: u64,
1266 #[serde(default)]
1268 pub causal_samples_generated: usize,
1269 #[serde(default)]
1271 pub causal_validation_passed: Option<bool>,
1272 #[serde(default)]
1274 pub sourcing_project_count: usize,
1275 #[serde(default)]
1276 pub rfx_event_count: usize,
1277 #[serde(default)]
1278 pub bid_count: usize,
1279 #[serde(default)]
1280 pub contract_count: usize,
1281 #[serde(default)]
1282 pub catalog_item_count: usize,
1283 #[serde(default)]
1284 pub scorecard_count: usize,
1285 #[serde(default)]
1287 pub financial_statement_count: usize,
1288 #[serde(default)]
1289 pub bank_reconciliation_count: usize,
1290 #[serde(default)]
1292 pub payroll_run_count: usize,
1293 #[serde(default)]
1294 pub time_entry_count: usize,
1295 #[serde(default)]
1296 pub expense_report_count: usize,
1297 #[serde(default)]
1298 pub benefit_enrollment_count: usize,
1299 #[serde(default)]
1300 pub pension_plan_count: usize,
1301 #[serde(default)]
1302 pub stock_grant_count: usize,
1303 #[serde(default)]
1305 pub revenue_contract_count: usize,
1306 #[serde(default)]
1307 pub impairment_test_count: usize,
1308 #[serde(default)]
1309 pub business_combination_count: usize,
1310 #[serde(default)]
1311 pub ecl_model_count: usize,
1312 #[serde(default)]
1313 pub provision_count: usize,
1314 #[serde(default)]
1316 pub production_order_count: usize,
1317 #[serde(default)]
1318 pub quality_inspection_count: usize,
1319 #[serde(default)]
1320 pub cycle_count_count: usize,
1321 #[serde(default)]
1322 pub bom_component_count: usize,
1323 #[serde(default)]
1324 pub inventory_movement_count: usize,
1325 #[serde(default)]
1327 pub sales_quote_count: usize,
1328 #[serde(default)]
1329 pub kpi_count: usize,
1330 #[serde(default)]
1331 pub budget_line_count: usize,
1332 #[serde(default)]
1334 pub tax_jurisdiction_count: usize,
1335 #[serde(default)]
1336 pub tax_code_count: usize,
1337 #[serde(default)]
1339 pub esg_emission_count: usize,
1340 #[serde(default)]
1341 pub esg_disclosure_count: usize,
1342 #[serde(default)]
1344 pub ic_matched_pair_count: usize,
1345 #[serde(default)]
1346 pub ic_elimination_count: usize,
1347 #[serde(default)]
1349 pub ic_transaction_count: usize,
1350 #[serde(default)]
1352 pub fa_subledger_count: usize,
1353 #[serde(default)]
1355 pub inventory_subledger_count: usize,
1356 #[serde(default)]
1358 pub treasury_debt_instrument_count: usize,
1359 #[serde(default)]
1361 pub treasury_hedging_instrument_count: usize,
1362 #[serde(default)]
1364 pub project_count: usize,
1365 #[serde(default)]
1367 pub project_change_order_count: usize,
1368 #[serde(default)]
1370 pub tax_provision_count: usize,
1371 #[serde(default)]
1373 pub opening_balance_count: usize,
1374 #[serde(default)]
1376 pub subledger_reconciliation_count: usize,
1377 #[serde(default)]
1379 pub tax_line_count: usize,
1380 #[serde(default)]
1382 pub project_cost_line_count: usize,
1383 #[serde(default)]
1385 pub cash_position_count: usize,
1386 #[serde(default)]
1388 pub cash_forecast_count: usize,
1389 #[serde(default)]
1391 pub cash_pool_count: usize,
1392 #[serde(default)]
1394 pub process_evolution_event_count: usize,
1395 #[serde(default)]
1397 pub organizational_event_count: usize,
1398 #[serde(default)]
1400 pub counterfactual_pair_count: usize,
1401 #[serde(default)]
1403 pub red_flag_count: usize,
1404 #[serde(default)]
1406 pub collusion_ring_count: usize,
1407 #[serde(default)]
1409 pub temporal_version_chain_count: usize,
1410 #[serde(default)]
1412 pub entity_relationship_node_count: usize,
1413 #[serde(default)]
1415 pub entity_relationship_edge_count: usize,
1416 #[serde(default)]
1418 pub cross_process_link_count: usize,
1419 #[serde(default)]
1421 pub disruption_event_count: usize,
1422 #[serde(default)]
1424 pub industry_gl_account_count: usize,
1425 #[serde(default)]
1427 pub period_close_je_count: usize,
1428}
1429
/// Drives the enhanced generation workflow: holds the configuration, the
/// per-run state and the optional collaborators used by the phases.
pub struct EnhancedOrchestrator {
    /// Generator configuration; `new` validates it before building the
    /// orchestrator.
    config: GeneratorConfig,
    /// Phase settings, e.g. `show_progress` and `generate_banking`.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `new` leaves this as `None`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; `new` starts it at its default value.
    master_data: MasterDataSnapshot,
    /// Base seed: `config.global.seed`, or a random value when none is set.
    seed: u64,
    /// Progress-bar container, created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built from the configuration; rebuilt by
    /// `with_output_path` so that it also knows the output location.
    resource_guard: ResourceGuard,
    /// Output location set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless the orchestrator was built via
    /// `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry constructed in `new`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives generated items phase by phase.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453 datasynth_config::validate_config(&config)?;
1454
1455 let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457 let resource_guard = Self::build_resource_guard(&config, None);
1459
1460 let country_pack_registry = match &config.country_packs {
1462 Some(cp) => {
1463 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464 .map_err(|e| SynthError::config(e.to_string()))?
1465 }
1466 None => datasynth_core::CountryPackRegistry::builtin_only()
1467 .map_err(|e| SynthError::config(e.to_string()))?,
1468 };
1469
1470 Ok(Self {
1471 config,
1472 phase_config,
1473 coa: None,
1474 master_data: MasterDataSnapshot::default(),
1475 seed,
1476 multi_progress: None,
1477 resource_guard,
1478 output_path: None,
1479 copula_generators: Vec::new(),
1480 country_pack_registry,
1481 phase_sink: None,
1482 })
1483 }
1484
1485 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487 Self::new(config, PhaseConfig::default())
1488 }
1489
1490 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492 self.phase_sink = Some(sink);
1493 self
1494 }
1495
1496 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498 self.phase_sink = Some(sink);
1499 }
1500
1501 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503 if let Some(ref sink) = self.phase_sink {
1504 for item in items {
1505 if let Ok(value) = serde_json::to_value(item) {
1506 if let Err(e) = sink.emit(phase, type_name, &value) {
1507 warn!(
1508 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509 );
1510 }
1511 }
1512 }
1513 if let Err(e) = sink.phase_complete(phase) {
1514 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515 }
1516 }
1517 }
1518
1519 pub fn with_progress(mut self, show: bool) -> Self {
1521 self.phase_config.show_progress = show;
1522 if show {
1523 self.multi_progress = Some(MultiProgress::new());
1524 }
1525 self
1526 }
1527
1528 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530 let path = path.into();
1531 self.output_path = Some(path.clone());
1532 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534 self
1535 }
1536
1537 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539 &self.country_pack_registry
1540 }
1541
1542 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544 self.country_pack_registry.get_by_str(country)
1545 }
1546
1547 fn primary_country_code(&self) -> &str {
1550 self.config
1551 .companies
1552 .first()
1553 .map(|c| c.country.as_str())
1554 .unwrap_or("US")
1555 }
1556
1557 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559 self.country_pack_for(self.primary_country_code())
1560 }
1561
1562 fn resolve_coa_framework(&self) -> CoAFramework {
1564 if self.config.accounting_standards.enabled {
1565 match self.config.accounting_standards.framework {
1566 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567 return CoAFramework::FrenchPcg;
1568 }
1569 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570 return CoAFramework::GermanSkr04;
1571 }
1572 _ => {}
1573 }
1574 }
1575 let pack = self.primary_pack();
1577 match pack.accounting.framework.as_str() {
1578 "french_gaap" => CoAFramework::FrenchPcg,
1579 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580 _ => CoAFramework::UsGaap,
1581 }
1582 }
1583
1584 pub fn has_copulas(&self) -> bool {
1589 !self.copula_generators.is_empty()
1590 }
1591
1592 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598 &self.copula_generators
1599 }
1600
1601 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605 &mut self.copula_generators
1606 }
1607
1608 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612 self.copula_generators
1613 .iter_mut()
1614 .find(|c| c.name == copula_name)
1615 .map(|c| c.generator.sample())
1616 }
1617
1618 pub fn from_fingerprint(
1641 fingerprint_path: &std::path::Path,
1642 phase_config: PhaseConfig,
1643 scale: f64,
1644 ) -> SynthResult<Self> {
1645 info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647 let reader = FingerprintReader::new();
1649 let fingerprint = reader
1650 .read_from_file(fingerprint_path)
1651 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654 }
1655
1656 pub fn from_fingerprint_data(
1663 fingerprint: Fingerprint,
1664 phase_config: PhaseConfig,
1665 scale: f64,
1666 ) -> SynthResult<Self> {
1667 info!(
1668 "Synthesizing config from fingerprint (version: {}, tables: {})",
1669 fingerprint.manifest.version,
1670 fingerprint.schema.tables.len()
1671 );
1672
1673 let seed: u64 = rand::random();
1675 info!("Fingerprint synthesis seed: {}", seed);
1676
1677 let options = SynthesisOptions {
1679 scale,
1680 seed: Some(seed),
1681 preserve_correlations: true,
1682 inject_anomalies: true,
1683 };
1684 let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686 let synthesis_result = synthesizer
1688 .synthesize_full(&fingerprint, seed)
1689 .map_err(|e| {
1690 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691 })?;
1692
1693 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695 Self::base_config_for_industry(industry)
1696 } else {
1697 Self::base_config_for_industry("manufacturing")
1698 };
1699
1700 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703 info!(
1705 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706 fingerprint.schema.tables.len(),
1707 scale,
1708 synthesis_result.copula_generators.len()
1709 );
1710
1711 if !synthesis_result.copula_generators.is_empty() {
1712 for spec in &synthesis_result.copula_generators {
1713 info!(
1714 " Copula '{}' for table '{}': {} columns",
1715 spec.name,
1716 spec.table,
1717 spec.columns.len()
1718 );
1719 }
1720 }
1721
1722 let mut orchestrator = Self::new(config, phase_config)?;
1724
1725 orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728 Ok(orchestrator)
1729 }
1730
1731 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733 use datasynth_config::presets::create_preset;
1734 use datasynth_config::TransactionVolume;
1735 use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737 let sector = match industry.to_lowercase().as_str() {
1738 "manufacturing" => IndustrySector::Manufacturing,
1739 "retail" => IndustrySector::Retail,
1740 "financial" | "financial_services" => IndustrySector::FinancialServices,
1741 "healthcare" => IndustrySector::Healthcare,
1742 "technology" | "tech" => IndustrySector::Technology,
1743 _ => IndustrySector::Manufacturing,
1744 };
1745
1746 create_preset(
1748 sector,
1749 1, 12, CoAComplexity::Medium,
1752 TransactionVolume::TenK,
1753 )
1754 }
1755
1756 fn apply_config_patch(
1758 mut config: GeneratorConfig,
1759 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760 ) -> GeneratorConfig {
1761 use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763 for (key, value) in patch.values() {
1764 match (key.as_str(), value) {
1765 ("transactions.count", ConfigValue::Integer(n)) => {
1768 info!(
1769 "Fingerprint suggests {} transactions (apply via company volumes)",
1770 n
1771 );
1772 }
1773 ("global.period_months", ConfigValue::Integer(n)) => {
1774 config.global.period_months = (*n).clamp(1, 120) as u32;
1775 }
1776 ("global.start_date", ConfigValue::String(s)) => {
1777 config.global.start_date = s.clone();
1778 }
1779 ("global.seed", ConfigValue::Integer(n)) => {
1780 config.global.seed = Some(*n as u64);
1781 }
1782 ("fraud.enabled", ConfigValue::Bool(b)) => {
1783 config.fraud.enabled = *b;
1784 }
1785 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786 config.fraud.fraud_rate = *f;
1787 }
1788 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789 config.data_quality.enabled = *b;
1790 }
1791 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 _ => {
1799 debug!("Ignoring unknown config patch key: {}", key);
1800 }
1801 }
1802 }
1803
1804 config
1805 }
1806
1807 fn build_resource_guard(
1809 config: &GeneratorConfig,
1810 output_path: Option<PathBuf>,
1811 ) -> ResourceGuard {
1812 let mut builder = ResourceGuardBuilder::new();
1813
1814 if config.global.memory_limit_mb > 0 {
1816 builder = builder.memory_limit(config.global.memory_limit_mb);
1817 }
1818
1819 if let Some(path) = output_path {
1821 builder = builder.output_path(path).min_free_disk(100); }
1823
1824 builder = builder.conservative();
1826
1827 builder.build()
1828 }
1829
1830 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835 self.resource_guard.check()
1836 }
1837
1838 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840 let level = self.resource_guard.check()?;
1841
1842 if level != DegradationLevel::Normal {
1843 warn!(
1844 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845 phase,
1846 level,
1847 self.resource_guard.current_memory_mb(),
1848 self.resource_guard.available_disk_mb()
1849 );
1850 }
1851
1852 Ok(level)
1853 }
1854
1855 fn get_degradation_actions(&self) -> DegradationActions {
1857 self.resource_guard.get_actions()
1858 }
1859
1860 fn check_memory_limit(&self) -> SynthResult<()> {
1862 self.check_resources()?;
1863 Ok(())
1864 }
1865
1866 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868 info!("Starting enhanced generation workflow");
1869 info!(
1870 "Config: industry={:?}, period_months={}, companies={}",
1871 self.config.global.industry,
1872 self.config.global.period_months,
1873 self.config.companies.len()
1874 );
1875
1876 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879 datasynth_core::serde_decimal::set_numeric_native(is_native);
1880 struct NumericModeGuard;
1881 impl Drop for NumericModeGuard {
1882 fn drop(&mut self) {
1883 datasynth_core::serde_decimal::set_numeric_native(false);
1884 }
1885 }
1886 let _numeric_guard = if is_native {
1887 Some(NumericModeGuard)
1888 } else {
1889 None
1890 };
1891
1892 let initial_level = self.check_resources_with_log("initial")?;
1894 if initial_level == DegradationLevel::Emergency {
1895 return Err(SynthError::resource(
1896 "Insufficient resources to start generation",
1897 ));
1898 }
1899
1900 let mut stats = EnhancedGenerationStatistics {
1901 companies_count: self.config.companies.len(),
1902 period_months: self.config.global.period_months,
1903 ..Default::default()
1904 };
1905
1906 let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909 self.phase_master_data(&mut stats)?;
1911
1912 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917 let (mut document_flows, mut subledger, fa_journal_entries) =
1919 self.phase_document_flows(&mut stats)?;
1920
1921 self.emit_phase_items(
1923 "document_flows",
1924 "PurchaseOrder",
1925 &document_flows.purchase_orders,
1926 );
1927 self.emit_phase_items(
1928 "document_flows",
1929 "GoodsReceipt",
1930 &document_flows.goods_receipts,
1931 );
1932 self.emit_phase_items(
1933 "document_flows",
1934 "VendorInvoice",
1935 &document_flows.vendor_invoices,
1936 );
1937 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948 .iter()
1949 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950 .collect();
1951 if !opening_balance_jes.is_empty() {
1952 debug!(
1953 "Prepending {} opening balance JEs to entries",
1954 opening_balance_jes.len()
1955 );
1956 }
1957
1958 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961 if !opening_balance_jes.is_empty() {
1964 let mut combined = opening_balance_jes;
1965 combined.extend(entries);
1966 entries = combined;
1967 }
1968
1969 if !fa_journal_entries.is_empty() {
1971 debug!(
1972 "Appending {} FA acquisition JEs to main entries",
1973 fa_journal_entries.len()
1974 );
1975 entries.extend(fa_journal_entries);
1976 }
1977
1978 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981 let actions = self.get_degradation_actions();
1983
1984 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987 if !sourcing.contracts.is_empty() {
1990 let mut linked_count = 0usize;
1991 let po_vendor_pairs: Vec<(String, String)> = document_flows
1993 .p2p_chains
1994 .iter()
1995 .map(|chain| {
1996 (
1997 chain.purchase_order.vendor_id.clone(),
1998 chain.purchase_order.header.document_id.clone(),
1999 )
2000 })
2001 .collect();
2002
2003 for chain in &mut document_flows.p2p_chains {
2004 if chain.purchase_order.contract_id.is_none() {
2005 if let Some(contract) = sourcing
2006 .contracts
2007 .iter()
2008 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009 {
2010 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011 linked_count += 1;
2012 }
2013 }
2014 }
2015
2016 for contract in &mut sourcing.contracts {
2018 let po_ids: Vec<String> = po_vendor_pairs
2019 .iter()
2020 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021 .map(|(_, po_id)| po_id.clone())
2022 .collect();
2023 if !po_ids.is_empty() {
2024 contract.purchase_order_ids = po_ids;
2025 }
2026 }
2027
2028 if linked_count > 0 {
2029 debug!(
2030 "Linked {} purchase orders to S2C contracts by vendor match",
2031 linked_count
2032 );
2033 }
2034 }
2035
2036 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039 if !intercompany.seller_journal_entries.is_empty()
2041 || !intercompany.buyer_journal_entries.is_empty()
2042 {
2043 let ic_je_count = intercompany.seller_journal_entries.len()
2044 + intercompany.buyer_journal_entries.len();
2045 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047 debug!(
2048 "Appended {} IC journal entries to main entries",
2049 ic_je_count
2050 );
2051 }
2052
2053 if !intercompany.elimination_entries.is_empty() {
2055 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056 &intercompany.elimination_entries,
2057 );
2058 if !elim_jes.is_empty() {
2059 debug!(
2060 "Appended {} elimination journal entries to main entries",
2061 elim_jes.len()
2062 );
2063 let elim_debit: rust_decimal::Decimal =
2065 elim_jes.iter().map(|je| je.total_debit()).sum();
2066 let elim_credit: rust_decimal::Decimal =
2067 elim_jes.iter().map(|je| je.total_credit()).sum();
2068 if elim_debit != elim_credit {
2069 warn!(
2070 "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2071 elim_debit,
2072 elim_credit,
2073 elim_debit - elim_credit
2074 );
2075 }
2076 entries.extend(elim_jes);
2077 }
2078 }
2079
2080 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2082 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2083 document_flows
2084 .customer_invoices
2085 .extend(ic_docs.seller_invoices.iter().cloned());
2086 document_flows
2087 .purchase_orders
2088 .extend(ic_docs.buyer_orders.iter().cloned());
2089 document_flows
2090 .goods_receipts
2091 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2092 document_flows
2093 .vendor_invoices
2094 .extend(ic_docs.buyer_invoices.iter().cloned());
2095 debug!(
2096 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2097 ic_docs.seller_invoices.len(),
2098 ic_docs.buyer_orders.len(),
2099 ic_docs.buyer_goods_receipts.len(),
2100 ic_docs.buyer_invoices.len(),
2101 );
2102 }
2103 }
2104
2105 let hr = self.phase_hr_data(&mut stats)?;
2107
2108 if !hr.payroll_runs.is_empty() {
2110 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2111 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2112 entries.extend(payroll_jes);
2113 }
2114
2115 if !hr.pension_journal_entries.is_empty() {
2117 debug!(
2118 "Generated {} JEs from pension plans",
2119 hr.pension_journal_entries.len()
2120 );
2121 entries.extend(hr.pension_journal_entries.iter().cloned());
2122 }
2123
2124 if !hr.stock_comp_journal_entries.is_empty() {
2126 debug!(
2127 "Generated {} JEs from stock-based compensation",
2128 hr.stock_comp_journal_entries.len()
2129 );
2130 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2131 }
2132
2133 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2135
2136 if !manufacturing_snap.production_orders.is_empty() {
2138 let currency = self
2139 .config
2140 .companies
2141 .first()
2142 .map(|c| c.currency.as_str())
2143 .unwrap_or("USD");
2144 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2145 &manufacturing_snap.production_orders,
2146 &manufacturing_snap.quality_inspections,
2147 currency,
2148 );
2149 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2150 entries.extend(mfg_jes);
2151 }
2152
2153 if !manufacturing_snap.quality_inspections.is_empty() {
2155 let framework = match self.config.accounting_standards.framework {
2156 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2157 _ => "US_GAAP",
2158 };
2159 for company in &self.config.companies {
2160 let company_orders: Vec<_> = manufacturing_snap
2161 .production_orders
2162 .iter()
2163 .filter(|o| o.company_code == company.code)
2164 .cloned()
2165 .collect();
2166 let company_inspections: Vec<_> = manufacturing_snap
2167 .quality_inspections
2168 .iter()
2169 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2170 .cloned()
2171 .collect();
2172 if company_inspections.is_empty() {
2173 continue;
2174 }
2175 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2176 let warranty_result = warranty_gen.generate(
2177 &company.code,
2178 &company_orders,
2179 &company_inspections,
2180 &company.currency,
2181 framework,
2182 );
2183 if !warranty_result.journal_entries.is_empty() {
2184 debug!(
2185 "Generated {} warranty provision JEs for {}",
2186 warranty_result.journal_entries.len(),
2187 company.code
2188 );
2189 entries.extend(warranty_result.journal_entries);
2190 }
2191 }
2192 }
2193
2194 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2196 {
2197 let cogs_currency = self
2198 .config
2199 .companies
2200 .first()
2201 .map(|c| c.currency.as_str())
2202 .unwrap_or("USD");
2203 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2204 &document_flows.deliveries,
2205 &manufacturing_snap.production_orders,
2206 cogs_currency,
2207 );
2208 if !cogs_jes.is_empty() {
2209 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2210 entries.extend(cogs_jes);
2211 }
2212 }
2213
2214 if !manufacturing_snap.inventory_movements.is_empty()
2220 && !subledger.inventory_positions.is_empty()
2221 {
2222 use datasynth_core::models::MovementType as MfgMovementType;
2223 let mut receipt_count = 0usize;
2224 let mut issue_count = 0usize;
2225 for movement in &manufacturing_snap.inventory_movements {
2226 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2228 p.material_id == movement.material_code
2229 && p.company_code == movement.entity_code
2230 }) {
2231 match movement.movement_type {
2232 MfgMovementType::GoodsReceipt => {
2233 pos.add_quantity(
2235 movement.quantity,
2236 movement.value,
2237 movement.movement_date,
2238 );
2239 receipt_count += 1;
2240 }
2241 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2242 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2244 issue_count += 1;
2245 }
2246 _ => {}
2247 }
2248 }
2249 }
2250 debug!(
2251 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2252 manufacturing_snap.inventory_movements.len(),
2253 receipt_count,
2254 issue_count,
2255 );
2256 }
2257
2258 if !entries.is_empty() {
2261 stats.total_entries = entries.len() as u64;
2262 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2263 debug!(
2264 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2265 stats.total_entries, stats.total_line_items
2266 );
2267 }
2268
2269 if self.config.internal_controls.enabled && !entries.is_empty() {
2271 info!("Phase 7b: Applying internal controls to journal entries");
2272 let control_config = ControlGeneratorConfig {
2273 exception_rate: self.config.internal_controls.exception_rate,
2274 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2275 enable_sox_marking: true,
2276 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2277 self.config.internal_controls.sox_materiality_threshold,
2278 )
2279 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2280 ..Default::default()
2281 };
2282 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2283 for entry in &mut entries {
2284 control_gen.apply_controls(entry, &coa);
2285 }
2286 let with_controls = entries
2287 .iter()
2288 .filter(|e| !e.header.control_ids.is_empty())
2289 .count();
2290 info!(
2291 "Applied controls to {} entries ({} with control IDs assigned)",
2292 entries.len(),
2293 with_controls
2294 );
2295 }
2296
2297 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2301 .iter()
2302 .filter(|e| e.header.sod_violation)
2303 .filter_map(|e| {
2304 e.header.sod_conflict_type.map(|ct| {
2305 use datasynth_core::models::{RiskLevel, SodViolation};
2306 let severity = match ct {
2307 datasynth_core::models::SodConflictType::PaymentReleaser
2308 | datasynth_core::models::SodConflictType::RequesterApprover => {
2309 RiskLevel::Critical
2310 }
2311 datasynth_core::models::SodConflictType::PreparerApprover
2312 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2313 | datasynth_core::models::SodConflictType::JournalEntryPoster
2314 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2315 RiskLevel::High
2316 }
2317 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2318 RiskLevel::Medium
2319 }
2320 };
2321 let action = format!(
2322 "SoD conflict {:?} on entry {} ({})",
2323 ct, e.header.document_id, e.header.company_code
2324 );
2325 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2326 })
2327 })
2328 .collect();
2329 if !sod_violations.is_empty() {
2330 info!(
2331 "Phase 7c: Extracted {} SoD violations from {} entries",
2332 sod_violations.len(),
2333 entries.len()
2334 );
2335 }
2336
2337 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2339
2340 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2342
2343 self.emit_phase_items(
2345 "anomaly_injection",
2346 "LabeledAnomaly",
2347 &anomaly_labels.labels,
2348 );
2349
2350 {
2354 use std::collections::HashMap;
2355 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2368 for je in &entries {
2369 if je.header.is_fraud {
2370 if let Some(ref fraud_type) = je.header.fraud_type {
2371 if let Some(ref reference) = je.header.reference {
2372 fraud_map.insert(reference.clone(), *fraud_type);
2374 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2377 if !bare.is_empty() {
2378 fraud_map.insert(bare.to_string(), *fraud_type);
2379 }
2380 }
2381 }
2382 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2384 }
2385 }
2386 }
2387 if !fraud_map.is_empty() {
2388 let mut propagated = 0usize;
2389 macro_rules! propagate_to {
2391 ($collection:expr) => {
2392 for doc in &mut $collection {
2393 if doc.header.propagate_fraud(&fraud_map) {
2394 propagated += 1;
2395 }
2396 }
2397 };
2398 }
2399 propagate_to!(document_flows.purchase_orders);
2400 propagate_to!(document_flows.goods_receipts);
2401 propagate_to!(document_flows.vendor_invoices);
2402 propagate_to!(document_flows.payments);
2403 propagate_to!(document_flows.sales_orders);
2404 propagate_to!(document_flows.deliveries);
2405 propagate_to!(document_flows.customer_invoices);
2406 if propagated > 0 {
2407 info!(
2408 "Propagated fraud labels to {} document flow records",
2409 propagated
2410 );
2411 }
2412 }
2413 }
2414
2415 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2417
2418 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2420
2421 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2423
2424 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2426
2427 let balance_validation = self.phase_balance_validation(&entries)?;
2429
2430 let subledger_reconciliation =
2432 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2433
2434 let (data_quality_stats, quality_issues) =
2436 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2437
2438 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2440
2441 let audit = self.phase_audit_data(&entries, &mut stats)?;
2443
2444 let mut banking = self.phase_banking_data(&mut stats)?;
2446
2447 if self.phase_config.generate_banking
2452 && !document_flows.payments.is_empty()
2453 && !banking.accounts.is_empty()
2454 {
2455 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2456 if bridge_rate > 0.0 {
2457 let mut bridge =
2458 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2459 self.seed,
2460 );
2461 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2462 &document_flows.payments,
2463 &banking.customers,
2464 &banking.accounts,
2465 bridge_rate,
2466 );
2467 info!(
2468 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2469 bridge_stats.bridged_count,
2470 bridge_stats.transactions_emitted,
2471 bridge_stats.fraud_propagated,
2472 );
2473 let bridged_count = bridged_txns.len();
2474 banking.transactions.extend(bridged_txns);
2475
2476 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2479 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2480 &mut banking.transactions,
2481 );
2482 }
2483
2484 banking.suspicious_count = banking
2486 .transactions
2487 .iter()
2488 .filter(|t| t.is_suspicious)
2489 .count();
2490 stats.banking_transaction_count = banking.transactions.len();
2491 stats.banking_suspicious_count = banking.suspicious_count;
2492 }
2493 }
2494
2495 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2497
2498 self.phase_llm_enrichment(&mut stats);
2500
2501 self.phase_diffusion_enhancement(&mut stats);
2503
2504 self.phase_causal_overlay(&mut stats);
2506
2507 let mut financial_reporting = self.phase_financial_reporting(
2511 &document_flows,
2512 &entries,
2513 &coa,
2514 &hr,
2515 &audit,
2516 &mut stats,
2517 )?;
2518
2519 {
2521 use datasynth_core::models::StatementType;
2522 for stmt in &financial_reporting.consolidated_statements {
2523 if stmt.statement_type == StatementType::BalanceSheet {
2524 let total_assets: rust_decimal::Decimal = stmt
2525 .line_items
2526 .iter()
2527 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2528 .map(|li| li.amount)
2529 .sum();
2530 let total_le: rust_decimal::Decimal = stmt
2531 .line_items
2532 .iter()
2533 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2534 .map(|li| li.amount)
2535 .sum();
2536 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2537 warn!(
2538 "BS equation imbalance: assets={}, L+E={}",
2539 total_assets, total_le
2540 );
2541 }
2542 }
2543 }
2544 }
2545
2546 let accounting_standards =
2548 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2549
2550 if !accounting_standards.ecl_journal_entries.is_empty() {
2552 debug!(
2553 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2554 accounting_standards.ecl_journal_entries.len()
2555 );
2556 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2557 }
2558
2559 if !accounting_standards.provision_journal_entries.is_empty() {
2561 debug!(
2562 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2563 accounting_standards.provision_journal_entries.len()
2564 );
2565 entries.extend(
2566 accounting_standards
2567 .provision_journal_entries
2568 .iter()
2569 .cloned(),
2570 );
2571 }
2572
2573 let ocpm = self.phase_ocpm_events(
2575 &document_flows,
2576 &sourcing,
2577 &hr,
2578 &manufacturing_snap,
2579 &banking,
2580 &audit,
2581 &financial_reporting,
2582 &mut stats,
2583 )?;
2584
2585 if let Some(ref event_log) = ocpm.event_log {
2587 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2588 }
2589
2590 let sales_kpi_budgets =
2592 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2593
2594 let treasury =
2598 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2599
2600 if !treasury.journal_entries.is_empty() {
2602 debug!(
2603 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2604 treasury.journal_entries.len()
2605 );
2606 entries.extend(treasury.journal_entries.iter().cloned());
2607 }
2608
2609 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2611
2612 if !tax.tax_posting_journal_entries.is_empty() {
2614 debug!(
2615 "Merging {} tax posting JEs into GL",
2616 tax.tax_posting_journal_entries.len()
2617 );
2618 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2619 }
2620
2621 {
2625 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2626
2627 let framework_str = {
2628 use datasynth_config::schema::AccountingFrameworkConfig;
2629 match self
2630 .config
2631 .accounting_standards
2632 .framework
2633 .unwrap_or_default()
2634 {
2635 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2636 "IFRS"
2637 }
2638 _ => "US_GAAP",
2639 }
2640 };
2641
2642 let depreciation_total: rust_decimal::Decimal = entries
2644 .iter()
2645 .filter(|je| je.header.document_type == "CL")
2646 .flat_map(|je| je.lines.iter())
2647 .filter(|l| l.gl_account.starts_with("6000"))
2648 .map(|l| l.debit_amount)
2649 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2650
2651 let interest_paid: rust_decimal::Decimal = entries
2653 .iter()
2654 .flat_map(|je| je.lines.iter())
2655 .filter(|l| l.gl_account.starts_with("7100"))
2656 .map(|l| l.debit_amount)
2657 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2658
2659 let tax_paid: rust_decimal::Decimal = entries
2661 .iter()
2662 .flat_map(|je| je.lines.iter())
2663 .filter(|l| l.gl_account.starts_with("8000"))
2664 .map(|l| l.debit_amount)
2665 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2666
2667 let capex: rust_decimal::Decimal = entries
2669 .iter()
2670 .flat_map(|je| je.lines.iter())
2671 .filter(|l| l.gl_account.starts_with("1500"))
2672 .map(|l| l.debit_amount)
2673 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2674
2675 let dividends_paid: rust_decimal::Decimal = entries
2677 .iter()
2678 .flat_map(|je| je.lines.iter())
2679 .filter(|l| l.gl_account == "2170")
2680 .map(|l| l.debit_amount)
2681 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2682
2683 let cf_data = CashFlowSourceData {
2684 depreciation_total,
2685 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
2687 delta_ap: rust_decimal::Decimal::ZERO,
2688 delta_inventory: rust_decimal::Decimal::ZERO,
2689 capex,
2690 debt_issuance: rust_decimal::Decimal::ZERO,
2691 debt_repayment: rust_decimal::Decimal::ZERO,
2692 interest_paid,
2693 tax_paid,
2694 dividends_paid,
2695 framework: framework_str.to_string(),
2696 };
2697
2698 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2699 if !enhanced_cf_items.is_empty() {
2700 use datasynth_core::models::StatementType;
2702 let merge_count = enhanced_cf_items.len();
2703 for stmt in financial_reporting
2704 .financial_statements
2705 .iter_mut()
2706 .chain(financial_reporting.consolidated_statements.iter_mut())
2707 .chain(
2708 financial_reporting
2709 .standalone_statements
2710 .values_mut()
2711 .flat_map(|v| v.iter_mut()),
2712 )
2713 {
2714 if stmt.statement_type == StatementType::CashFlowStatement {
2715 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2716 }
2717 }
2718 info!(
2719 "Enhanced cash flow: {} supplementary items merged into CF statements",
2720 merge_count
2721 );
2722 }
2723 }
2724
2725 self.generate_notes_to_financial_statements(
2728 &mut financial_reporting,
2729 &accounting_standards,
2730 &tax,
2731 &hr,
2732 &audit,
2733 &treasury,
2734 );
2735
2736 if self.config.companies.len() >= 2 && !entries.is_empty() {
2740 let companies: Vec<(String, String)> = self
2741 .config
2742 .companies
2743 .iter()
2744 .map(|c| (c.code.clone(), c.name.clone()))
2745 .collect();
2746 let ic_elim: rust_decimal::Decimal =
2747 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2748 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2749 .unwrap_or(NaiveDate::MIN);
2750 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2751 let period_label = format!(
2752 "{}-{:02}",
2753 end_date.year(),
2754 (end_date - chrono::Days::new(1)).month()
2755 );
2756
2757 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2758 let (je_segments, je_recon) =
2759 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2760 if !je_segments.is_empty() {
2761 info!(
2762 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2763 je_segments.len(),
2764 ic_elim,
2765 );
2766 if financial_reporting.segment_reports.is_empty() {
2768 financial_reporting.segment_reports = je_segments;
2769 financial_reporting.segment_reconciliations = vec![je_recon];
2770 } else {
2771 financial_reporting.segment_reports.extend(je_segments);
2772 financial_reporting.segment_reconciliations.push(je_recon);
2773 }
2774 }
2775 }
2776
2777 let esg_snap =
2779 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2780
2781 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2783
2784 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2786
2787 let disruption_events = self.phase_disruption_events(&mut stats)?;
2789
2790 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2792
2793 let (entity_relationship_graph, cross_process_links) =
2795 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2796
2797 let industry_output = self.phase_industry_data(&mut stats);
2799
2800 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2802
2803 self.phase_hypergraph_export(
2805 &coa,
2806 &entries,
2807 &document_flows,
2808 &sourcing,
2809 &hr,
2810 &manufacturing_snap,
2811 &banking,
2812 &audit,
2813 &financial_reporting,
2814 &ocpm,
2815 &compliance_regulations,
2816 &mut stats,
2817 )?;
2818
2819 if self.phase_config.generate_graph_export {
2822 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2823 }
2824
2825 if self.config.streaming.enabled {
2827 info!("Note: streaming config is enabled but batch mode does not use it");
2828 }
2829 if self.config.vendor_network.enabled {
2830 debug!("Vendor network config available; relationship graph generation is partial");
2831 }
2832 if self.config.customer_segmentation.enabled {
2833 debug!("Customer segmentation config available; segment-aware generation is partial");
2834 }
2835
2836 let resource_stats = self.resource_guard.stats();
2838 info!(
2839 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2840 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2841 resource_stats.disk.estimated_bytes_written,
2842 resource_stats.degradation_level
2843 );
2844
2845 if let Some(ref sink) = self.phase_sink {
2847 if let Err(e) = sink.flush() {
2848 warn!("Stream sink flush failed: {e}");
2849 }
2850 }
2851
2852 let lineage = self.build_lineage_graph();
2854
2855 let gate_result = if self.config.quality_gates.enabled {
2857 let profile_name = &self.config.quality_gates.profile;
2858 match datasynth_eval::gates::get_profile(profile_name) {
2859 Some(profile) => {
2860 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2862
2863 if balance_validation.validated {
2865 eval.coherence.balance =
2866 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2867 equation_balanced: balance_validation.is_balanced,
2868 max_imbalance: (balance_validation.total_debits
2869 - balance_validation.total_credits)
2870 .abs(),
2871 periods_evaluated: 1,
2872 periods_imbalanced: if balance_validation.is_balanced {
2873 0
2874 } else {
2875 1
2876 },
2877 period_results: Vec::new(),
2878 companies_evaluated: self.config.companies.len(),
2879 });
2880 }
2881
2882 eval.coherence.passes = balance_validation.is_balanced;
2884 if !balance_validation.is_balanced {
2885 eval.coherence
2886 .failures
2887 .push("Balance sheet equation not satisfied".to_string());
2888 }
2889
2890 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2892 eval.statistical.passes = !entries.is_empty();
2893
2894 eval.quality.overall_score = 0.9; eval.quality.passes = true;
2897
2898 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2899 info!(
2900 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2901 profile_name, result.gates_passed, result.gates_total, result.summary
2902 );
2903 Some(result)
2904 }
2905 None => {
2906 warn!(
2907 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2908 profile_name
2909 );
2910 None
2911 }
2912 }
2913 } else {
2914 None
2915 };
2916
2917 let internal_controls = if self.config.internal_controls.enabled {
2919 InternalControl::standard_controls()
2920 } else {
2921 Vec::new()
2922 };
2923
2924 Ok(EnhancedGenerationResult {
2925 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2926 master_data: std::mem::take(&mut self.master_data),
2927 document_flows,
2928 subledger,
2929 ocpm,
2930 audit,
2931 banking,
2932 graph_export,
2933 sourcing,
2934 financial_reporting,
2935 hr,
2936 accounting_standards,
2937 manufacturing: manufacturing_snap,
2938 sales_kpi_budgets,
2939 tax,
2940 esg: esg_snap,
2941 treasury,
2942 project_accounting,
2943 process_evolution,
2944 organizational_events,
2945 disruption_events,
2946 intercompany,
2947 journal_entries: entries,
2948 anomaly_labels,
2949 balance_validation,
2950 data_quality_stats,
2951 quality_issues,
2952 statistics: stats,
2953 lineage: Some(lineage),
2954 gate_result,
2955 internal_controls,
2956 sod_violations,
2957 opening_balances,
2958 subledger_reconciliation,
2959 counterfactual_pairs,
2960 red_flags,
2961 collusion_rings,
2962 temporal_vendor_chains,
2963 entity_relationship_graph,
2964 cross_process_links,
2965 industry_output,
2966 compliance_regulations,
2967 })
2968 }
2969
2970 fn phase_chart_of_accounts(
2976 &mut self,
2977 stats: &mut EnhancedGenerationStatistics,
2978 ) -> SynthResult<Arc<ChartOfAccounts>> {
2979 info!("Phase 1: Generating Chart of Accounts");
2980 let coa = self.generate_coa()?;
2981 stats.accounts_count = coa.account_count();
2982 info!(
2983 "Chart of Accounts generated: {} accounts",
2984 stats.accounts_count
2985 );
2986 self.check_resources_with_log("post-coa")?;
2987 Ok(coa)
2988 }
2989
2990 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2992 if self.phase_config.generate_master_data {
2993 info!("Phase 2: Generating Master Data");
2994 self.generate_master_data()?;
2995 stats.vendor_count = self.master_data.vendors.len();
2996 stats.customer_count = self.master_data.customers.len();
2997 stats.material_count = self.master_data.materials.len();
2998 stats.asset_count = self.master_data.assets.len();
2999 stats.employee_count = self.master_data.employees.len();
3000 info!(
3001 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3002 stats.vendor_count, stats.customer_count, stats.material_count,
3003 stats.asset_count, stats.employee_count
3004 );
3005 self.check_resources_with_log("post-master-data")?;
3006 } else {
3007 debug!("Phase 2: Skipped (master data generation disabled)");
3008 }
3009 Ok(())
3010 }
3011
3012 fn phase_document_flows(
3014 &mut self,
3015 stats: &mut EnhancedGenerationStatistics,
3016 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3017 let mut document_flows = DocumentFlowSnapshot::default();
3018 let mut subledger = SubledgerSnapshot::default();
3019 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3022
3023 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3024 info!("Phase 3: Generating Document Flows");
3025 self.generate_document_flows(&mut document_flows)?;
3026 stats.p2p_chain_count = document_flows.p2p_chains.len();
3027 stats.o2c_chain_count = document_flows.o2c_chains.len();
3028 info!(
3029 "Document flows generated: {} P2P chains, {} O2C chains",
3030 stats.p2p_chain_count, stats.o2c_chain_count
3031 );
3032
3033 debug!("Phase 3b: Linking document flows to subledgers");
3035 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3036 stats.ap_invoice_count = subledger.ap_invoices.len();
3037 stats.ar_invoice_count = subledger.ar_invoices.len();
3038 debug!(
3039 "Subledgers linked: {} AP invoices, {} AR invoices",
3040 stats.ap_invoice_count, stats.ar_invoice_count
3041 );
3042
3043 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3048 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3049 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3050 debug!("Payment settlements applied to AP and AR subledgers");
3051
3052 if let Ok(start_date) =
3055 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3056 {
3057 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3058 - chrono::Days::new(1);
3059 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3060 for company in &self.config.companies {
3067 let ar_report = ARAgingReport::from_invoices(
3068 company.code.clone(),
3069 &subledger.ar_invoices,
3070 as_of_date,
3071 );
3072 subledger.ar_aging_reports.push(ar_report);
3073
3074 let ap_report = APAgingReport::from_invoices(
3075 company.code.clone(),
3076 &subledger.ap_invoices,
3077 as_of_date,
3078 );
3079 subledger.ap_aging_reports.push(ap_report);
3080 }
3081 debug!(
3082 "AR/AP aging reports built: {} AR, {} AP",
3083 subledger.ar_aging_reports.len(),
3084 subledger.ap_aging_reports.len()
3085 );
3086
3087 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3089 {
3090 use datasynth_generators::DunningGenerator;
3091 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3092 for company in &self.config.companies {
3093 let currency = company.currency.as_str();
3094 let mut company_invoices: Vec<
3097 datasynth_core::models::subledger::ar::ARInvoice,
3098 > = subledger
3099 .ar_invoices
3100 .iter()
3101 .filter(|inv| inv.company_code == company.code)
3102 .cloned()
3103 .collect();
3104
3105 if company_invoices.is_empty() {
3106 continue;
3107 }
3108
3109 let result = dunning_gen.execute_dunning_run(
3110 &company.code,
3111 as_of_date,
3112 &mut company_invoices,
3113 currency,
3114 );
3115
3116 for updated in &company_invoices {
3118 if let Some(orig) = subledger
3119 .ar_invoices
3120 .iter_mut()
3121 .find(|i| i.invoice_number == updated.invoice_number)
3122 {
3123 orig.dunning_info = updated.dunning_info.clone();
3124 }
3125 }
3126
3127 subledger.dunning_runs.push(result.dunning_run);
3128 subledger.dunning_letters.extend(result.letters);
3129 dunning_journal_entries.extend(result.journal_entries);
3131 }
3132 debug!(
3133 "Dunning runs complete: {} runs, {} letters",
3134 subledger.dunning_runs.len(),
3135 subledger.dunning_letters.len()
3136 );
3137 }
3138 }
3139
3140 self.check_resources_with_log("post-document-flows")?;
3141 } else {
3142 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3143 }
3144
3145 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3147 if !self.master_data.assets.is_empty() {
3148 debug!("Generating FA subledger records");
3149 let company_code = self
3150 .config
3151 .companies
3152 .first()
3153 .map(|c| c.code.as_str())
3154 .unwrap_or("1000");
3155 let currency = self
3156 .config
3157 .companies
3158 .first()
3159 .map(|c| c.currency.as_str())
3160 .unwrap_or("USD");
3161
3162 let mut fa_gen = datasynth_generators::FAGenerator::new(
3163 datasynth_generators::FAGeneratorConfig::default(),
3164 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3165 );
3166
3167 for asset in &self.master_data.assets {
3168 let (record, je) = fa_gen.generate_asset_acquisition(
3169 company_code,
3170 &format!("{:?}", asset.asset_class),
3171 &asset.description,
3172 asset.acquisition_date,
3173 currency,
3174 asset.cost_center.as_deref(),
3175 );
3176 subledger.fa_records.push(record);
3177 fa_journal_entries.push(je);
3178 }
3179
3180 stats.fa_subledger_count = subledger.fa_records.len();
3181 debug!(
3182 "FA subledger records generated: {} (with {} acquisition JEs)",
3183 stats.fa_subledger_count,
3184 fa_journal_entries.len()
3185 );
3186 }
3187
3188 if !self.master_data.materials.is_empty() {
3190 debug!("Generating Inventory subledger records");
3191 let first_company = self.config.companies.first();
3192 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3193 let inv_currency = first_company
3194 .map(|c| c.currency.clone())
3195 .unwrap_or_else(|| "USD".to_string());
3196
3197 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3198 datasynth_generators::InventoryGeneratorConfig::default(),
3199 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3200 inv_currency.clone(),
3201 );
3202
3203 for (i, material) in self.master_data.materials.iter().enumerate() {
3204 let plant = format!("PLANT{:02}", (i % 3) + 1);
3205 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3206 let initial_qty = rust_decimal::Decimal::from(
3207 material
3208 .safety_stock
3209 .to_string()
3210 .parse::<i64>()
3211 .unwrap_or(100),
3212 );
3213
3214 let position = inv_gen.generate_position(
3215 company_code,
3216 &plant,
3217 &storage_loc,
3218 &material.material_id,
3219 &material.description,
3220 initial_qty,
3221 Some(material.standard_cost),
3222 &inv_currency,
3223 );
3224 subledger.inventory_positions.push(position);
3225 }
3226
3227 stats.inventory_subledger_count = subledger.inventory_positions.len();
3228 debug!(
3229 "Inventory subledger records generated: {}",
3230 stats.inventory_subledger_count
3231 );
3232 }
3233
3234 if !subledger.fa_records.is_empty() {
3236 if let Ok(start_date) =
3237 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3238 {
3239 let company_code = self
3240 .config
3241 .companies
3242 .first()
3243 .map(|c| c.code.as_str())
3244 .unwrap_or("1000");
3245 let fiscal_year = start_date.year();
3246 let start_period = start_date.month();
3247 let end_period =
3248 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3249
3250 let depr_cfg = FaDepreciationScheduleConfig {
3251 fiscal_year,
3252 start_period,
3253 end_period,
3254 seed_offset: 800,
3255 };
3256 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3257 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3258 let run_count = runs.len();
3259 subledger.depreciation_runs = runs;
3260 debug!(
3261 "Depreciation runs generated: {} runs for {} periods",
3262 run_count, self.config.global.period_months
3263 );
3264 }
3265 }
3266
3267 if !subledger.inventory_positions.is_empty() {
3269 if let Ok(start_date) =
3270 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3271 {
3272 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3273 - chrono::Days::new(1);
3274
3275 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3276 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3277
3278 for company in &self.config.companies {
3279 let result = inv_val_gen.generate(
3280 &company.code,
3281 &subledger.inventory_positions,
3282 as_of_date,
3283 );
3284 subledger.inventory_valuations.push(result);
3285 }
3286 debug!(
3287 "Inventory valuations generated: {} company reports",
3288 subledger.inventory_valuations.len()
3289 );
3290 }
3291 }
3292
3293 Ok((document_flows, subledger, fa_journal_entries))
3294 }
3295
3296 #[allow(clippy::too_many_arguments)]
3298 fn phase_ocpm_events(
3299 &mut self,
3300 document_flows: &DocumentFlowSnapshot,
3301 sourcing: &SourcingSnapshot,
3302 hr: &HrSnapshot,
3303 manufacturing: &ManufacturingSnapshot,
3304 banking: &BankingSnapshot,
3305 audit: &AuditSnapshot,
3306 financial_reporting: &FinancialReportingSnapshot,
3307 stats: &mut EnhancedGenerationStatistics,
3308 ) -> SynthResult<OcpmSnapshot> {
3309 let degradation = self.check_resources()?;
3310 if degradation >= DegradationLevel::Reduced {
3311 debug!(
3312 "Phase skipped due to resource pressure (degradation: {:?})",
3313 degradation
3314 );
3315 return Ok(OcpmSnapshot::default());
3316 }
3317 if self.phase_config.generate_ocpm_events {
3318 info!("Phase 3c: Generating OCPM Events");
3319 let ocpm_snapshot = self.generate_ocpm_events(
3320 document_flows,
3321 sourcing,
3322 hr,
3323 manufacturing,
3324 banking,
3325 audit,
3326 financial_reporting,
3327 )?;
3328 stats.ocpm_event_count = ocpm_snapshot.event_count;
3329 stats.ocpm_object_count = ocpm_snapshot.object_count;
3330 stats.ocpm_case_count = ocpm_snapshot.case_count;
3331 info!(
3332 "OCPM events generated: {} events, {} objects, {} cases",
3333 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3334 );
3335 self.check_resources_with_log("post-ocpm")?;
3336 Ok(ocpm_snapshot)
3337 } else {
3338 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3339 Ok(OcpmSnapshot::default())
3340 }
3341 }
3342
3343 fn phase_journal_entries(
3345 &mut self,
3346 coa: &Arc<ChartOfAccounts>,
3347 document_flows: &DocumentFlowSnapshot,
3348 _stats: &mut EnhancedGenerationStatistics,
3349 ) -> SynthResult<Vec<JournalEntry>> {
3350 let mut entries = Vec::new();
3351
3352 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3354 debug!("Phase 4a: Generating JEs from document flows");
3355 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3356 debug!("Generated {} JEs from document flows", flow_entries.len());
3357 entries.extend(flow_entries);
3358 }
3359
3360 if self.phase_config.generate_journal_entries {
3362 info!("Phase 4: Generating Journal Entries");
3363 let je_entries = self.generate_journal_entries(coa)?;
3364 info!("Generated {} standalone journal entries", je_entries.len());
3365 entries.extend(je_entries);
3366 } else {
3367 debug!("Phase 4: Skipped (journal entry generation disabled)");
3368 }
3369
3370 if !entries.is_empty() {
3371 self.check_resources_with_log("post-journal-entries")?;
3374 }
3375
3376 Ok(entries)
3377 }
3378
3379 fn phase_anomaly_injection(
3381 &mut self,
3382 entries: &mut [JournalEntry],
3383 actions: &DegradationActions,
3384 stats: &mut EnhancedGenerationStatistics,
3385 ) -> SynthResult<AnomalyLabels> {
3386 if self.phase_config.inject_anomalies
3387 && !entries.is_empty()
3388 && !actions.skip_anomaly_injection
3389 {
3390 info!("Phase 5: Injecting Anomalies");
3391 let result = self.inject_anomalies(entries)?;
3392 stats.anomalies_injected = result.labels.len();
3393 info!("Injected {} anomalies", stats.anomalies_injected);
3394 self.check_resources_with_log("post-anomaly-injection")?;
3395 Ok(result)
3396 } else if actions.skip_anomaly_injection {
3397 warn!("Phase 5: Skipped due to resource degradation");
3398 Ok(AnomalyLabels::default())
3399 } else {
3400 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3401 Ok(AnomalyLabels::default())
3402 }
3403 }
3404
3405 fn phase_balance_validation(
3407 &mut self,
3408 entries: &[JournalEntry],
3409 ) -> SynthResult<BalanceValidationResult> {
3410 if self.phase_config.validate_balances && !entries.is_empty() {
3411 debug!("Phase 6: Validating Balances");
3412 let balance_validation = self.validate_journal_entries(entries)?;
3413 if balance_validation.is_balanced {
3414 debug!("Balance validation passed");
3415 } else {
3416 warn!(
3417 "Balance validation found {} errors",
3418 balance_validation.validation_errors.len()
3419 );
3420 }
3421 Ok(balance_validation)
3422 } else {
3423 Ok(BalanceValidationResult::default())
3424 }
3425 }
3426
3427 fn phase_data_quality_injection(
3429 &mut self,
3430 entries: &mut [JournalEntry],
3431 actions: &DegradationActions,
3432 stats: &mut EnhancedGenerationStatistics,
3433 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3434 if self.phase_config.inject_data_quality
3435 && !entries.is_empty()
3436 && !actions.skip_data_quality
3437 {
3438 info!("Phase 7: Injecting Data Quality Variations");
3439 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3440 stats.data_quality_issues = dq_stats.records_with_issues;
3441 info!("Injected {} data quality issues", stats.data_quality_issues);
3442 self.check_resources_with_log("post-data-quality")?;
3443 Ok((dq_stats, quality_issues))
3444 } else if actions.skip_data_quality {
3445 warn!("Phase 7: Skipped due to resource degradation");
3446 Ok((DataQualityStats::default(), Vec::new()))
3447 } else {
3448 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3449 Ok((DataQualityStats::default(), Vec::new()))
3450 }
3451 }
3452
3453 fn phase_period_close(
3463 &mut self,
3464 entries: &mut Vec<JournalEntry>,
3465 subledger: &SubledgerSnapshot,
3466 stats: &mut EnhancedGenerationStatistics,
3467 ) -> SynthResult<()> {
3468 if !self.phase_config.generate_period_close || entries.is_empty() {
3469 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3470 return Ok(());
3471 }
3472
3473 info!("Phase 10b: Generating period-close journal entries");
3474
3475 use datasynth_core::accounts::{
3476 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3477 };
3478 use rust_decimal::Decimal;
3479
3480 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3481 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3482 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3483 let close_date = end_date - chrono::Days::new(1);
3485
3486 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3491 .config
3492 .companies
3493 .iter()
3494 .map(|c| c.code.clone())
3495 .collect();
3496
3497 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3499 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3500
3501 let period_months = self.config.global.period_months;
3505 for asset in &subledger.fa_records {
3506 use datasynth_core::models::subledger::fa::AssetStatus;
3508 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3509 continue;
3510 }
3511 let useful_life_months = asset.useful_life_months();
3512 if useful_life_months == 0 {
3513 continue;
3515 }
3516 let salvage_value = asset.salvage_value();
3517 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3518 if depreciable_base == Decimal::ZERO {
3519 continue;
3520 }
3521 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3522 * Decimal::from(period_months))
3523 .round_dp(2);
3524 if period_depr <= Decimal::ZERO {
3525 continue;
3526 }
3527
3528 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3529 depr_header.document_type = "CL".to_string();
3530 depr_header.header_text = Some(format!(
3531 "Depreciation - {} {}",
3532 asset.asset_number, asset.description
3533 ));
3534 depr_header.created_by = "CLOSE_ENGINE".to_string();
3535 depr_header.source = TransactionSource::Automated;
3536 depr_header.business_process = Some(BusinessProcess::R2R);
3537
3538 let doc_id = depr_header.document_id;
3539 let mut depr_je = JournalEntry::new(depr_header);
3540
3541 depr_je.add_line(JournalEntryLine::debit(
3543 doc_id,
3544 1,
3545 expense_accounts::DEPRECIATION.to_string(),
3546 period_depr,
3547 ));
3548 depr_je.add_line(JournalEntryLine::credit(
3550 doc_id,
3551 2,
3552 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3553 period_depr,
3554 ));
3555
3556 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3557 close_jes.push(depr_je);
3558 }
3559
3560 if !subledger.fa_records.is_empty() {
3561 debug!(
3562 "Generated {} depreciation JEs from {} FA records",
3563 close_jes.len(),
3564 subledger.fa_records.len()
3565 );
3566 }
3567
3568 {
3572 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3573 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3574
3575 let accrual_items: &[(&str, &str, &str)] = &[
3577 ("Accrued Utilities", "6200", "2100"),
3578 ("Accrued Rent", "6300", "2100"),
3579 ("Accrued Interest", "6100", "2150"),
3580 ];
3581
3582 for company_code in &company_codes {
3583 let company_revenue: Decimal = entries
3585 .iter()
3586 .filter(|e| e.header.company_code == *company_code)
3587 .flat_map(|e| e.lines.iter())
3588 .filter(|l| l.gl_account.starts_with('4'))
3589 .map(|l| l.credit_amount - l.debit_amount)
3590 .fold(Decimal::ZERO, |acc, v| acc + v);
3591
3592 if company_revenue <= Decimal::ZERO {
3593 continue;
3594 }
3595
3596 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3598 if accrual_base <= Decimal::ZERO {
3599 continue;
3600 }
3601
3602 for (description, expense_acct, liability_acct) in accrual_items {
3603 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3604 company_code,
3605 description,
3606 accrual_base,
3607 expense_acct,
3608 liability_acct,
3609 close_date,
3610 None,
3611 );
3612 close_jes.push(accrual_je);
3613 if let Some(rev_je) = reversal_je {
3614 close_jes.push(rev_je);
3615 }
3616 }
3617 }
3618
3619 debug!(
3620 "Generated accrual entries for {} companies",
3621 company_codes.len()
3622 );
3623 }
3624
3625 for company_code in &company_codes {
3626 let mut total_revenue = Decimal::ZERO;
3631 let mut total_expenses = Decimal::ZERO;
3632
3633 for entry in entries.iter() {
3634 if entry.header.company_code != *company_code {
3635 continue;
3636 }
3637 for line in &entry.lines {
3638 let category = AccountCategory::from_account(&line.gl_account);
3639 match category {
3640 AccountCategory::Revenue => {
3641 total_revenue += line.credit_amount - line.debit_amount;
3643 }
3644 AccountCategory::Cogs
3645 | AccountCategory::OperatingExpense
3646 | AccountCategory::OtherIncomeExpense
3647 | AccountCategory::Tax => {
3648 total_expenses += line.debit_amount - line.credit_amount;
3650 }
3651 _ => {}
3652 }
3653 }
3654 }
3655
3656 let pre_tax_income = total_revenue - total_expenses;
3657
3658 if pre_tax_income == Decimal::ZERO {
3660 debug!(
3661 "Company {}: no pre-tax income, skipping period close",
3662 company_code
3663 );
3664 continue;
3665 }
3666
3667 if pre_tax_income > Decimal::ZERO {
3669 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3671
3672 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3673 tax_header.document_type = "CL".to_string();
3674 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3675 tax_header.created_by = "CLOSE_ENGINE".to_string();
3676 tax_header.source = TransactionSource::Automated;
3677 tax_header.business_process = Some(BusinessProcess::R2R);
3678
3679 let doc_id = tax_header.document_id;
3680 let mut tax_je = JournalEntry::new(tax_header);
3681
3682 tax_je.add_line(JournalEntryLine::debit(
3684 doc_id,
3685 1,
3686 tax_accounts::TAX_EXPENSE.to_string(),
3687 tax_amount,
3688 ));
3689 tax_je.add_line(JournalEntryLine::credit(
3691 doc_id,
3692 2,
3693 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3694 tax_amount,
3695 ));
3696
3697 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3698 close_jes.push(tax_je);
3699 } else {
3700 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3703 if dta_amount > Decimal::ZERO {
3704 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3705 dta_header.document_type = "CL".to_string();
3706 dta_header.header_text =
3707 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3708 dta_header.created_by = "CLOSE_ENGINE".to_string();
3709 dta_header.source = TransactionSource::Automated;
3710 dta_header.business_process = Some(BusinessProcess::R2R);
3711
3712 let doc_id = dta_header.document_id;
3713 let mut dta_je = JournalEntry::new(dta_header);
3714
3715 dta_je.add_line(JournalEntryLine::debit(
3717 doc_id,
3718 1,
3719 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3720 dta_amount,
3721 ));
3722 dta_je.add_line(JournalEntryLine::credit(
3725 doc_id,
3726 2,
3727 tax_accounts::TAX_EXPENSE.to_string(),
3728 dta_amount,
3729 ));
3730
3731 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3732 close_jes.push(dta_je);
3733 debug!(
3734 "Company {}: loss year — recognised DTA of {}",
3735 company_code, dta_amount
3736 );
3737 }
3738 }
3739
3740 let tax_provision = if pre_tax_income > Decimal::ZERO {
3746 (pre_tax_income * tax_rate).round_dp(2)
3747 } else {
3748 Decimal::ZERO
3749 };
3750 let net_income = pre_tax_income - tax_provision;
3751
3752 if net_income > Decimal::ZERO {
3753 use datasynth_generators::DividendGenerator;
3754 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
3756 let currency_str = self
3757 .config
3758 .companies
3759 .iter()
3760 .find(|c| c.code == *company_code)
3761 .map(|c| c.currency.as_str())
3762 .unwrap_or("USD");
3763 let div_result = div_gen.generate(
3764 company_code,
3765 close_date,
3766 Decimal::new(1, 0), dividend_amount,
3768 currency_str,
3769 );
3770 let div_je_count = div_result.journal_entries.len();
3771 close_jes.extend(div_result.journal_entries);
3772 debug!(
3773 "Company {}: declared dividend of {} ({} JEs)",
3774 company_code, dividend_amount, div_je_count
3775 );
3776 }
3777
3778 if net_income != Decimal::ZERO {
3783 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3784 close_header.document_type = "CL".to_string();
3785 close_header.header_text =
3786 Some(format!("Income statement close - {}", company_code));
3787 close_header.created_by = "CLOSE_ENGINE".to_string();
3788 close_header.source = TransactionSource::Automated;
3789 close_header.business_process = Some(BusinessProcess::R2R);
3790
3791 let doc_id = close_header.document_id;
3792 let mut close_je = JournalEntry::new(close_header);
3793
3794 let abs_net_income = net_income.abs();
3795
3796 if net_income > Decimal::ZERO {
3797 close_je.add_line(JournalEntryLine::debit(
3799 doc_id,
3800 1,
3801 equity_accounts::INCOME_SUMMARY.to_string(),
3802 abs_net_income,
3803 ));
3804 close_je.add_line(JournalEntryLine::credit(
3805 doc_id,
3806 2,
3807 equity_accounts::RETAINED_EARNINGS.to_string(),
3808 abs_net_income,
3809 ));
3810 } else {
3811 close_je.add_line(JournalEntryLine::debit(
3813 doc_id,
3814 1,
3815 equity_accounts::RETAINED_EARNINGS.to_string(),
3816 abs_net_income,
3817 ));
3818 close_je.add_line(JournalEntryLine::credit(
3819 doc_id,
3820 2,
3821 equity_accounts::INCOME_SUMMARY.to_string(),
3822 abs_net_income,
3823 ));
3824 }
3825
3826 debug_assert!(
3827 close_je.is_balanced(),
3828 "Income statement closing JE must be balanced"
3829 );
3830 close_jes.push(close_je);
3831 }
3832 }
3833
3834 let close_count = close_jes.len();
3835 if close_count > 0 {
3836 info!("Generated {} period-close journal entries", close_count);
3837 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3838 entries.extend(close_jes);
3839 stats.period_close_je_count = close_count;
3840
3841 stats.total_entries = entries.len() as u64;
3843 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3844 } else {
3845 debug!("No period-close entries generated (no income statement activity)");
3846 }
3847
3848 Ok(())
3849 }
3850
3851 fn phase_audit_data(
3853 &mut self,
3854 entries: &[JournalEntry],
3855 stats: &mut EnhancedGenerationStatistics,
3856 ) -> SynthResult<AuditSnapshot> {
3857 if self.phase_config.generate_audit {
3858 info!("Phase 8: Generating Audit Data");
3859 let audit_snapshot = self.generate_audit_data(entries)?;
3860 stats.audit_engagement_count = audit_snapshot.engagements.len();
3861 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3862 stats.audit_evidence_count = audit_snapshot.evidence.len();
3863 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3864 stats.audit_finding_count = audit_snapshot.findings.len();
3865 stats.audit_judgment_count = audit_snapshot.judgments.len();
3866 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3867 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3868 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3869 stats.audit_sample_count = audit_snapshot.samples.len();
3870 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3871 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3872 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3873 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3874 stats.audit_related_party_transaction_count =
3875 audit_snapshot.related_party_transactions.len();
3876 info!(
3877 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3878 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3879 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3880 {} RP transactions",
3881 stats.audit_engagement_count,
3882 stats.audit_workpaper_count,
3883 stats.audit_evidence_count,
3884 stats.audit_risk_count,
3885 stats.audit_finding_count,
3886 stats.audit_judgment_count,
3887 stats.audit_confirmation_count,
3888 stats.audit_procedure_step_count,
3889 stats.audit_sample_count,
3890 stats.audit_analytical_result_count,
3891 stats.audit_ia_function_count,
3892 stats.audit_ia_report_count,
3893 stats.audit_related_party_count,
3894 stats.audit_related_party_transaction_count,
3895 );
3896 self.check_resources_with_log("post-audit")?;
3897 Ok(audit_snapshot)
3898 } else {
3899 debug!("Phase 8: Skipped (audit generation disabled)");
3900 Ok(AuditSnapshot::default())
3901 }
3902 }
3903
3904 fn phase_banking_data(
3906 &mut self,
3907 stats: &mut EnhancedGenerationStatistics,
3908 ) -> SynthResult<BankingSnapshot> {
3909 if self.phase_config.generate_banking {
3910 info!("Phase 9: Generating Banking KYC/AML Data");
3911 let banking_snapshot = self.generate_banking_data()?;
3912 stats.banking_customer_count = banking_snapshot.customers.len();
3913 stats.banking_account_count = banking_snapshot.accounts.len();
3914 stats.banking_transaction_count = banking_snapshot.transactions.len();
3915 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3916 info!(
3917 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3918 stats.banking_customer_count, stats.banking_account_count,
3919 stats.banking_transaction_count, stats.banking_suspicious_count
3920 );
3921 self.check_resources_with_log("post-banking")?;
3922 Ok(banking_snapshot)
3923 } else {
3924 debug!("Phase 9: Skipped (banking generation disabled)");
3925 Ok(BankingSnapshot::default())
3926 }
3927 }
3928
3929 fn phase_graph_export(
3931 &mut self,
3932 entries: &[JournalEntry],
3933 coa: &Arc<ChartOfAccounts>,
3934 stats: &mut EnhancedGenerationStatistics,
3935 ) -> SynthResult<GraphExportSnapshot> {
3936 if self.phase_config.generate_graph_export && !entries.is_empty() {
3937 info!("Phase 10: Exporting Accounting Network Graphs");
3938 match self.export_graphs(entries, coa, stats) {
3939 Ok(snapshot) => {
3940 info!(
3941 "Graph export complete: {} graphs ({} nodes, {} edges)",
3942 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3943 );
3944 Ok(snapshot)
3945 }
3946 Err(e) => {
3947 warn!("Phase 10: Graph export failed: {}", e);
3948 Ok(GraphExportSnapshot::default())
3949 }
3950 }
3951 } else {
3952 debug!("Phase 10: Skipped (graph export disabled or no entries)");
3953 Ok(GraphExportSnapshot::default())
3954 }
3955 }
3956
3957 #[allow(clippy::too_many_arguments)]
3959 fn phase_hypergraph_export(
3960 &self,
3961 coa: &Arc<ChartOfAccounts>,
3962 entries: &[JournalEntry],
3963 document_flows: &DocumentFlowSnapshot,
3964 sourcing: &SourcingSnapshot,
3965 hr: &HrSnapshot,
3966 manufacturing: &ManufacturingSnapshot,
3967 banking: &BankingSnapshot,
3968 audit: &AuditSnapshot,
3969 financial_reporting: &FinancialReportingSnapshot,
3970 ocpm: &OcpmSnapshot,
3971 compliance: &ComplianceRegulationsSnapshot,
3972 stats: &mut EnhancedGenerationStatistics,
3973 ) -> SynthResult<()> {
3974 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3975 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3976 match self.export_hypergraph(
3977 coa,
3978 entries,
3979 document_flows,
3980 sourcing,
3981 hr,
3982 manufacturing,
3983 banking,
3984 audit,
3985 financial_reporting,
3986 ocpm,
3987 compliance,
3988 stats,
3989 ) {
3990 Ok(info) => {
3991 info!(
3992 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3993 info.node_count, info.edge_count, info.hyperedge_count
3994 );
3995 }
3996 Err(e) => {
3997 warn!("Phase 10b: Hypergraph export failed: {}", e);
3998 }
3999 }
4000 } else {
4001 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4002 }
4003 Ok(())
4004 }
4005
4006 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4012 if !self.config.llm.enabled {
4013 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4014 return;
4015 }
4016
4017 info!("Phase 11: Starting LLM Enrichment");
4018 let start = std::time::Instant::now();
4019
4020 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4021 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4024 let schema_provider = &self.config.llm.provider;
4025 let api_key_env = match schema_provider.as_str() {
4026 "openai" => Some("OPENAI_API_KEY"),
4027 "anthropic" => Some("ANTHROPIC_API_KEY"),
4028 "custom" => Some("LLM_API_KEY"),
4029 _ => None,
4030 };
4031 if let Some(key_env) = api_key_env {
4032 if std::env::var(key_env).is_ok() {
4033 let llm_config = datasynth_core::llm::LlmConfig {
4034 model: self.config.llm.model.clone(),
4035 api_key_env: key_env.to_string(),
4036 ..datasynth_core::llm::LlmConfig::default()
4037 };
4038 match HttpLlmProvider::new(llm_config) {
4039 Ok(p) => Arc::new(p),
4040 Err(e) => {
4041 warn!(
4042 "Failed to create HttpLlmProvider: {}; falling back to mock",
4043 e
4044 );
4045 Arc::new(MockLlmProvider::new(self.seed))
4046 }
4047 }
4048 } else {
4049 Arc::new(MockLlmProvider::new(self.seed))
4050 }
4051 } else {
4052 Arc::new(MockLlmProvider::new(self.seed))
4053 }
4054 };
4055 let enricher = VendorLlmEnricher::new(provider);
4056
4057 let industry = format!("{:?}", self.config.global.industry);
4058 let max_enrichments = self
4059 .config
4060 .llm
4061 .max_vendor_enrichments
4062 .min(self.master_data.vendors.len());
4063
4064 let mut enriched_count = 0usize;
4065 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4066 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4067 Ok(name) => {
4068 vendor.name = name;
4069 enriched_count += 1;
4070 }
4071 Err(e) => {
4072 warn!(
4073 "LLM vendor enrichment failed for {}: {}",
4074 vendor.vendor_id, e
4075 );
4076 }
4077 }
4078 }
4079
4080 enriched_count
4081 }));
4082
4083 match result {
4084 Ok(enriched_count) => {
4085 stats.llm_vendors_enriched = enriched_count;
4086 let elapsed = start.elapsed();
4087 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4088 info!(
4089 "Phase 11 complete: {} vendors enriched in {}ms",
4090 enriched_count, stats.llm_enrichment_ms
4091 );
4092 }
4093 Err(_) => {
4094 let elapsed = start.elapsed();
4095 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4096 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4097 }
4098 }
4099 }
4100
4101 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4107 if !self.config.diffusion.enabled {
4108 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4109 return;
4110 }
4111
4112 info!("Phase 12: Starting Diffusion Enhancement");
4113 let start = std::time::Instant::now();
4114
4115 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4116 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4119
4120 let diffusion_config = DiffusionConfig {
4121 n_steps: self.config.diffusion.n_steps,
4122 seed: self.seed,
4123 ..Default::default()
4124 };
4125
4126 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4127
4128 let n_samples = self.config.diffusion.sample_size;
4129 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4131
4132 samples.len()
4133 }));
4134
4135 match result {
4136 Ok(sample_count) => {
4137 stats.diffusion_samples_generated = sample_count;
4138 let elapsed = start.elapsed();
4139 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4140 info!(
4141 "Phase 12 complete: {} diffusion samples generated in {}ms",
4142 sample_count, stats.diffusion_enhancement_ms
4143 );
4144 }
4145 Err(_) => {
4146 let elapsed = start.elapsed();
4147 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4148 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4149 }
4150 }
4151 }
4152
4153 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4160 if !self.config.causal.enabled {
4161 debug!("Phase 13: Skipped (causal generation disabled)");
4162 return;
4163 }
4164
4165 info!("Phase 13: Starting Causal Overlay");
4166 let start = std::time::Instant::now();
4167
4168 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4169 let graph = match self.config.causal.template.as_str() {
4171 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4172 _ => CausalGraph::fraud_detection_template(),
4173 };
4174
4175 let scm = StructuralCausalModel::new(graph.clone())
4176 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4177
4178 let n_samples = self.config.causal.sample_size;
4179 let samples = scm
4180 .generate(n_samples, self.seed)
4181 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4182
4183 let validation_passed = if self.config.causal.validate {
4185 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4186 if report.valid {
4187 info!(
4188 "Causal validation passed: all {} checks OK",
4189 report.checks.len()
4190 );
4191 } else {
4192 warn!(
4193 "Causal validation: {} violations detected: {:?}",
4194 report.violations.len(),
4195 report.violations
4196 );
4197 }
4198 Some(report.valid)
4199 } else {
4200 None
4201 };
4202
4203 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4204 }));
4205
4206 match result {
4207 Ok(Ok((sample_count, validation_passed))) => {
4208 stats.causal_samples_generated = sample_count;
4209 stats.causal_validation_passed = validation_passed;
4210 let elapsed = start.elapsed();
4211 stats.causal_generation_ms = elapsed.as_millis() as u64;
4212 info!(
4213 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4214 sample_count, stats.causal_generation_ms, validation_passed,
4215 );
4216 }
4217 Ok(Err(e)) => {
4218 let elapsed = start.elapsed();
4219 stats.causal_generation_ms = elapsed.as_millis() as u64;
4220 warn!("Phase 13: Causal generation failed: {}", e);
4221 }
4222 Err(_) => {
4223 let elapsed = start.elapsed();
4224 stats.causal_generation_ms = elapsed.as_millis() as u64;
4225 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4226 }
4227 }
4228 }
4229
4230 fn phase_sourcing_data(
4232 &mut self,
4233 stats: &mut EnhancedGenerationStatistics,
4234 ) -> SynthResult<SourcingSnapshot> {
4235 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4236 debug!("Phase 14: Skipped (sourcing generation disabled)");
4237 return Ok(SourcingSnapshot::default());
4238 }
4239 let degradation = self.check_resources()?;
4240 if degradation >= DegradationLevel::Reduced {
4241 debug!(
4242 "Phase skipped due to resource pressure (degradation: {:?})",
4243 degradation
4244 );
4245 return Ok(SourcingSnapshot::default());
4246 }
4247
4248 info!("Phase 14: Generating S2C Sourcing Data");
4249 let seed = self.seed;
4250
4251 let vendor_ids: Vec<String> = self
4253 .master_data
4254 .vendors
4255 .iter()
4256 .map(|v| v.vendor_id.clone())
4257 .collect();
4258 if vendor_ids.is_empty() {
4259 debug!("Phase 14: Skipped (no vendors available)");
4260 return Ok(SourcingSnapshot::default());
4261 }
4262
4263 let categories: Vec<(String, String)> = vec![
4264 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4265 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4266 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4267 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4268 ("CAT-LOG".to_string(), "Logistics".to_string()),
4269 ];
4270 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4271 .iter()
4272 .map(|(id, name)| {
4273 (
4274 id.clone(),
4275 name.clone(),
4276 rust_decimal::Decimal::from(100_000),
4277 )
4278 })
4279 .collect();
4280
4281 let company_code = self
4282 .config
4283 .companies
4284 .first()
4285 .map(|c| c.code.as_str())
4286 .unwrap_or("1000");
4287 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4288 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4289 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4290 let fiscal_year = start_date.year() as u16;
4291 let owner_ids: Vec<String> = self
4292 .master_data
4293 .employees
4294 .iter()
4295 .take(5)
4296 .map(|e| e.employee_id.clone())
4297 .collect();
4298 let owner_id = owner_ids
4299 .first()
4300 .map(std::string::String::as_str)
4301 .unwrap_or("BUYER-001");
4302
4303 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4305 let spend_analyses =
4306 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4307
4308 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4310 let sourcing_projects = if owner_ids.is_empty() {
4311 Vec::new()
4312 } else {
4313 project_gen.generate(
4314 company_code,
4315 &categories_with_spend,
4316 &owner_ids,
4317 start_date,
4318 self.config.global.period_months,
4319 )
4320 };
4321 stats.sourcing_project_count = sourcing_projects.len();
4322
4323 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4325 let mut qual_gen = QualificationGenerator::new(seed + 2);
4326 let qualifications = qual_gen.generate(
4327 company_code,
4328 &qual_vendor_ids,
4329 sourcing_projects.first().map(|p| p.project_id.as_str()),
4330 owner_id,
4331 start_date,
4332 );
4333
4334 let mut rfx_gen = RfxGenerator::new(seed + 3);
4336 let rfx_events: Vec<RfxEvent> = sourcing_projects
4337 .iter()
4338 .map(|proj| {
4339 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4340 rfx_gen.generate(
4341 company_code,
4342 &proj.project_id,
4343 &proj.category_id,
4344 &qualified_vids,
4345 owner_id,
4346 start_date,
4347 50000.0,
4348 )
4349 })
4350 .collect();
4351 stats.rfx_event_count = rfx_events.len();
4352
4353 let mut bid_gen = BidGenerator::new(seed + 4);
4355 let mut all_bids = Vec::new();
4356 for rfx in &rfx_events {
4357 let bidder_count = vendor_ids.len().clamp(2, 5);
4358 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4359 let bids = bid_gen.generate(rfx, &responding, start_date);
4360 all_bids.extend(bids);
4361 }
4362 stats.bid_count = all_bids.len();
4363
4364 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4366 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4367 .iter()
4368 .map(|rfx| {
4369 let rfx_bids: Vec<SupplierBid> = all_bids
4370 .iter()
4371 .filter(|b| b.rfx_id == rfx.rfx_id)
4372 .cloned()
4373 .collect();
4374 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4375 })
4376 .collect();
4377
4378 let mut contract_gen = ContractGenerator::new(seed + 6);
4380 let contracts: Vec<ProcurementContract> = bid_evaluations
4381 .iter()
4382 .zip(rfx_events.iter())
4383 .filter_map(|(eval, rfx)| {
4384 eval.ranked_bids.first().and_then(|winner| {
4385 all_bids
4386 .iter()
4387 .find(|b| b.bid_id == winner.bid_id)
4388 .map(|winning_bid| {
4389 contract_gen.generate_from_bid(
4390 winning_bid,
4391 Some(&rfx.sourcing_project_id),
4392 &rfx.category_id,
4393 owner_id,
4394 start_date,
4395 )
4396 })
4397 })
4398 })
4399 .collect();
4400 stats.contract_count = contracts.len();
4401
4402 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4404 let catalog_items = catalog_gen.generate(&contracts);
4405 stats.catalog_item_count = catalog_items.len();
4406
4407 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4409 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4410 .iter()
4411 .fold(
4412 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4413 |mut acc, c| {
4414 acc.entry(c.vendor_id.clone()).or_default().push(c);
4415 acc
4416 },
4417 )
4418 .into_iter()
4419 .collect();
4420 let scorecards = scorecard_gen.generate(
4421 company_code,
4422 &vendor_contracts,
4423 start_date,
4424 end_date,
4425 owner_id,
4426 );
4427 stats.scorecard_count = scorecards.len();
4428
4429 let mut sourcing_projects = sourcing_projects;
4432 for project in &mut sourcing_projects {
4433 project.rfx_ids = rfx_events
4435 .iter()
4436 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4437 .map(|rfx| rfx.rfx_id.clone())
4438 .collect();
4439
4440 project.contract_id = contracts
4442 .iter()
4443 .find(|c| {
4444 c.sourcing_project_id
4445 .as_deref()
4446 .is_some_and(|sp| sp == project.project_id)
4447 })
4448 .map(|c| c.contract_id.clone());
4449
4450 project.spend_analysis_id = spend_analyses
4452 .iter()
4453 .find(|sa| sa.category_id == project.category_id)
4454 .map(|sa| sa.category_id.clone());
4455 }
4456
4457 info!(
4458 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4459 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4460 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4461 );
4462 self.check_resources_with_log("post-sourcing")?;
4463
4464 Ok(SourcingSnapshot {
4465 spend_analyses,
4466 sourcing_projects,
4467 qualifications,
4468 rfx_events,
4469 bids: all_bids,
4470 bid_evaluations,
4471 contracts,
4472 catalog_items,
4473 scorecards,
4474 })
4475 }
4476
4477 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4483 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4484
4485 let parent_code = self
4486 .config
4487 .companies
4488 .first()
4489 .map(|c| c.code.clone())
4490 .unwrap_or_else(|| "PARENT".to_string());
4491
4492 let mut group = GroupStructure::new(parent_code);
4493
4494 for company in self.config.companies.iter().skip(1) {
4495 let sub =
4496 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4497 group.add_subsidiary(sub);
4498 }
4499
4500 group
4501 }
4502
4503 fn phase_intercompany(
4505 &mut self,
4506 journal_entries: &[JournalEntry],
4507 stats: &mut EnhancedGenerationStatistics,
4508 ) -> SynthResult<IntercompanySnapshot> {
4509 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4511 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4512 return Ok(IntercompanySnapshot::default());
4513 }
4514
4515 if self.config.companies.len() < 2 {
4517 debug!(
4518 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4519 self.config.companies.len()
4520 );
4521 return Ok(IntercompanySnapshot::default());
4522 }
4523
4524 info!("Phase 14b: Generating Intercompany Transactions");
4525
4526 let group_structure = self.build_group_structure();
4529 debug!(
4530 "Group structure built: parent={}, subsidiaries={}",
4531 group_structure.parent_entity,
4532 group_structure.subsidiaries.len()
4533 );
4534
4535 let seed = self.seed;
4536 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4537 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4538 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4539
4540 let parent_code = self.config.companies[0].code.clone();
4543 let mut ownership_structure =
4544 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4545
4546 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4547 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4548 format!("REL{:03}", i + 1),
4549 parent_code.clone(),
4550 company.code.clone(),
4551 rust_decimal::Decimal::from(100), start_date,
4553 );
4554 ownership_structure.add_relationship(relationship);
4555 }
4556
4557 let tp_method = match self.config.intercompany.transfer_pricing_method {
4559 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4560 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4561 }
4562 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4563 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4564 }
4565 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4566 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4567 }
4568 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4569 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4570 }
4571 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4572 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4573 }
4574 };
4575
4576 let ic_currency = self
4578 .config
4579 .companies
4580 .first()
4581 .map(|c| c.currency.clone())
4582 .unwrap_or_else(|| "USD".to_string());
4583 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4584 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4585 transfer_pricing_method: tp_method,
4586 markup_percent: rust_decimal::Decimal::from_f64_retain(
4587 self.config.intercompany.markup_percent,
4588 )
4589 .unwrap_or(rust_decimal::Decimal::from(5)),
4590 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4591 default_currency: ic_currency,
4592 ..Default::default()
4593 };
4594
4595 let mut ic_generator = datasynth_generators::ICGenerator::new(
4597 ic_gen_config,
4598 ownership_structure.clone(),
4599 seed + 50,
4600 );
4601
4602 let transactions_per_day = 3;
4605 let matched_pairs = ic_generator.generate_transactions_for_period(
4606 start_date,
4607 end_date,
4608 transactions_per_day,
4609 );
4610
4611 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4613 debug!(
4614 "Generated {} IC seller invoices, {} IC buyer POs",
4615 ic_doc_chains.seller_invoices.len(),
4616 ic_doc_chains.buyer_orders.len()
4617 );
4618
4619 let mut seller_entries = Vec::new();
4621 let mut buyer_entries = Vec::new();
4622 let fiscal_year = start_date.year();
4623
4624 for pair in &matched_pairs {
4625 let fiscal_period = pair.posting_date.month();
4626 let (seller_je, buyer_je) =
4627 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4628 seller_entries.push(seller_je);
4629 buyer_entries.push(buyer_je);
4630 }
4631
4632 let matching_config = datasynth_generators::ICMatchingConfig {
4634 base_currency: self
4635 .config
4636 .companies
4637 .first()
4638 .map(|c| c.currency.clone())
4639 .unwrap_or_else(|| "USD".to_string()),
4640 ..Default::default()
4641 };
4642 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4643 matching_engine.load_matched_pairs(&matched_pairs);
4644 let matching_result = matching_engine.run_matching(end_date);
4645
4646 let mut elimination_entries = Vec::new();
4648 if self.config.intercompany.generate_eliminations {
4649 let elim_config = datasynth_generators::EliminationConfig {
4650 consolidation_entity: "GROUP".to_string(),
4651 base_currency: self
4652 .config
4653 .companies
4654 .first()
4655 .map(|c| c.currency.clone())
4656 .unwrap_or_else(|| "USD".to_string()),
4657 ..Default::default()
4658 };
4659
4660 let mut elim_generator =
4661 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4662
4663 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4664 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4665 matching_result
4666 .matched_balances
4667 .iter()
4668 .chain(matching_result.unmatched_balances.iter())
4669 .cloned()
4670 .collect();
4671
4672 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4684 std::collections::HashMap::new();
4685 let mut equity_amounts: std::collections::HashMap<
4686 String,
4687 std::collections::HashMap<String, rust_decimal::Decimal>,
4688 > = std::collections::HashMap::new();
4689 {
4690 use rust_decimal::Decimal;
4691 let hundred = Decimal::from(100u32);
4692 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4696 for sub in &group_structure.subsidiaries {
4697 let net_assets = {
4698 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4699 if na > Decimal::ZERO {
4700 na
4701 } else {
4702 Decimal::from(1_000_000u64)
4703 }
4704 };
4705 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4707 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4708
4709 let mut eq_map = std::collections::HashMap::new();
4712 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4713 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4714 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4715 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4716 }
4717 }
4718
4719 let journal = elim_generator.generate_eliminations(
4720 &fiscal_period,
4721 end_date,
4722 &all_balances,
4723 &matched_pairs,
4724 &investment_amounts,
4725 &equity_amounts,
4726 );
4727
4728 elimination_entries = journal.entries.clone();
4729 }
4730
4731 let matched_pair_count = matched_pairs.len();
4732 let elimination_entry_count = elimination_entries.len();
4733 let match_rate = matching_result.match_rate;
4734
4735 stats.ic_matched_pair_count = matched_pair_count;
4736 stats.ic_elimination_count = elimination_entry_count;
4737 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4738
4739 info!(
4740 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4741 matched_pair_count,
4742 stats.ic_transaction_count,
4743 seller_entries.len(),
4744 buyer_entries.len(),
4745 elimination_entry_count,
4746 match_rate * 100.0
4747 );
4748 self.check_resources_with_log("post-intercompany")?;
4749
4750 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4754 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4755 use rust_decimal::Decimal;
4756
4757 let eight_pct = Decimal::new(8, 2); group_structure
4760 .subsidiaries
4761 .iter()
4762 .filter(|sub| {
4763 sub.nci_percentage > Decimal::ZERO
4764 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4765 })
4766 .map(|sub| {
4767 let net_assets_from_jes =
4771 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4772
4773 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4774 net_assets_from_jes.round_dp(2)
4775 } else {
4776 Decimal::from(1_000_000u64)
4778 };
4779
4780 let net_income = (net_assets * eight_pct).round_dp(2);
4782
4783 NciMeasurement::compute(
4784 sub.entity_code.clone(),
4785 sub.nci_percentage,
4786 net_assets,
4787 net_income,
4788 )
4789 })
4790 .collect()
4791 };
4792
4793 if !nci_measurements.is_empty() {
4794 info!(
4795 "NCI measurements: {} subsidiaries with non-controlling interests",
4796 nci_measurements.len()
4797 );
4798 }
4799
4800 Ok(IntercompanySnapshot {
4801 group_structure: Some(group_structure),
4802 matched_pairs,
4803 seller_journal_entries: seller_entries,
4804 buyer_journal_entries: buyer_entries,
4805 elimination_entries,
4806 nci_measurements,
4807 ic_document_chains: Some(ic_doc_chains),
4808 matched_pair_count,
4809 elimination_entry_count,
4810 match_rate,
4811 })
4812 }
4813
4814 fn phase_financial_reporting(
4816 &mut self,
4817 document_flows: &DocumentFlowSnapshot,
4818 journal_entries: &[JournalEntry],
4819 coa: &Arc<ChartOfAccounts>,
4820 _hr: &HrSnapshot,
4821 _audit: &AuditSnapshot,
4822 stats: &mut EnhancedGenerationStatistics,
4823 ) -> SynthResult<FinancialReportingSnapshot> {
4824 let fs_enabled = self.phase_config.generate_financial_statements
4825 || self.config.financial_reporting.enabled;
4826 let br_enabled = self.phase_config.generate_bank_reconciliation;
4827
4828 if !fs_enabled && !br_enabled {
4829 debug!("Phase 15: Skipped (financial reporting disabled)");
4830 return Ok(FinancialReportingSnapshot::default());
4831 }
4832
4833 info!("Phase 15: Generating Financial Reporting Data");
4834
4835 let seed = self.seed;
4836 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4837 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4838
4839 let mut financial_statements = Vec::new();
4840 let mut bank_reconciliations = Vec::new();
4841 let mut trial_balances = Vec::new();
4842 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4843 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4844 Vec::new();
4845 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4847 std::collections::HashMap::new();
4848 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4850 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4852
4853 if fs_enabled {
4861 let has_journal_entries = !journal_entries.is_empty();
4862
4863 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4866 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4868
4869 let elimination_entries: Vec<&JournalEntry> = journal_entries
4871 .iter()
4872 .filter(|je| je.header.is_elimination)
4873 .collect();
4874
4875 for period in 0..self.config.global.period_months {
4877 let period_start = start_date + chrono::Months::new(period);
4878 let period_end =
4879 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4880 let fiscal_year = period_end.year() as u16;
4881 let fiscal_period = period_end.month() as u8;
4882 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4883
4884 let mut entity_tb_map: std::collections::HashMap<
4887 String,
4888 std::collections::HashMap<String, rust_decimal::Decimal>,
4889 > = std::collections::HashMap::new();
4890
4891 for (company_idx, company) in self.config.companies.iter().enumerate() {
4893 let company_code = company.code.as_str();
4894 let currency = company.currency.as_str();
4895 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4898 let mut company_fs_gen =
4899 FinancialStatementGenerator::new(seed + company_seed_offset);
4900
4901 if has_journal_entries {
4902 let tb_entries = Self::build_cumulative_trial_balance(
4903 journal_entries,
4904 coa,
4905 company_code,
4906 start_date,
4907 period_end,
4908 fiscal_year,
4909 fiscal_period,
4910 );
4911
4912 let entity_cat_map =
4914 entity_tb_map.entry(company_code.to_string()).or_default();
4915 for tb_entry in &tb_entries {
4916 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4917 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4918 }
4919
4920 let stmts = company_fs_gen.generate(
4921 company_code,
4922 currency,
4923 &tb_entries,
4924 period_start,
4925 period_end,
4926 fiscal_year,
4927 fiscal_period,
4928 None,
4929 "SYS-AUTOCLOSE",
4930 );
4931
4932 let mut entity_stmts = Vec::new();
4933 for stmt in stmts {
4934 if stmt.statement_type == StatementType::CashFlowStatement {
4935 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4936 let cf_items = Self::build_cash_flow_from_trial_balances(
4937 &tb_entries,
4938 None,
4939 net_income,
4940 );
4941 entity_stmts.push(FinancialStatement {
4942 cash_flow_items: cf_items,
4943 ..stmt
4944 });
4945 } else {
4946 entity_stmts.push(stmt);
4947 }
4948 }
4949
4950 financial_statements.extend(entity_stmts.clone());
4952
4953 standalone_statements
4955 .entry(company_code.to_string())
4956 .or_default()
4957 .extend(entity_stmts);
4958
4959 if company_idx == 0 {
4962 trial_balances.push(PeriodTrialBalance {
4963 fiscal_year,
4964 fiscal_period,
4965 period_start,
4966 period_end,
4967 entries: tb_entries,
4968 });
4969 }
4970 } else {
4971 let tb_entries = Self::build_trial_balance_from_entries(
4973 journal_entries,
4974 coa,
4975 company_code,
4976 fiscal_year,
4977 fiscal_period,
4978 );
4979
4980 let stmts = company_fs_gen.generate(
4981 company_code,
4982 currency,
4983 &tb_entries,
4984 period_start,
4985 period_end,
4986 fiscal_year,
4987 fiscal_period,
4988 None,
4989 "SYS-AUTOCLOSE",
4990 );
4991 financial_statements.extend(stmts.clone());
4992 standalone_statements
4993 .entry(company_code.to_string())
4994 .or_default()
4995 .extend(stmts);
4996
4997 if company_idx == 0 && !tb_entries.is_empty() {
4998 trial_balances.push(PeriodTrialBalance {
4999 fiscal_year,
5000 fiscal_period,
5001 period_start,
5002 period_end,
5003 entries: tb_entries,
5004 });
5005 }
5006 }
5007 }
5008
5009 let group_currency = self
5012 .config
5013 .companies
5014 .first()
5015 .map(|c| c.currency.as_str())
5016 .unwrap_or("USD");
5017
5018 let period_eliminations: Vec<JournalEntry> = elimination_entries
5020 .iter()
5021 .filter(|je| {
5022 je.header.fiscal_year == fiscal_year
5023 && je.header.fiscal_period == fiscal_period
5024 })
5025 .map(|je| (*je).clone())
5026 .collect();
5027
5028 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5029 &entity_tb_map,
5030 &period_eliminations,
5031 &period_label,
5032 );
5033
5034 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5037 .line_items
5038 .iter()
5039 .map(|li| {
5040 let net = li.post_elimination_total;
5041 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5042 (net, rust_decimal::Decimal::ZERO)
5043 } else {
5044 (rust_decimal::Decimal::ZERO, -net)
5045 };
5046 datasynth_generators::TrialBalanceEntry {
5047 account_code: li.account_category.clone(),
5048 account_name: li.account_category.clone(),
5049 category: li.account_category.clone(),
5050 debit_balance: debit,
5051 credit_balance: credit,
5052 }
5053 })
5054 .collect();
5055
5056 let mut cons_stmts = cons_gen.generate(
5057 "GROUP",
5058 group_currency,
5059 &cons_tb,
5060 period_start,
5061 period_end,
5062 fiscal_year,
5063 fiscal_period,
5064 None,
5065 "SYS-AUTOCLOSE",
5066 );
5067
5068 let bs_categories: &[&str] = &[
5072 "CASH",
5073 "RECEIVABLES",
5074 "INVENTORY",
5075 "FIXEDASSETS",
5076 "PAYABLES",
5077 "ACCRUEDLIABILITIES",
5078 "LONGTERMDEBT",
5079 "EQUITY",
5080 ];
5081 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5082 cons_line_items.into_iter().partition(|li| {
5083 let upper = li.label.to_uppercase();
5084 bs_categories.iter().any(|c| upper == *c)
5085 });
5086
5087 for stmt in &mut cons_stmts {
5088 stmt.is_consolidated = true;
5089 match stmt.statement_type {
5090 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5091 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5092 _ => {} }
5094 }
5095
5096 consolidated_statements.extend(cons_stmts);
5097 consolidation_schedules.push(schedule);
5098 }
5099
5100 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5106 info!(
5107 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5108 stats.financial_statement_count,
5109 consolidated_statements.len(),
5110 has_journal_entries
5111 );
5112
5113 let entity_seeds: Vec<SegmentSeed> = self
5118 .config
5119 .companies
5120 .iter()
5121 .map(|c| SegmentSeed {
5122 code: c.code.clone(),
5123 name: c.name.clone(),
5124 currency: c.currency.clone(),
5125 })
5126 .collect();
5127
5128 let mut seg_gen = SegmentGenerator::new(seed + 30);
5129
5130 for period in 0..self.config.global.period_months {
5135 let period_end =
5136 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5137 let fiscal_year = period_end.year() as u16;
5138 let fiscal_period = period_end.month() as u8;
5139 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5140
5141 use datasynth_core::models::StatementType;
5142
5143 let cons_is = consolidated_statements.iter().find(|s| {
5145 s.fiscal_year == fiscal_year
5146 && s.fiscal_period == fiscal_period
5147 && s.statement_type == StatementType::IncomeStatement
5148 });
5149 let cons_bs = consolidated_statements.iter().find(|s| {
5150 s.fiscal_year == fiscal_year
5151 && s.fiscal_period == fiscal_period
5152 && s.statement_type == StatementType::BalanceSheet
5153 });
5154
5155 let is_stmt = cons_is.or_else(|| {
5157 financial_statements.iter().find(|s| {
5158 s.fiscal_year == fiscal_year
5159 && s.fiscal_period == fiscal_period
5160 && s.statement_type == StatementType::IncomeStatement
5161 })
5162 });
5163 let bs_stmt = cons_bs.or_else(|| {
5164 financial_statements.iter().find(|s| {
5165 s.fiscal_year == fiscal_year
5166 && s.fiscal_period == fiscal_period
5167 && s.statement_type == StatementType::BalanceSheet
5168 })
5169 });
5170
5171 let consolidated_revenue = is_stmt
5172 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5173 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5175
5176 let consolidated_profit = is_stmt
5177 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5178 .map(|li| li.amount)
5179 .unwrap_or(rust_decimal::Decimal::ZERO);
5180
5181 let consolidated_assets = bs_stmt
5182 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5183 .map(|li| li.amount)
5184 .unwrap_or(rust_decimal::Decimal::ZERO);
5185
5186 if consolidated_revenue == rust_decimal::Decimal::ZERO
5188 && consolidated_assets == rust_decimal::Decimal::ZERO
5189 {
5190 continue;
5191 }
5192
5193 let group_code = self
5194 .config
5195 .companies
5196 .first()
5197 .map(|c| c.code.as_str())
5198 .unwrap_or("GROUP");
5199
5200 let total_depr: rust_decimal::Decimal = journal_entries
5203 .iter()
5204 .filter(|je| je.header.document_type == "CL")
5205 .flat_map(|je| je.lines.iter())
5206 .filter(|l| l.gl_account.starts_with("6000"))
5207 .map(|l| l.debit_amount)
5208 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5209 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5210 Some(total_depr)
5211 } else {
5212 None
5213 };
5214
5215 let (segs, recon) = seg_gen.generate(
5216 group_code,
5217 &period_label,
5218 consolidated_revenue,
5219 consolidated_profit,
5220 consolidated_assets,
5221 &entity_seeds,
5222 depr_param,
5223 );
5224 segment_reports.extend(segs);
5225 segment_reconciliations.push(recon);
5226 }
5227
5228 info!(
5229 "Segment reports generated: {} segments, {} reconciliations",
5230 segment_reports.len(),
5231 segment_reconciliations.len()
5232 );
5233 }
5234
5235 if br_enabled && !document_flows.payments.is_empty() {
5237 let employee_ids: Vec<String> = self
5238 .master_data
5239 .employees
5240 .iter()
5241 .map(|e| e.employee_id.clone())
5242 .collect();
5243 let mut br_gen =
5244 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5245
5246 for company in &self.config.companies {
5248 let company_payments: Vec<PaymentReference> = document_flows
5249 .payments
5250 .iter()
5251 .filter(|p| p.header.company_code == company.code)
5252 .map(|p| PaymentReference {
5253 id: p.header.document_id.clone(),
5254 amount: if p.is_vendor { p.amount } else { -p.amount },
5255 date: p.header.document_date,
5256 reference: p
5257 .check_number
5258 .clone()
5259 .or_else(|| p.wire_reference.clone())
5260 .unwrap_or_else(|| p.header.document_id.clone()),
5261 })
5262 .collect();
5263
5264 if company_payments.is_empty() {
5265 continue;
5266 }
5267
5268 let bank_account_id = format!("{}-MAIN", company.code);
5269
5270 for period in 0..self.config.global.period_months {
5272 let period_start = start_date + chrono::Months::new(period);
5273 let period_end =
5274 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5275
5276 let period_payments: Vec<PaymentReference> = company_payments
5277 .iter()
5278 .filter(|p| p.date >= period_start && p.date <= period_end)
5279 .cloned()
5280 .collect();
5281
5282 let recon = br_gen.generate(
5283 &company.code,
5284 &bank_account_id,
5285 period_start,
5286 period_end,
5287 &company.currency,
5288 &period_payments,
5289 );
5290 bank_reconciliations.push(recon);
5291 }
5292 }
5293 info!(
5294 "Bank reconciliations generated: {} reconciliations",
5295 bank_reconciliations.len()
5296 );
5297 }
5298
5299 stats.bank_reconciliation_count = bank_reconciliations.len();
5300 self.check_resources_with_log("post-financial-reporting")?;
5301
5302 if !trial_balances.is_empty() {
5303 info!(
5304 "Period-close trial balances captured: {} periods",
5305 trial_balances.len()
5306 );
5307 }
5308
5309 let notes_to_financial_statements = Vec::new();
5313
5314 Ok(FinancialReportingSnapshot {
5315 financial_statements,
5316 standalone_statements,
5317 consolidated_statements,
5318 consolidation_schedules,
5319 bank_reconciliations,
5320 trial_balances,
5321 segment_reports,
5322 segment_reconciliations,
5323 notes_to_financial_statements,
5324 })
5325 }
5326
5327 fn generate_notes_to_financial_statements(
5334 &self,
5335 financial_reporting: &mut FinancialReportingSnapshot,
5336 accounting_standards: &AccountingStandardsSnapshot,
5337 tax: &TaxSnapshot,
5338 hr: &HrSnapshot,
5339 audit: &AuditSnapshot,
5340 treasury: &TreasurySnapshot,
5341 ) {
5342 use datasynth_config::schema::AccountingFrameworkConfig;
5343 use datasynth_core::models::StatementType;
5344 use datasynth_generators::period_close::notes_generator::{
5345 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5346 };
5347
5348 let seed = self.seed;
5349 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5350 {
5351 Ok(d) => d,
5352 Err(_) => return,
5353 };
5354
5355 let mut notes_gen = NotesGenerator::new(seed + 4235);
5356
5357 for company in &self.config.companies {
5358 let last_period_end = start_date
5359 + chrono::Months::new(self.config.global.period_months)
5360 - chrono::Days::new(1);
5361 let fiscal_year = last_period_end.year() as u16;
5362
5363 let entity_is = financial_reporting
5365 .standalone_statements
5366 .get(&company.code)
5367 .and_then(|stmts| {
5368 stmts.iter().find(|s| {
5369 s.fiscal_year == fiscal_year
5370 && s.statement_type == StatementType::IncomeStatement
5371 })
5372 });
5373 let entity_bs = financial_reporting
5374 .standalone_statements
5375 .get(&company.code)
5376 .and_then(|stmts| {
5377 stmts.iter().find(|s| {
5378 s.fiscal_year == fiscal_year
5379 && s.statement_type == StatementType::BalanceSheet
5380 })
5381 });
5382
5383 let revenue_amount = entity_is
5385 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5386 .map(|li| li.amount);
5387 let ppe_gross = entity_bs
5388 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5389 .map(|li| li.amount);
5390
5391 let framework = match self
5392 .config
5393 .accounting_standards
5394 .framework
5395 .unwrap_or_default()
5396 {
5397 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5398 "IFRS".to_string()
5399 }
5400 _ => "US GAAP".to_string(),
5401 };
5402
5403 let (entity_dta, entity_dtl) = {
5406 let mut dta = rust_decimal::Decimal::ZERO;
5407 let mut dtl = rust_decimal::Decimal::ZERO;
5408 for rf in &tax.deferred_tax.rollforwards {
5409 if rf.entity_code == company.code {
5410 dta += rf.closing_dta;
5411 dtl += rf.closing_dtl;
5412 }
5413 }
5414 (
5415 if dta > rust_decimal::Decimal::ZERO {
5416 Some(dta)
5417 } else {
5418 None
5419 },
5420 if dtl > rust_decimal::Decimal::ZERO {
5421 Some(dtl)
5422 } else {
5423 None
5424 },
5425 )
5426 };
5427
5428 let entity_provisions: Vec<_> = accounting_standards
5431 .provisions
5432 .iter()
5433 .filter(|p| p.entity_code == company.code)
5434 .collect();
5435 let provision_count = entity_provisions.len();
5436 let total_provisions = if provision_count > 0 {
5437 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5438 } else {
5439 None
5440 };
5441
5442 let entity_pension_plan_count = hr
5444 .pension_plans
5445 .iter()
5446 .filter(|p| p.entity_code == company.code)
5447 .count();
5448 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5449 let sum: rust_decimal::Decimal = hr
5450 .pension_disclosures
5451 .iter()
5452 .filter(|d| {
5453 hr.pension_plans
5454 .iter()
5455 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5456 })
5457 .map(|d| d.net_pension_liability)
5458 .sum();
5459 let plan_assets_sum: rust_decimal::Decimal = hr
5460 .pension_plan_assets
5461 .iter()
5462 .filter(|a| {
5463 hr.pension_plans
5464 .iter()
5465 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5466 })
5467 .map(|a| a.fair_value_closing)
5468 .sum();
5469 if entity_pension_plan_count > 0 {
5470 Some(sum + plan_assets_sum)
5471 } else {
5472 None
5473 }
5474 };
5475 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5476 let sum: rust_decimal::Decimal = hr
5477 .pension_plan_assets
5478 .iter()
5479 .filter(|a| {
5480 hr.pension_plans
5481 .iter()
5482 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5483 })
5484 .map(|a| a.fair_value_closing)
5485 .sum();
5486 if entity_pension_plan_count > 0 {
5487 Some(sum)
5488 } else {
5489 None
5490 }
5491 };
5492
5493 let rp_count = audit.related_party_transactions.len();
5496 let se_count = audit.subsequent_events.len();
5497 let adjusting_count = audit
5498 .subsequent_events
5499 .iter()
5500 .filter(|e| {
5501 matches!(
5502 e.classification,
5503 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5504 )
5505 })
5506 .count();
5507
5508 let ctx = NotesGeneratorContext {
5509 entity_code: company.code.clone(),
5510 framework,
5511 period: format!("FY{}", fiscal_year),
5512 period_end: last_period_end,
5513 currency: company.currency.clone(),
5514 revenue_amount,
5515 total_ppe_gross: ppe_gross,
5516 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5517 deferred_tax_asset: entity_dta,
5519 deferred_tax_liability: entity_dtl,
5520 provision_count,
5522 total_provisions,
5523 pension_plan_count: entity_pension_plan_count,
5525 total_dbo: entity_total_dbo,
5526 total_plan_assets: entity_total_plan_assets,
5527 related_party_transaction_count: rp_count,
5529 subsequent_event_count: se_count,
5530 adjusting_event_count: adjusting_count,
5531 ..NotesGeneratorContext::default()
5532 };
5533
5534 let entity_notes = notes_gen.generate(&ctx);
5535 let standard_note_count = entity_notes.len() as u32;
5536 info!(
5537 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5538 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5539 );
5540 financial_reporting
5541 .notes_to_financial_statements
5542 .extend(entity_notes);
5543
5544 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5546 .debt_instruments
5547 .iter()
5548 .filter(|d| d.entity_id == company.code)
5549 .map(|d| {
5550 (
5551 format!("{:?}", d.instrument_type),
5552 d.principal,
5553 d.maturity_date.to_string(),
5554 )
5555 })
5556 .collect();
5557
5558 let hedge_count = treasury.hedge_relationships.len();
5559 let effective_hedges = treasury
5560 .hedge_relationships
5561 .iter()
5562 .filter(|h| h.is_effective)
5563 .count();
5564 let total_notional: rust_decimal::Decimal = treasury
5565 .hedging_instruments
5566 .iter()
5567 .map(|h| h.notional_amount)
5568 .sum();
5569 let total_fair_value: rust_decimal::Decimal = treasury
5570 .hedging_instruments
5571 .iter()
5572 .map(|h| h.fair_value)
5573 .sum();
5574
5575 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5577 .provisions
5578 .iter()
5579 .filter(|p| p.entity_code == company.code)
5580 .map(|p| p.id.as_str())
5581 .collect();
5582 let provision_movements: Vec<(
5583 String,
5584 rust_decimal::Decimal,
5585 rust_decimal::Decimal,
5586 rust_decimal::Decimal,
5587 )> = accounting_standards
5588 .provision_movements
5589 .iter()
5590 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5591 .map(|m| {
5592 let prov_type = accounting_standards
5593 .provisions
5594 .iter()
5595 .find(|p| p.id == m.provision_id)
5596 .map(|p| format!("{:?}", p.provision_type))
5597 .unwrap_or_else(|| "Unknown".to_string());
5598 (prov_type, m.opening, m.additions, m.closing)
5599 })
5600 .collect();
5601
5602 let enhanced_ctx = EnhancedNotesContext {
5603 entity_code: company.code.clone(),
5604 period: format!("FY{}", fiscal_year),
5605 currency: company.currency.clone(),
5606 finished_goods_value: rust_decimal::Decimal::ZERO,
5608 wip_value: rust_decimal::Decimal::ZERO,
5609 raw_materials_value: rust_decimal::Decimal::ZERO,
5610 debt_instruments,
5611 hedge_count,
5612 effective_hedges,
5613 total_notional,
5614 total_fair_value,
5615 provision_movements,
5616 };
5617
5618 let enhanced_notes =
5619 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5620 if !enhanced_notes.is_empty() {
5621 info!(
5622 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5623 company.code,
5624 enhanced_notes.len(),
5625 enhanced_ctx.debt_instruments.len(),
5626 hedge_count,
5627 enhanced_ctx.provision_movements.len(),
5628 );
5629 financial_reporting
5630 .notes_to_financial_statements
5631 .extend(enhanced_notes);
5632 }
5633 }
5634 }
5635
5636 fn build_trial_balance_from_entries(
5642 journal_entries: &[JournalEntry],
5643 coa: &ChartOfAccounts,
5644 company_code: &str,
5645 fiscal_year: u16,
5646 fiscal_period: u8,
5647 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5648 use rust_decimal::Decimal;
5649
5650 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5652 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5653
5654 for je in journal_entries {
5655 if je.header.company_code != company_code
5657 || je.header.fiscal_year != fiscal_year
5658 || je.header.fiscal_period != fiscal_period
5659 {
5660 continue;
5661 }
5662
5663 for line in &je.lines {
5664 let acct = &line.gl_account;
5665 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5666 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5667 }
5668 }
5669
5670 let mut all_accounts: Vec<&String> = account_debits
5672 .keys()
5673 .chain(account_credits.keys())
5674 .collect::<std::collections::HashSet<_>>()
5675 .into_iter()
5676 .collect();
5677 all_accounts.sort();
5678
5679 let mut entries = Vec::new();
5680
5681 for acct_number in all_accounts {
5682 let debit = account_debits
5683 .get(acct_number)
5684 .copied()
5685 .unwrap_or(Decimal::ZERO);
5686 let credit = account_credits
5687 .get(acct_number)
5688 .copied()
5689 .unwrap_or(Decimal::ZERO);
5690
5691 if debit.is_zero() && credit.is_zero() {
5692 continue;
5693 }
5694
5695 let account_name = coa
5697 .get_account(acct_number)
5698 .map(|gl| gl.short_description.clone())
5699 .unwrap_or_else(|| format!("Account {acct_number}"));
5700
5701 let category = Self::category_from_account_code(acct_number);
5706
5707 entries.push(datasynth_generators::TrialBalanceEntry {
5708 account_code: acct_number.clone(),
5709 account_name,
5710 category,
5711 debit_balance: debit,
5712 credit_balance: credit,
5713 });
5714 }
5715
5716 entries
5717 }
5718
5719 fn build_cumulative_trial_balance(
5726 journal_entries: &[JournalEntry],
5727 coa: &ChartOfAccounts,
5728 company_code: &str,
5729 start_date: NaiveDate,
5730 period_end: NaiveDate,
5731 fiscal_year: u16,
5732 fiscal_period: u8,
5733 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5734 use rust_decimal::Decimal;
5735
5736 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5738 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5739
5740 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5742 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5743
5744 for je in journal_entries {
5745 if je.header.company_code != company_code {
5746 continue;
5747 }
5748
5749 for line in &je.lines {
5750 let acct = &line.gl_account;
5751 let category = Self::category_from_account_code(acct);
5752 let is_bs_account = matches!(
5753 category.as_str(),
5754 "Cash"
5755 | "Receivables"
5756 | "Inventory"
5757 | "FixedAssets"
5758 | "Payables"
5759 | "AccruedLiabilities"
5760 | "LongTermDebt"
5761 | "Equity"
5762 );
5763
5764 if is_bs_account {
5765 if je.header.document_date <= period_end
5767 && je.header.document_date >= start_date
5768 {
5769 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5770 line.debit_amount;
5771 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5772 line.credit_amount;
5773 }
5774 } else {
5775 if je.header.fiscal_year == fiscal_year
5777 && je.header.fiscal_period == fiscal_period
5778 {
5779 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5780 line.debit_amount;
5781 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5782 line.credit_amount;
5783 }
5784 }
5785 }
5786 }
5787
5788 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5790 all_accounts.extend(bs_debits.keys().cloned());
5791 all_accounts.extend(bs_credits.keys().cloned());
5792 all_accounts.extend(is_debits.keys().cloned());
5793 all_accounts.extend(is_credits.keys().cloned());
5794
5795 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5796 sorted_accounts.sort();
5797
5798 let mut entries = Vec::new();
5799
5800 for acct_number in &sorted_accounts {
5801 let category = Self::category_from_account_code(acct_number);
5802 let is_bs_account = matches!(
5803 category.as_str(),
5804 "Cash"
5805 | "Receivables"
5806 | "Inventory"
5807 | "FixedAssets"
5808 | "Payables"
5809 | "AccruedLiabilities"
5810 | "LongTermDebt"
5811 | "Equity"
5812 );
5813
5814 let (debit, credit) = if is_bs_account {
5815 (
5816 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5817 bs_credits
5818 .get(acct_number)
5819 .copied()
5820 .unwrap_or(Decimal::ZERO),
5821 )
5822 } else {
5823 (
5824 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5825 is_credits
5826 .get(acct_number)
5827 .copied()
5828 .unwrap_or(Decimal::ZERO),
5829 )
5830 };
5831
5832 if debit.is_zero() && credit.is_zero() {
5833 continue;
5834 }
5835
5836 let account_name = coa
5837 .get_account(acct_number)
5838 .map(|gl| gl.short_description.clone())
5839 .unwrap_or_else(|| format!("Account {acct_number}"));
5840
5841 entries.push(datasynth_generators::TrialBalanceEntry {
5842 account_code: acct_number.clone(),
5843 account_name,
5844 category,
5845 debit_balance: debit,
5846 credit_balance: credit,
5847 });
5848 }
5849
5850 entries
5851 }
5852
5853 fn build_cash_flow_from_trial_balances(
5858 current_tb: &[datasynth_generators::TrialBalanceEntry],
5859 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5860 net_income: rust_decimal::Decimal,
5861 ) -> Vec<CashFlowItem> {
5862 use rust_decimal::Decimal;
5863
5864 let aggregate =
5866 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5867 let mut map: HashMap<String, Decimal> = HashMap::new();
5868 for entry in tb {
5869 let net = entry.debit_balance - entry.credit_balance;
5870 *map.entry(entry.category.clone()).or_default() += net;
5871 }
5872 map
5873 };
5874
5875 let current = aggregate(current_tb);
5876 let prior = prior_tb.map(aggregate);
5877
5878 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5880 *map.get(key).unwrap_or(&Decimal::ZERO)
5881 };
5882
5883 let change = |key: &str| -> Decimal {
5885 let curr = get(¤t, key);
5886 match &prior {
5887 Some(p) => curr - get(p, key),
5888 None => curr,
5889 }
5890 };
5891
5892 let fixed_asset_change = change("FixedAssets");
5895 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5896 -fixed_asset_change
5897 } else {
5898 Decimal::ZERO
5899 };
5900
5901 let ar_change = change("Receivables");
5903 let inventory_change = change("Inventory");
5904 let ap_change = change("Payables");
5906 let accrued_change = change("AccruedLiabilities");
5907
5908 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5909 + (-ap_change)
5910 + (-accrued_change);
5911
5912 let capex = if fixed_asset_change > Decimal::ZERO {
5914 -fixed_asset_change
5915 } else {
5916 Decimal::ZERO
5917 };
5918 let investing_cf = capex;
5919
5920 let debt_change = -change("LongTermDebt");
5922 let equity_change = -change("Equity");
5923 let financing_cf = debt_change + equity_change;
5924
5925 let net_change = operating_cf + investing_cf + financing_cf;
5926
5927 vec![
5928 CashFlowItem {
5929 item_code: "CF-NI".to_string(),
5930 label: "Net Income".to_string(),
5931 category: CashFlowCategory::Operating,
5932 amount: net_income,
5933 amount_prior: None,
5934 sort_order: 1,
5935 is_total: false,
5936 },
5937 CashFlowItem {
5938 item_code: "CF-DEP".to_string(),
5939 label: "Depreciation & Amortization".to_string(),
5940 category: CashFlowCategory::Operating,
5941 amount: depreciation_addback,
5942 amount_prior: None,
5943 sort_order: 2,
5944 is_total: false,
5945 },
5946 CashFlowItem {
5947 item_code: "CF-AR".to_string(),
5948 label: "Change in Accounts Receivable".to_string(),
5949 category: CashFlowCategory::Operating,
5950 amount: -ar_change,
5951 amount_prior: None,
5952 sort_order: 3,
5953 is_total: false,
5954 },
5955 CashFlowItem {
5956 item_code: "CF-AP".to_string(),
5957 label: "Change in Accounts Payable".to_string(),
5958 category: CashFlowCategory::Operating,
5959 amount: -ap_change,
5960 amount_prior: None,
5961 sort_order: 4,
5962 is_total: false,
5963 },
5964 CashFlowItem {
5965 item_code: "CF-INV".to_string(),
5966 label: "Change in Inventory".to_string(),
5967 category: CashFlowCategory::Operating,
5968 amount: -inventory_change,
5969 amount_prior: None,
5970 sort_order: 5,
5971 is_total: false,
5972 },
5973 CashFlowItem {
5974 item_code: "CF-OP".to_string(),
5975 label: "Net Cash from Operating Activities".to_string(),
5976 category: CashFlowCategory::Operating,
5977 amount: operating_cf,
5978 amount_prior: None,
5979 sort_order: 6,
5980 is_total: true,
5981 },
5982 CashFlowItem {
5983 item_code: "CF-CAPEX".to_string(),
5984 label: "Capital Expenditures".to_string(),
5985 category: CashFlowCategory::Investing,
5986 amount: capex,
5987 amount_prior: None,
5988 sort_order: 7,
5989 is_total: false,
5990 },
5991 CashFlowItem {
5992 item_code: "CF-INV-T".to_string(),
5993 label: "Net Cash from Investing Activities".to_string(),
5994 category: CashFlowCategory::Investing,
5995 amount: investing_cf,
5996 amount_prior: None,
5997 sort_order: 8,
5998 is_total: true,
5999 },
6000 CashFlowItem {
6001 item_code: "CF-DEBT".to_string(),
6002 label: "Net Borrowings / (Repayments)".to_string(),
6003 category: CashFlowCategory::Financing,
6004 amount: debt_change,
6005 amount_prior: None,
6006 sort_order: 9,
6007 is_total: false,
6008 },
6009 CashFlowItem {
6010 item_code: "CF-EQ".to_string(),
6011 label: "Equity Changes".to_string(),
6012 category: CashFlowCategory::Financing,
6013 amount: equity_change,
6014 amount_prior: None,
6015 sort_order: 10,
6016 is_total: false,
6017 },
6018 CashFlowItem {
6019 item_code: "CF-FIN-T".to_string(),
6020 label: "Net Cash from Financing Activities".to_string(),
6021 category: CashFlowCategory::Financing,
6022 amount: financing_cf,
6023 amount_prior: None,
6024 sort_order: 11,
6025 is_total: true,
6026 },
6027 CashFlowItem {
6028 item_code: "CF-NET".to_string(),
6029 label: "Net Change in Cash".to_string(),
6030 category: CashFlowCategory::Operating,
6031 amount: net_change,
6032 amount_prior: None,
6033 sort_order: 12,
6034 is_total: true,
6035 },
6036 ]
6037 }
6038
6039 fn calculate_net_income_from_tb(
6043 tb: &[datasynth_generators::TrialBalanceEntry],
6044 ) -> rust_decimal::Decimal {
6045 use rust_decimal::Decimal;
6046
6047 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6048 for entry in tb {
6049 let net = entry.debit_balance - entry.credit_balance;
6050 *aggregated.entry(entry.category.clone()).or_default() += net;
6051 }
6052
6053 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6054 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6055 let opex = *aggregated
6056 .get("OperatingExpenses")
6057 .unwrap_or(&Decimal::ZERO);
6058 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6059 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6060
6061 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6064 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6066 operating_income - tax
6067 }
6068
/// Maps an account code to a reporting category using its first two characters.
///
/// Only the leading two characters of `code` are inspected. Any code whose
/// prefix is not listed below (including codes shorter than two characters)
/// is classified as `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        // 1x: assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x: liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x: equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x: income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything else, including unmapped prefixes such as 45-49 or 9x.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
6097
6098 fn phase_hr_data(
6100 &mut self,
6101 stats: &mut EnhancedGenerationStatistics,
6102 ) -> SynthResult<HrSnapshot> {
6103 if !self.phase_config.generate_hr {
6104 debug!("Phase 16: Skipped (HR generation disabled)");
6105 return Ok(HrSnapshot::default());
6106 }
6107
6108 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6109
6110 let seed = self.seed;
6111 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6112 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6113 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6114 let company_code = self
6115 .config
6116 .companies
6117 .first()
6118 .map(|c| c.code.as_str())
6119 .unwrap_or("1000");
6120 let currency = self
6121 .config
6122 .companies
6123 .first()
6124 .map(|c| c.currency.as_str())
6125 .unwrap_or("USD");
6126
6127 let employee_ids: Vec<String> = self
6128 .master_data
6129 .employees
6130 .iter()
6131 .map(|e| e.employee_id.clone())
6132 .collect();
6133
6134 if employee_ids.is_empty() {
6135 debug!("Phase 16: Skipped (no employees available)");
6136 return Ok(HrSnapshot::default());
6137 }
6138
6139 let cost_center_ids: Vec<String> = self
6142 .master_data
6143 .employees
6144 .iter()
6145 .filter_map(|e| e.cost_center.clone())
6146 .collect::<std::collections::HashSet<_>>()
6147 .into_iter()
6148 .collect();
6149
6150 let mut snapshot = HrSnapshot::default();
6151
6152 if self.config.hr.payroll.enabled {
6154 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6155 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6156
6157 let payroll_pack = self.primary_pack();
6159
6160 payroll_gen.set_country_pack(payroll_pack.clone());
6163
6164 let employees_with_salary: Vec<(
6165 String,
6166 rust_decimal::Decimal,
6167 Option<String>,
6168 Option<String>,
6169 )> = self
6170 .master_data
6171 .employees
6172 .iter()
6173 .map(|e| {
6174 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6177 e.base_salary
6178 } else {
6179 rust_decimal::Decimal::from(60_000)
6180 };
6181 (
6182 e.employee_id.clone(),
6183 annual, e.cost_center.clone(),
6185 e.department_id.clone(),
6186 )
6187 })
6188 .collect();
6189
6190 let change_history = &self.master_data.employee_change_history;
6193 let has_changes = !change_history.is_empty();
6194 if has_changes {
6195 debug!(
6196 "Payroll will incorporate {} employee change events",
6197 change_history.len()
6198 );
6199 }
6200
6201 for month in 0..self.config.global.period_months {
6202 let period_start = start_date + chrono::Months::new(month);
6203 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6204 let (run, items) = if has_changes {
6205 payroll_gen.generate_with_changes(
6206 company_code,
6207 &employees_with_salary,
6208 period_start,
6209 period_end,
6210 currency,
6211 change_history,
6212 )
6213 } else {
6214 payroll_gen.generate(
6215 company_code,
6216 &employees_with_salary,
6217 period_start,
6218 period_end,
6219 currency,
6220 )
6221 };
6222 snapshot.payroll_runs.push(run);
6223 snapshot.payroll_run_count += 1;
6224 snapshot.payroll_line_item_count += items.len();
6225 snapshot.payroll_line_items.extend(items);
6226 }
6227 }
6228
6229 if self.config.hr.time_attendance.enabled {
6231 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6232 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6233 let entries = time_gen.generate(
6234 &employee_ids,
6235 start_date,
6236 end_date,
6237 &self.config.hr.time_attendance,
6238 );
6239 snapshot.time_entry_count = entries.len();
6240 snapshot.time_entries = entries;
6241 }
6242
6243 if self.config.hr.expenses.enabled {
6245 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6246 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6247 expense_gen.set_country_pack(self.primary_pack().clone());
6248 let company_currency = self
6249 .config
6250 .companies
6251 .first()
6252 .map(|c| c.currency.as_str())
6253 .unwrap_or("USD");
6254 let reports = expense_gen.generate_with_currency(
6255 &employee_ids,
6256 start_date,
6257 end_date,
6258 &self.config.hr.expenses,
6259 company_currency,
6260 );
6261 snapshot.expense_report_count = reports.len();
6262 snapshot.expense_reports = reports;
6263 }
6264
6265 if self.config.hr.payroll.enabled {
6267 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6268 let employee_pairs: Vec<(String, String)> = self
6269 .master_data
6270 .employees
6271 .iter()
6272 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6273 .collect();
6274 let enrollments =
6275 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6276 snapshot.benefit_enrollment_count = enrollments.len();
6277 snapshot.benefit_enrollments = enrollments;
6278 }
6279
6280 if self.phase_config.generate_hr {
6282 let entity_name = self
6283 .config
6284 .companies
6285 .first()
6286 .map(|c| c.name.as_str())
6287 .unwrap_or("Entity");
6288 let period_months = self.config.global.period_months;
6289 let period_label = {
6290 let y = start_date.year();
6291 let m = start_date.month();
6292 if period_months >= 12 {
6293 format!("FY{y}")
6294 } else {
6295 format!("{y}-{m:02}")
6296 }
6297 };
6298 let reporting_date =
6299 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6300
6301 let avg_salary: Option<rust_decimal::Decimal> = {
6306 let employee_count = employee_ids.len();
6307 if self.config.hr.payroll.enabled
6308 && employee_count > 0
6309 && !snapshot.payroll_runs.is_empty()
6310 {
6311 let total_gross: rust_decimal::Decimal = snapshot
6313 .payroll_runs
6314 .iter()
6315 .filter(|r| r.company_code == company_code)
6316 .map(|r| r.total_gross)
6317 .sum();
6318 if total_gross > rust_decimal::Decimal::ZERO {
6319 let annual_total = if period_months > 0 && period_months < 12 {
6321 total_gross * rust_decimal::Decimal::from(12u32)
6322 / rust_decimal::Decimal::from(period_months)
6323 } else {
6324 total_gross
6325 };
6326 Some(
6327 (annual_total / rust_decimal::Decimal::from(employee_count))
6328 .round_dp(2),
6329 )
6330 } else {
6331 None
6332 }
6333 } else {
6334 None
6335 }
6336 };
6337
6338 let mut pension_gen =
6339 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6340 let pension_snap = pension_gen.generate(
6341 company_code,
6342 entity_name,
6343 &period_label,
6344 reporting_date,
6345 employee_ids.len(),
6346 currency,
6347 avg_salary,
6348 period_months,
6349 );
6350 snapshot.pension_plan_count = pension_snap.plans.len();
6351 snapshot.pension_plans = pension_snap.plans;
6352 snapshot.pension_obligations = pension_snap.obligations;
6353 snapshot.pension_plan_assets = pension_snap.plan_assets;
6354 snapshot.pension_disclosures = pension_snap.disclosures;
6355 snapshot.pension_journal_entries = pension_snap.journal_entries;
6360 }
6361
6362 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6364 let period_months = self.config.global.period_months;
6365 let period_label = {
6366 let y = start_date.year();
6367 let m = start_date.month();
6368 if period_months >= 12 {
6369 format!("FY{y}")
6370 } else {
6371 format!("{y}-{m:02}")
6372 }
6373 };
6374 let reporting_date =
6375 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6376
6377 let mut stock_comp_gen =
6378 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6379 let stock_snap = stock_comp_gen.generate(
6380 company_code,
6381 &employee_ids,
6382 start_date,
6383 &period_label,
6384 reporting_date,
6385 currency,
6386 );
6387 snapshot.stock_grant_count = stock_snap.grants.len();
6388 snapshot.stock_grants = stock_snap.grants;
6389 snapshot.stock_comp_expenses = stock_snap.expenses;
6390 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6391 }
6392
6393 stats.payroll_run_count = snapshot.payroll_run_count;
6394 stats.time_entry_count = snapshot.time_entry_count;
6395 stats.expense_report_count = snapshot.expense_report_count;
6396 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6397 stats.pension_plan_count = snapshot.pension_plan_count;
6398 stats.stock_grant_count = snapshot.stock_grant_count;
6399
6400 info!(
6401 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6402 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6403 snapshot.time_entry_count, snapshot.expense_report_count,
6404 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6405 snapshot.stock_grant_count
6406 );
6407 self.check_resources_with_log("post-hr")?;
6408
6409 Ok(snapshot)
6410 }
6411
6412 fn phase_accounting_standards(
6414 &mut self,
6415 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6416 journal_entries: &[JournalEntry],
6417 stats: &mut EnhancedGenerationStatistics,
6418 ) -> SynthResult<AccountingStandardsSnapshot> {
6419 if !self.phase_config.generate_accounting_standards {
6420 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6421 return Ok(AccountingStandardsSnapshot::default());
6422 }
6423 info!("Phase 17: Generating Accounting Standards Data");
6424
6425 let seed = self.seed;
6426 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6427 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6428 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6429 let company_code = self
6430 .config
6431 .companies
6432 .first()
6433 .map(|c| c.code.as_str())
6434 .unwrap_or("1000");
6435 let currency = self
6436 .config
6437 .companies
6438 .first()
6439 .map(|c| c.currency.as_str())
6440 .unwrap_or("USD");
6441
6442 let framework = match self.config.accounting_standards.framework {
6447 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6448 datasynth_standards::framework::AccountingFramework::UsGaap
6449 }
6450 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6451 datasynth_standards::framework::AccountingFramework::Ifrs
6452 }
6453 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6454 datasynth_standards::framework::AccountingFramework::DualReporting
6455 }
6456 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6457 datasynth_standards::framework::AccountingFramework::FrenchGaap
6458 }
6459 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6460 datasynth_standards::framework::AccountingFramework::GermanGaap
6461 }
6462 None => {
6463 let pack = self.primary_pack();
6465 let pack_fw = pack.accounting.framework.as_str();
6466 match pack_fw {
6467 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6468 "dual_reporting" => {
6469 datasynth_standards::framework::AccountingFramework::DualReporting
6470 }
6471 "french_gaap" => {
6472 datasynth_standards::framework::AccountingFramework::FrenchGaap
6473 }
6474 "german_gaap" | "hgb" => {
6475 datasynth_standards::framework::AccountingFramework::GermanGaap
6476 }
6477 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6479 }
6480 }
6481 };
6482
6483 let mut snapshot = AccountingStandardsSnapshot::default();
6484
6485 if self.config.accounting_standards.revenue_recognition.enabled {
6487 let customer_ids: Vec<String> = self
6488 .master_data
6489 .customers
6490 .iter()
6491 .map(|c| c.customer_id.clone())
6492 .collect();
6493
6494 if !customer_ids.is_empty() {
6495 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6496 let contracts = rev_gen.generate(
6497 company_code,
6498 &customer_ids,
6499 start_date,
6500 end_date,
6501 currency,
6502 &self.config.accounting_standards.revenue_recognition,
6503 framework,
6504 );
6505 snapshot.revenue_contract_count = contracts.len();
6506 snapshot.contracts = contracts;
6507 }
6508 }
6509
6510 if self.config.accounting_standards.impairment.enabled {
6512 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6513 .master_data
6514 .assets
6515 .iter()
6516 .map(|a| {
6517 (
6518 a.asset_id.clone(),
6519 a.description.clone(),
6520 a.acquisition_cost,
6521 )
6522 })
6523 .collect();
6524
6525 if !asset_data.is_empty() {
6526 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6527 let tests = imp_gen.generate(
6528 company_code,
6529 &asset_data,
6530 end_date,
6531 &self.config.accounting_standards.impairment,
6532 framework,
6533 );
6534 snapshot.impairment_test_count = tests.len();
6535 snapshot.impairment_tests = tests;
6536 }
6537 }
6538
6539 if self
6541 .config
6542 .accounting_standards
6543 .business_combinations
6544 .enabled
6545 {
6546 let bc_config = &self.config.accounting_standards.business_combinations;
6547 let framework_str = match framework {
6548 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6549 _ => "US_GAAP",
6550 };
6551 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6552 let bc_snap = bc_gen.generate(
6553 company_code,
6554 currency,
6555 start_date,
6556 end_date,
6557 bc_config.acquisition_count,
6558 framework_str,
6559 );
6560 snapshot.business_combination_count = bc_snap.combinations.len();
6561 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6562 snapshot.business_combinations = bc_snap.combinations;
6563 }
6564
6565 if self
6567 .config
6568 .accounting_standards
6569 .expected_credit_loss
6570 .enabled
6571 {
6572 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6573 let framework_str = match framework {
6574 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6575 _ => "ASC_326",
6576 };
6577
6578 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6581
6582 let mut ecl_gen = EclGenerator::new(seed + 43);
6583
6584 let bucket_exposures: Vec<(
6586 datasynth_core::models::subledger::ar::AgingBucket,
6587 rust_decimal::Decimal,
6588 )> = if ar_aging_reports.is_empty() {
6589 use datasynth_core::models::subledger::ar::AgingBucket;
6591 vec![
6592 (
6593 AgingBucket::Current,
6594 rust_decimal::Decimal::from(500_000_u32),
6595 ),
6596 (
6597 AgingBucket::Days1To30,
6598 rust_decimal::Decimal::from(120_000_u32),
6599 ),
6600 (
6601 AgingBucket::Days31To60,
6602 rust_decimal::Decimal::from(45_000_u32),
6603 ),
6604 (
6605 AgingBucket::Days61To90,
6606 rust_decimal::Decimal::from(15_000_u32),
6607 ),
6608 (
6609 AgingBucket::Over90Days,
6610 rust_decimal::Decimal::from(8_000_u32),
6611 ),
6612 ]
6613 } else {
6614 use datasynth_core::models::subledger::ar::AgingBucket;
6615 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6617 std::collections::HashMap::new();
6618 for report in ar_aging_reports {
6619 for (bucket, amount) in &report.bucket_totals {
6620 *totals.entry(*bucket).or_default() += amount;
6621 }
6622 }
6623 AgingBucket::all()
6624 .into_iter()
6625 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6626 .collect()
6627 };
6628
6629 let ecl_snap = ecl_gen.generate(
6630 company_code,
6631 end_date,
6632 &bucket_exposures,
6633 ecl_config,
6634 &period_label,
6635 framework_str,
6636 );
6637
6638 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6639 snapshot.ecl_models = ecl_snap.ecl_models;
6640 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6641 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6642 }
6643
6644 {
6646 let framework_str = match framework {
6647 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6648 _ => "US_GAAP",
6649 };
6650
6651 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6656 .max(rust_decimal::Decimal::from(100_000_u32));
6657
6658 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6659
6660 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6661 let prov_snap = prov_gen.generate(
6662 company_code,
6663 currency,
6664 revenue_proxy,
6665 end_date,
6666 &period_label,
6667 framework_str,
6668 None, );
6670
6671 snapshot.provision_count = prov_snap.provisions.len();
6672 snapshot.provisions = prov_snap.provisions;
6673 snapshot.provision_movements = prov_snap.movements;
6674 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6675 snapshot.provision_journal_entries = prov_snap.journal_entries;
6676 }
6677
6678 {
6682 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6683
6684 let presentation_currency = self
6685 .config
6686 .global
6687 .presentation_currency
6688 .clone()
6689 .unwrap_or_else(|| self.config.global.group_currency.clone());
6690
6691 let mut rate_table = FxRateTable::new(&presentation_currency);
6694
6695 let base_rates = base_rates_usd();
6699 for (ccy, rate) in &base_rates {
6700 rate_table.add_rate(FxRate::new(
6701 ccy,
6702 "USD",
6703 RateType::Closing,
6704 end_date,
6705 *rate,
6706 "SYNTHETIC",
6707 ));
6708 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6711 rate_table.add_rate(FxRate::new(
6712 ccy,
6713 "USD",
6714 RateType::Average,
6715 end_date,
6716 avg,
6717 "SYNTHETIC",
6718 ));
6719 }
6720
6721 let mut translation_results = Vec::new();
6722 for company in &self.config.companies {
6723 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6726 .max(rust_decimal::Decimal::from(100_000_u32));
6727
6728 let func_ccy = company
6729 .functional_currency
6730 .clone()
6731 .unwrap_or_else(|| company.currency.clone());
6732
6733 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6734 &company.code,
6735 &func_ccy,
6736 &presentation_currency,
6737 &ias21_period_label,
6738 end_date,
6739 company_revenue,
6740 &rate_table,
6741 );
6742 translation_results.push(result);
6743 }
6744
6745 snapshot.currency_translation_count = translation_results.len();
6746 snapshot.currency_translation_results = translation_results;
6747 }
6748
6749 stats.revenue_contract_count = snapshot.revenue_contract_count;
6750 stats.impairment_test_count = snapshot.impairment_test_count;
6751 stats.business_combination_count = snapshot.business_combination_count;
6752 stats.ecl_model_count = snapshot.ecl_model_count;
6753 stats.provision_count = snapshot.provision_count;
6754
6755 info!(
6756 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6757 snapshot.revenue_contract_count,
6758 snapshot.impairment_test_count,
6759 snapshot.business_combination_count,
6760 snapshot.ecl_model_count,
6761 snapshot.provision_count,
6762 snapshot.currency_translation_count
6763 );
6764 self.check_resources_with_log("post-accounting-standards")?;
6765
6766 Ok(snapshot)
6767 }
6768
6769 fn phase_manufacturing(
6771 &mut self,
6772 stats: &mut EnhancedGenerationStatistics,
6773 ) -> SynthResult<ManufacturingSnapshot> {
6774 if !self.phase_config.generate_manufacturing {
6775 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6776 return Ok(ManufacturingSnapshot::default());
6777 }
6778 info!("Phase 18: Generating Manufacturing Data");
6779
6780 let seed = self.seed;
6781 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6782 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6783 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6784 let company_code = self
6785 .config
6786 .companies
6787 .first()
6788 .map(|c| c.code.as_str())
6789 .unwrap_or("1000");
6790
6791 let material_data: Vec<(String, String)> = self
6792 .master_data
6793 .materials
6794 .iter()
6795 .map(|m| (m.material_id.clone(), m.description.clone()))
6796 .collect();
6797
6798 if material_data.is_empty() {
6799 debug!("Phase 18: Skipped (no materials available)");
6800 return Ok(ManufacturingSnapshot::default());
6801 }
6802
6803 let mut snapshot = ManufacturingSnapshot::default();
6804
6805 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6807 let production_orders = prod_gen.generate(
6808 company_code,
6809 &material_data,
6810 start_date,
6811 end_date,
6812 &self.config.manufacturing.production_orders,
6813 &self.config.manufacturing.costing,
6814 &self.config.manufacturing.routing,
6815 );
6816 snapshot.production_order_count = production_orders.len();
6817
6818 let inspection_data: Vec<(String, String, String)> = production_orders
6820 .iter()
6821 .map(|po| {
6822 (
6823 po.order_id.clone(),
6824 po.material_id.clone(),
6825 po.material_description.clone(),
6826 )
6827 })
6828 .collect();
6829
6830 snapshot.production_orders = production_orders;
6831
6832 if !inspection_data.is_empty() {
6833 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6834 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6835 snapshot.quality_inspection_count = inspections.len();
6836 snapshot.quality_inspections = inspections;
6837 }
6838
6839 let storage_locations: Vec<(String, String)> = material_data
6841 .iter()
6842 .enumerate()
6843 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6844 .collect();
6845
6846 let employee_ids: Vec<String> = self
6847 .master_data
6848 .employees
6849 .iter()
6850 .map(|e| e.employee_id.clone())
6851 .collect();
6852 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6853 .with_employee_pool(employee_ids);
6854 let mut cycle_count_total = 0usize;
6855 for month in 0..self.config.global.period_months {
6856 let count_date = start_date + chrono::Months::new(month);
6857 let items_per_count = storage_locations.len().clamp(10, 50);
6858 let cc = cc_gen.generate(
6859 company_code,
6860 &storage_locations,
6861 count_date,
6862 items_per_count,
6863 );
6864 snapshot.cycle_counts.push(cc);
6865 cycle_count_total += 1;
6866 }
6867 snapshot.cycle_count_count = cycle_count_total;
6868
6869 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6871 let bom_components = bom_gen.generate(company_code, &material_data);
6872 snapshot.bom_component_count = bom_components.len();
6873 snapshot.bom_components = bom_components;
6874
6875 let currency = self
6877 .config
6878 .companies
6879 .first()
6880 .map(|c| c.currency.as_str())
6881 .unwrap_or("USD");
6882 let production_order_ids: Vec<String> = snapshot
6883 .production_orders
6884 .iter()
6885 .map(|po| po.order_id.clone())
6886 .collect();
6887 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6888 let inventory_movements = inv_mov_gen.generate_with_production_orders(
6889 company_code,
6890 &material_data,
6891 start_date,
6892 end_date,
6893 2,
6894 currency,
6895 &production_order_ids,
6896 );
6897 snapshot.inventory_movement_count = inventory_movements.len();
6898 snapshot.inventory_movements = inventory_movements;
6899
6900 stats.production_order_count = snapshot.production_order_count;
6901 stats.quality_inspection_count = snapshot.quality_inspection_count;
6902 stats.cycle_count_count = snapshot.cycle_count_count;
6903 stats.bom_component_count = snapshot.bom_component_count;
6904 stats.inventory_movement_count = snapshot.inventory_movement_count;
6905
6906 info!(
6907 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6908 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6909 snapshot.bom_component_count, snapshot.inventory_movement_count
6910 );
6911 self.check_resources_with_log("post-manufacturing")?;
6912
6913 Ok(snapshot)
6914 }
6915
6916 fn phase_sales_kpi_budgets(
6918 &mut self,
6919 coa: &Arc<ChartOfAccounts>,
6920 financial_reporting: &FinancialReportingSnapshot,
6921 stats: &mut EnhancedGenerationStatistics,
6922 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6923 if !self.phase_config.generate_sales_kpi_budgets {
6924 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6925 return Ok(SalesKpiBudgetsSnapshot::default());
6926 }
6927 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6928
6929 let seed = self.seed;
6930 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6931 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6932 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6933 let company_code = self
6934 .config
6935 .companies
6936 .first()
6937 .map(|c| c.code.as_str())
6938 .unwrap_or("1000");
6939
6940 let mut snapshot = SalesKpiBudgetsSnapshot::default();
6941
6942 if self.config.sales_quotes.enabled {
6944 let customer_data: Vec<(String, String)> = self
6945 .master_data
6946 .customers
6947 .iter()
6948 .map(|c| (c.customer_id.clone(), c.name.clone()))
6949 .collect();
6950 let material_data: Vec<(String, String)> = self
6951 .master_data
6952 .materials
6953 .iter()
6954 .map(|m| (m.material_id.clone(), m.description.clone()))
6955 .collect();
6956
6957 if !customer_data.is_empty() && !material_data.is_empty() {
6958 let employee_ids: Vec<String> = self
6959 .master_data
6960 .employees
6961 .iter()
6962 .map(|e| e.employee_id.clone())
6963 .collect();
6964 let customer_ids: Vec<String> = self
6965 .master_data
6966 .customers
6967 .iter()
6968 .map(|c| c.customer_id.clone())
6969 .collect();
6970 let company_currency = self
6971 .config
6972 .companies
6973 .first()
6974 .map(|c| c.currency.as_str())
6975 .unwrap_or("USD");
6976
6977 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6978 .with_pools(employee_ids, customer_ids);
6979 let quotes = quote_gen.generate_with_currency(
6980 company_code,
6981 &customer_data,
6982 &material_data,
6983 start_date,
6984 end_date,
6985 &self.config.sales_quotes,
6986 company_currency,
6987 );
6988 snapshot.sales_quote_count = quotes.len();
6989 snapshot.sales_quotes = quotes;
6990 }
6991 }
6992
6993 if self.config.financial_reporting.management_kpis.enabled {
6995 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6996 let mut kpis = kpi_gen.generate(
6997 company_code,
6998 start_date,
6999 end_date,
7000 &self.config.financial_reporting.management_kpis,
7001 );
7002
7003 {
7005 use rust_decimal::Decimal;
7006
7007 if let Some(income_stmt) =
7008 financial_reporting.financial_statements.iter().find(|fs| {
7009 fs.statement_type == StatementType::IncomeStatement
7010 && fs.company_code == company_code
7011 })
7012 {
7013 let total_revenue: Decimal = income_stmt
7015 .line_items
7016 .iter()
7017 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7018 .map(|li| li.amount)
7019 .sum();
7020 let total_cogs: Decimal = income_stmt
7021 .line_items
7022 .iter()
7023 .filter(|li| {
7024 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7025 && !li.is_total
7026 })
7027 .map(|li| li.amount.abs())
7028 .sum();
7029 let total_opex: Decimal = income_stmt
7030 .line_items
7031 .iter()
7032 .filter(|li| {
7033 li.section.contains("Expense")
7034 && !li.is_total
7035 && !li.section.contains("Cost")
7036 })
7037 .map(|li| li.amount.abs())
7038 .sum();
7039
7040 if total_revenue > Decimal::ZERO {
7041 let hundred = Decimal::from(100);
7042 let gross_margin_pct =
7043 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7044 let operating_income = total_revenue - total_cogs - total_opex;
7045 let op_margin_pct =
7046 (operating_income * hundred / total_revenue).round_dp(2);
7047
7048 for kpi in &mut kpis {
7050 if kpi.name == "Gross Margin" {
7051 kpi.value = gross_margin_pct;
7052 } else if kpi.name == "Operating Margin" {
7053 kpi.value = op_margin_pct;
7054 }
7055 }
7056 }
7057 }
7058
7059 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7061 fs.statement_type == StatementType::BalanceSheet
7062 && fs.company_code == company_code
7063 }) {
7064 let current_assets: Decimal = bs
7065 .line_items
7066 .iter()
7067 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7068 .map(|li| li.amount)
7069 .sum();
7070 let current_liabilities: Decimal = bs
7071 .line_items
7072 .iter()
7073 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7074 .map(|li| li.amount.abs())
7075 .sum();
7076
7077 if current_liabilities > Decimal::ZERO {
7078 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7079 for kpi in &mut kpis {
7080 if kpi.name == "Current Ratio" {
7081 kpi.value = current_ratio;
7082 }
7083 }
7084 }
7085 }
7086 }
7087
7088 snapshot.kpi_count = kpis.len();
7089 snapshot.kpis = kpis;
7090 }
7091
7092 if self.config.financial_reporting.budgets.enabled {
7094 let account_data: Vec<(String, String)> = coa
7095 .accounts
7096 .iter()
7097 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7098 .collect();
7099
7100 if !account_data.is_empty() {
7101 let fiscal_year = start_date.year() as u32;
7102 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7103 let budget = budget_gen.generate(
7104 company_code,
7105 fiscal_year,
7106 &account_data,
7107 &self.config.financial_reporting.budgets,
7108 );
7109 snapshot.budget_line_count = budget.line_items.len();
7110 snapshot.budgets.push(budget);
7111 }
7112 }
7113
7114 stats.sales_quote_count = snapshot.sales_quote_count;
7115 stats.kpi_count = snapshot.kpi_count;
7116 stats.budget_line_count = snapshot.budget_line_count;
7117
7118 info!(
7119 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7120 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7121 );
7122 self.check_resources_with_log("post-sales-kpi-budgets")?;
7123
7124 Ok(snapshot)
7125 }
7126
7127 fn compute_pre_tax_income(
7134 company_code: &str,
7135 journal_entries: &[JournalEntry],
7136 ) -> rust_decimal::Decimal {
7137 use datasynth_core::accounts::AccountCategory;
7138 use rust_decimal::Decimal;
7139
7140 let mut total_revenue = Decimal::ZERO;
7141 let mut total_expenses = Decimal::ZERO;
7142
7143 for je in journal_entries {
7144 if je.header.company_code != company_code {
7145 continue;
7146 }
7147 for line in &je.lines {
7148 let cat = AccountCategory::from_account(&line.gl_account);
7149 match cat {
7150 AccountCategory::Revenue => {
7151 total_revenue += line.credit_amount - line.debit_amount;
7152 }
7153 AccountCategory::Cogs
7154 | AccountCategory::OperatingExpense
7155 | AccountCategory::OtherIncomeExpense => {
7156 total_expenses += line.debit_amount - line.credit_amount;
7157 }
7158 _ => {}
7159 }
7160 }
7161 }
7162
7163 let pti = (total_revenue - total_expenses).round_dp(2);
7164 if pti == rust_decimal::Decimal::ZERO {
7165 rust_decimal::Decimal::from(1_000_000u32)
7168 } else {
7169 pti
7170 }
7171 }
7172
7173 fn phase_tax_generation(
7175 &mut self,
7176 document_flows: &DocumentFlowSnapshot,
7177 journal_entries: &[JournalEntry],
7178 stats: &mut EnhancedGenerationStatistics,
7179 ) -> SynthResult<TaxSnapshot> {
7180 if !self.phase_config.generate_tax {
7181 debug!("Phase 20: Skipped (tax generation disabled)");
7182 return Ok(TaxSnapshot::default());
7183 }
7184 info!("Phase 20: Generating Tax Data");
7185
7186 let seed = self.seed;
7187 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7188 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7189 let fiscal_year = start_date.year();
7190 let company_code = self
7191 .config
7192 .companies
7193 .first()
7194 .map(|c| c.code.as_str())
7195 .unwrap_or("1000");
7196
7197 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7198 seed + 370,
7199 self.config.tax.clone(),
7200 );
7201
7202 let pack = self.primary_pack().clone();
7203 let (jurisdictions, codes) =
7204 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7205
7206 let mut provisions = Vec::new();
7208 if self.config.tax.provisions.enabled {
7209 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7210 for company in &self.config.companies {
7211 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7212 let statutory_rate = rust_decimal::Decimal::new(
7213 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7214 2,
7215 );
7216 let provision = provision_gen.generate(
7217 &company.code,
7218 start_date,
7219 pre_tax_income,
7220 statutory_rate,
7221 );
7222 provisions.push(provision);
7223 }
7224 }
7225
7226 let mut tax_lines = Vec::new();
7228 if !codes.is_empty() {
7229 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7230 datasynth_generators::TaxLineGeneratorConfig::default(),
7231 codes.clone(),
7232 seed + 372,
7233 );
7234
7235 let buyer_country = self
7238 .config
7239 .companies
7240 .first()
7241 .map(|c| c.country.as_str())
7242 .unwrap_or("US");
7243 for vi in &document_flows.vendor_invoices {
7244 let lines = tax_line_gen.generate_for_document(
7245 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7246 &vi.header.document_id,
7247 buyer_country, buyer_country,
7249 vi.payable_amount,
7250 vi.header.document_date,
7251 None,
7252 );
7253 tax_lines.extend(lines);
7254 }
7255
7256 for ci in &document_flows.customer_invoices {
7258 let lines = tax_line_gen.generate_for_document(
7259 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7260 &ci.header.document_id,
7261 buyer_country, buyer_country,
7263 ci.total_gross_amount,
7264 ci.header.document_date,
7265 None,
7266 );
7267 tax_lines.extend(lines);
7268 }
7269 }
7270
7271 let deferred_tax = {
7273 let companies: Vec<(&str, &str)> = self
7274 .config
7275 .companies
7276 .iter()
7277 .map(|c| (c.code.as_str(), c.country.as_str()))
7278 .collect();
7279 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7280 deferred_gen.generate(&companies, start_date, journal_entries)
7281 };
7282
7283 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7286 std::collections::HashMap::new();
7287 for vi in &document_flows.vendor_invoices {
7288 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7289 }
7290 for ci in &document_flows.customer_invoices {
7291 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7292 }
7293
7294 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7296 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7297 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7298 &tax_lines,
7299 company_code,
7300 &doc_dates,
7301 end_date,
7302 );
7303 debug!("Generated {} tax posting JEs", jes.len());
7304 jes
7305 } else {
7306 Vec::new()
7307 };
7308
7309 let snapshot = TaxSnapshot {
7310 jurisdiction_count: jurisdictions.len(),
7311 code_count: codes.len(),
7312 jurisdictions,
7313 codes,
7314 tax_provisions: provisions,
7315 tax_lines,
7316 tax_returns: Vec::new(),
7317 withholding_records: Vec::new(),
7318 tax_anomaly_labels: Vec::new(),
7319 deferred_tax,
7320 tax_posting_journal_entries,
7321 };
7322
7323 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7324 stats.tax_code_count = snapshot.code_count;
7325 stats.tax_provision_count = snapshot.tax_provisions.len();
7326 stats.tax_line_count = snapshot.tax_lines.len();
7327
7328 info!(
7329 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7330 snapshot.jurisdiction_count,
7331 snapshot.code_count,
7332 snapshot.tax_provisions.len(),
7333 snapshot.deferred_tax.temporary_differences.len(),
7334 snapshot.deferred_tax.journal_entries.len(),
7335 snapshot.tax_posting_journal_entries.len(),
7336 );
7337 self.check_resources_with_log("post-tax")?;
7338
7339 Ok(snapshot)
7340 }
7341
7342 fn phase_esg_generation(
7344 &mut self,
7345 document_flows: &DocumentFlowSnapshot,
7346 manufacturing: &ManufacturingSnapshot,
7347 stats: &mut EnhancedGenerationStatistics,
7348 ) -> SynthResult<EsgSnapshot> {
7349 if !self.phase_config.generate_esg {
7350 debug!("Phase 21: Skipped (ESG generation disabled)");
7351 return Ok(EsgSnapshot::default());
7352 }
7353 let degradation = self.check_resources()?;
7354 if degradation >= DegradationLevel::Reduced {
7355 debug!(
7356 "Phase skipped due to resource pressure (degradation: {:?})",
7357 degradation
7358 );
7359 return Ok(EsgSnapshot::default());
7360 }
7361 info!("Phase 21: Generating ESG Data");
7362
7363 let seed = self.seed;
7364 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7365 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7366 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7367 let entity_id = self
7368 .config
7369 .companies
7370 .first()
7371 .map(|c| c.code.as_str())
7372 .unwrap_or("1000");
7373
7374 let esg_cfg = &self.config.esg;
7375 let mut snapshot = EsgSnapshot::default();
7376
7377 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7379 esg_cfg.environmental.energy.clone(),
7380 seed + 80,
7381 );
7382 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7383
7384 let facility_count = esg_cfg.environmental.energy.facility_count;
7386 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7387 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7388
7389 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7391 seed + 82,
7392 esg_cfg.environmental.waste.diversion_target,
7393 facility_count,
7394 );
7395 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7396
7397 let mut emission_gen =
7399 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7400
7401 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7403 .iter()
7404 .map(|e| datasynth_generators::EnergyInput {
7405 facility_id: e.facility_id.clone(),
7406 energy_type: match e.energy_source {
7407 EnergySourceType::NaturalGas => {
7408 datasynth_generators::EnergyInputType::NaturalGas
7409 }
7410 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7411 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7412 _ => datasynth_generators::EnergyInputType::Electricity,
7413 },
7414 consumption_kwh: e.consumption_kwh,
7415 period: e.period,
7416 })
7417 .collect();
7418
7419 if !manufacturing.production_orders.is_empty() {
7421 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7422 &manufacturing.production_orders,
7423 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7426 if !mfg_energy.is_empty() {
7427 info!(
7428 "ESG: {} energy inputs derived from {} production orders",
7429 mfg_energy.len(),
7430 manufacturing.production_orders.len(),
7431 );
7432 energy_inputs.extend(mfg_energy);
7433 }
7434 }
7435
7436 let mut emissions = Vec::new();
7437 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7438 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7439
7440 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7442 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7443 for payment in &document_flows.payments {
7444 if payment.is_vendor {
7445 *totals
7446 .entry(payment.business_partner_id.clone())
7447 .or_default() += payment.amount;
7448 }
7449 }
7450 totals
7451 };
7452 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7453 .master_data
7454 .vendors
7455 .iter()
7456 .map(|v| {
7457 let spend = vendor_payment_totals
7458 .get(&v.vendor_id)
7459 .copied()
7460 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7461 datasynth_generators::VendorSpendInput {
7462 vendor_id: v.vendor_id.clone(),
7463 category: format!("{:?}", v.vendor_type).to_lowercase(),
7464 spend,
7465 country: v.country.clone(),
7466 }
7467 })
7468 .collect();
7469 if !vendor_spend.is_empty() {
7470 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7471 entity_id,
7472 &vendor_spend,
7473 start_date,
7474 end_date,
7475 ));
7476 }
7477
7478 let headcount = self.master_data.employees.len() as u32;
7480 if headcount > 0 {
7481 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7482 emissions.extend(emission_gen.generate_scope3_business_travel(
7483 entity_id,
7484 travel_spend,
7485 start_date,
7486 ));
7487 emissions
7488 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7489 }
7490
7491 snapshot.emission_count = emissions.len();
7492 snapshot.emissions = emissions;
7493 snapshot.energy = energy_records;
7494
7495 let mut workforce_gen =
7497 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7498 let total_headcount = headcount.max(100);
7499 snapshot.diversity =
7500 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7501 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7502
7503 if !self.master_data.employees.is_empty() {
7505 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7506 entity_id,
7507 &self.master_data.employees,
7508 end_date,
7509 );
7510 if !hr_diversity.is_empty() {
7511 info!(
7512 "ESG: {} diversity metrics derived from {} actual employees",
7513 hr_diversity.len(),
7514 self.master_data.employees.len(),
7515 );
7516 snapshot.diversity.extend(hr_diversity);
7517 }
7518 }
7519
7520 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7521 entity_id,
7522 facility_count,
7523 start_date,
7524 end_date,
7525 );
7526
7527 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7530 entity_id,
7531 &snapshot.safety_incidents,
7532 total_hours,
7533 start_date,
7534 );
7535 snapshot.safety_metrics = vec![safety_metric];
7536
7537 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7539 seed + 85,
7540 esg_cfg.governance.board_size,
7541 esg_cfg.governance.independence_target,
7542 );
7543 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7544
7545 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7547 esg_cfg.supply_chain_esg.clone(),
7548 seed + 86,
7549 );
7550 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7551 .master_data
7552 .vendors
7553 .iter()
7554 .map(|v| datasynth_generators::VendorInput {
7555 vendor_id: v.vendor_id.clone(),
7556 country: v.country.clone(),
7557 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7558 quality_score: None,
7559 })
7560 .collect();
7561 snapshot.supplier_assessments =
7562 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7563
7564 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7566 seed + 87,
7567 esg_cfg.reporting.clone(),
7568 esg_cfg.climate_scenarios.clone(),
7569 );
7570 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7571 snapshot.disclosures = disclosure_gen.generate_disclosures(
7572 entity_id,
7573 &snapshot.materiality,
7574 start_date,
7575 end_date,
7576 );
7577 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7578 snapshot.disclosure_count = snapshot.disclosures.len();
7579
7580 if esg_cfg.anomaly_rate > 0.0 {
7582 let mut anomaly_injector =
7583 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7584 let mut labels = Vec::new();
7585 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7586 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7587 labels.extend(
7588 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7589 );
7590 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7591 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7592 snapshot.anomaly_labels = labels;
7593 }
7594
7595 stats.esg_emission_count = snapshot.emission_count;
7596 stats.esg_disclosure_count = snapshot.disclosure_count;
7597
7598 info!(
7599 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7600 snapshot.emission_count,
7601 snapshot.disclosure_count,
7602 snapshot.supplier_assessments.len()
7603 );
7604 self.check_resources_with_log("post-esg")?;
7605
7606 Ok(snapshot)
7607 }
7608
7609 fn phase_treasury_data(
7611 &mut self,
7612 document_flows: &DocumentFlowSnapshot,
7613 subledger: &SubledgerSnapshot,
7614 intercompany: &IntercompanySnapshot,
7615 stats: &mut EnhancedGenerationStatistics,
7616 ) -> SynthResult<TreasurySnapshot> {
7617 if !self.phase_config.generate_treasury {
7618 debug!("Phase 22: Skipped (treasury generation disabled)");
7619 return Ok(TreasurySnapshot::default());
7620 }
7621 let degradation = self.check_resources()?;
7622 if degradation >= DegradationLevel::Reduced {
7623 debug!(
7624 "Phase skipped due to resource pressure (degradation: {:?})",
7625 degradation
7626 );
7627 return Ok(TreasurySnapshot::default());
7628 }
7629 info!("Phase 22: Generating Treasury Data");
7630
7631 let seed = self.seed;
7632 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7633 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7634 let currency = self
7635 .config
7636 .companies
7637 .first()
7638 .map(|c| c.currency.as_str())
7639 .unwrap_or("USD");
7640 let entity_id = self
7641 .config
7642 .companies
7643 .first()
7644 .map(|c| c.code.as_str())
7645 .unwrap_or("1000");
7646
7647 let mut snapshot = TreasurySnapshot::default();
7648
7649 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7651 self.config.treasury.debt.clone(),
7652 seed + 90,
7653 );
7654 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7655
7656 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7658 self.config.treasury.hedging.clone(),
7659 seed + 91,
7660 );
7661 for debt in &snapshot.debt_instruments {
7662 if debt.rate_type == InterestRateType::Variable {
7663 let swap = hedge_gen.generate_ir_swap(
7664 currency,
7665 debt.principal,
7666 debt.origination_date,
7667 debt.maturity_date,
7668 );
7669 snapshot.hedging_instruments.push(swap);
7670 }
7671 }
7672
7673 {
7676 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7677 for payment in &document_flows.payments {
7678 if payment.currency != currency {
7679 let entry = fx_map
7680 .entry(payment.currency.clone())
7681 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7682 entry.0 += payment.amount;
7683 if payment.header.document_date > entry.1 {
7685 entry.1 = payment.header.document_date;
7686 }
7687 }
7688 }
7689 if !fx_map.is_empty() {
7690 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7691 .into_iter()
7692 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7693 datasynth_generators::treasury::FxExposure {
7694 currency_pair: format!("{foreign_ccy}/{currency}"),
7695 foreign_currency: foreign_ccy,
7696 net_amount,
7697 settlement_date,
7698 description: "AP payment FX exposure".to_string(),
7699 }
7700 })
7701 .collect();
7702 let (fx_instruments, fx_relationships) =
7703 hedge_gen.generate(start_date, &fx_exposures);
7704 snapshot.hedging_instruments.extend(fx_instruments);
7705 snapshot.hedge_relationships.extend(fx_relationships);
7706 }
7707 }
7708
7709 if self.config.treasury.anomaly_rate > 0.0 {
7711 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7712 seed + 92,
7713 self.config.treasury.anomaly_rate,
7714 );
7715 let mut labels = Vec::new();
7716 labels.extend(
7717 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7718 );
7719 snapshot.treasury_anomaly_labels = labels;
7720 }
7721
7722 if self.config.treasury.cash_positioning.enabled {
7724 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7725
7726 for payment in &document_flows.payments {
7728 cash_flows.push(datasynth_generators::treasury::CashFlow {
7729 date: payment.header.document_date,
7730 account_id: format!("{entity_id}-MAIN"),
7731 amount: payment.amount,
7732 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7733 });
7734 }
7735
7736 for chain in &document_flows.o2c_chains {
7738 if let Some(ref receipt) = chain.customer_receipt {
7739 cash_flows.push(datasynth_generators::treasury::CashFlow {
7740 date: receipt.header.document_date,
7741 account_id: format!("{entity_id}-MAIN"),
7742 amount: receipt.amount,
7743 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7744 });
7745 }
7746 for receipt in &chain.remainder_receipts {
7748 cash_flows.push(datasynth_generators::treasury::CashFlow {
7749 date: receipt.header.document_date,
7750 account_id: format!("{entity_id}-MAIN"),
7751 amount: receipt.amount,
7752 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7753 });
7754 }
7755 }
7756
7757 if !cash_flows.is_empty() {
7758 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7759 self.config.treasury.cash_positioning.clone(),
7760 seed + 93,
7761 );
7762 let account_id = format!("{entity_id}-MAIN");
7763 snapshot.cash_positions = cash_gen.generate(
7764 entity_id,
7765 &account_id,
7766 currency,
7767 &cash_flows,
7768 start_date,
7769 start_date + chrono::Months::new(self.config.global.period_months),
7770 rust_decimal::Decimal::new(1_000_000, 0), );
7772 }
7773 }
7774
7775 if self.config.treasury.cash_forecasting.enabled {
7777 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7778
7779 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7781 .ar_invoices
7782 .iter()
7783 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7784 .map(|inv| {
7785 let days_past_due = if inv.due_date < end_date {
7786 (end_date - inv.due_date).num_days().max(0) as u32
7787 } else {
7788 0
7789 };
7790 datasynth_generators::treasury::ArAgingItem {
7791 expected_date: inv.due_date,
7792 amount: inv.amount_remaining,
7793 days_past_due,
7794 document_id: inv.invoice_number.clone(),
7795 }
7796 })
7797 .collect();
7798
7799 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7801 .ap_invoices
7802 .iter()
7803 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7804 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7805 payment_date: inv.due_date,
7806 amount: inv.amount_remaining,
7807 document_id: inv.invoice_number.clone(),
7808 })
7809 .collect();
7810
7811 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7812 self.config.treasury.cash_forecasting.clone(),
7813 seed + 94,
7814 );
7815 let forecast = forecast_gen.generate(
7816 entity_id,
7817 currency,
7818 end_date,
7819 &ar_items,
7820 &ap_items,
7821 &[], );
7823 snapshot.cash_forecasts.push(forecast);
7824 }
7825
7826 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7828 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7829 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7830 self.config.treasury.cash_pooling.clone(),
7831 seed + 95,
7832 );
7833
7834 let account_ids: Vec<String> = snapshot
7836 .cash_positions
7837 .iter()
7838 .map(|cp| cp.bank_account_id.clone())
7839 .collect::<std::collections::HashSet<_>>()
7840 .into_iter()
7841 .collect();
7842
7843 if let Some(pool) =
7844 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7845 {
7846 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7848 for cp in &snapshot.cash_positions {
7849 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7850 }
7851
7852 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7853 latest_balances
7854 .into_iter()
7855 .filter(|(id, _)| pool.participant_accounts.contains(id))
7856 .map(
7857 |(id, balance)| datasynth_generators::treasury::AccountBalance {
7858 account_id: id,
7859 balance,
7860 },
7861 )
7862 .collect();
7863
7864 let sweeps =
7865 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7866 snapshot.cash_pool_sweeps = sweeps;
7867 snapshot.cash_pools.push(pool);
7868 }
7869 }
7870
7871 if self.config.treasury.bank_guarantees.enabled {
7873 let vendor_names: Vec<String> = self
7874 .master_data
7875 .vendors
7876 .iter()
7877 .map(|v| v.name.clone())
7878 .collect();
7879 if !vendor_names.is_empty() {
7880 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7881 self.config.treasury.bank_guarantees.clone(),
7882 seed + 96,
7883 );
7884 snapshot.bank_guarantees =
7885 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7886 }
7887 }
7888
7889 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7891 let entity_ids: Vec<String> = self
7892 .config
7893 .companies
7894 .iter()
7895 .map(|c| c.code.clone())
7896 .collect();
7897 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7898 .matched_pairs
7899 .iter()
7900 .map(|mp| {
7901 (
7902 mp.seller_company.clone(),
7903 mp.buyer_company.clone(),
7904 mp.amount,
7905 )
7906 })
7907 .collect();
7908 if entity_ids.len() >= 2 {
7909 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7910 self.config.treasury.netting.clone(),
7911 seed + 97,
7912 );
7913 snapshot.netting_runs = netting_gen.generate(
7914 &entity_ids,
7915 currency,
7916 start_date,
7917 self.config.global.period_months,
7918 &ic_amounts,
7919 );
7920 }
7921 }
7922
7923 {
7925 use datasynth_generators::treasury::TreasuryAccounting;
7926
7927 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7928 let mut treasury_jes = Vec::new();
7929
7930 if !snapshot.debt_instruments.is_empty() {
7932 let debt_jes =
7933 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7934 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7935 treasury_jes.extend(debt_jes);
7936 }
7937
7938 if !snapshot.hedging_instruments.is_empty() {
7940 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
7941 &snapshot.hedging_instruments,
7942 &snapshot.hedge_relationships,
7943 end_date,
7944 entity_id,
7945 );
7946 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
7947 treasury_jes.extend(hedge_jes);
7948 }
7949
7950 if !snapshot.cash_pool_sweeps.is_empty() {
7952 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
7953 &snapshot.cash_pool_sweeps,
7954 entity_id,
7955 );
7956 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
7957 treasury_jes.extend(sweep_jes);
7958 }
7959
7960 if !treasury_jes.is_empty() {
7961 debug!("Total treasury journal entries: {}", treasury_jes.len());
7962 }
7963 snapshot.journal_entries = treasury_jes;
7964 }
7965
7966 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7967 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7968 stats.cash_position_count = snapshot.cash_positions.len();
7969 stats.cash_forecast_count = snapshot.cash_forecasts.len();
7970 stats.cash_pool_count = snapshot.cash_pools.len();
7971
7972 info!(
7973 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
7974 snapshot.debt_instruments.len(),
7975 snapshot.hedging_instruments.len(),
7976 snapshot.cash_positions.len(),
7977 snapshot.cash_forecasts.len(),
7978 snapshot.cash_pools.len(),
7979 snapshot.bank_guarantees.len(),
7980 snapshot.netting_runs.len(),
7981 snapshot.journal_entries.len(),
7982 );
7983 self.check_resources_with_log("post-treasury")?;
7984
7985 Ok(snapshot)
7986 }
7987
7988 fn phase_project_accounting(
7990 &mut self,
7991 document_flows: &DocumentFlowSnapshot,
7992 hr: &HrSnapshot,
7993 stats: &mut EnhancedGenerationStatistics,
7994 ) -> SynthResult<ProjectAccountingSnapshot> {
7995 if !self.phase_config.generate_project_accounting {
7996 debug!("Phase 23: Skipped (project accounting disabled)");
7997 return Ok(ProjectAccountingSnapshot::default());
7998 }
7999 let degradation = self.check_resources()?;
8000 if degradation >= DegradationLevel::Reduced {
8001 debug!(
8002 "Phase skipped due to resource pressure (degradation: {:?})",
8003 degradation
8004 );
8005 return Ok(ProjectAccountingSnapshot::default());
8006 }
8007 info!("Phase 23: Generating Project Accounting Data");
8008
8009 let seed = self.seed;
8010 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8011 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8012 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8013 let company_code = self
8014 .config
8015 .companies
8016 .first()
8017 .map(|c| c.code.as_str())
8018 .unwrap_or("1000");
8019
8020 let mut snapshot = ProjectAccountingSnapshot::default();
8021
8022 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8024 self.config.project_accounting.clone(),
8025 seed + 95,
8026 );
8027 let pool = project_gen.generate(company_code, start_date, end_date);
8028 snapshot.projects = pool.projects.clone();
8029
8030 {
8032 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8033 Vec::new();
8034
8035 for te in &hr.time_entries {
8037 let total_hours = te.hours_regular + te.hours_overtime;
8038 if total_hours > 0.0 {
8039 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8040 id: te.entry_id.clone(),
8041 entity_id: company_code.to_string(),
8042 date: te.date,
8043 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8044 .unwrap_or(rust_decimal::Decimal::ZERO),
8045 source_type: CostSourceType::TimeEntry,
8046 hours: Some(
8047 rust_decimal::Decimal::from_f64_retain(total_hours)
8048 .unwrap_or(rust_decimal::Decimal::ZERO),
8049 ),
8050 });
8051 }
8052 }
8053
8054 for er in &hr.expense_reports {
8056 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8057 id: er.report_id.clone(),
8058 entity_id: company_code.to_string(),
8059 date: er.submission_date,
8060 amount: er.total_amount,
8061 source_type: CostSourceType::ExpenseReport,
8062 hours: None,
8063 });
8064 }
8065
8066 for po in &document_flows.purchase_orders {
8068 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8069 id: po.header.document_id.clone(),
8070 entity_id: company_code.to_string(),
8071 date: po.header.document_date,
8072 amount: po.total_net_amount,
8073 source_type: CostSourceType::PurchaseOrder,
8074 hours: None,
8075 });
8076 }
8077
8078 for vi in &document_flows.vendor_invoices {
8080 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8081 id: vi.header.document_id.clone(),
8082 entity_id: company_code.to_string(),
8083 date: vi.header.document_date,
8084 amount: vi.payable_amount,
8085 source_type: CostSourceType::VendorInvoice,
8086 hours: None,
8087 });
8088 }
8089
8090 if !source_docs.is_empty() && !pool.projects.is_empty() {
8091 let mut cost_gen =
8092 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8093 self.config.project_accounting.cost_allocation.clone(),
8094 seed + 99,
8095 );
8096 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8097 }
8098 }
8099
8100 if self.config.project_accounting.change_orders.enabled {
8102 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8103 self.config.project_accounting.change_orders.clone(),
8104 seed + 96,
8105 );
8106 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8107 }
8108
8109 if self.config.project_accounting.milestones.enabled {
8111 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8112 self.config.project_accounting.milestones.clone(),
8113 seed + 97,
8114 );
8115 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8116 }
8117
8118 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8120 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8121 self.config.project_accounting.earned_value.clone(),
8122 seed + 98,
8123 );
8124 snapshot.earned_value_metrics =
8125 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8126 }
8127
8128 if self.config.project_accounting.revenue_recognition.enabled
8130 && !snapshot.projects.is_empty()
8131 && !snapshot.cost_lines.is_empty()
8132 {
8133 use datasynth_generators::project_accounting::RevenueGenerator;
8134 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8135 let avg_contract_value =
8136 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8137 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8138
8139 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8142 snapshot
8143 .projects
8144 .iter()
8145 .filter(|p| {
8146 matches!(
8147 p.project_type,
8148 datasynth_core::models::ProjectType::Customer
8149 )
8150 })
8151 .map(|p| {
8152 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8153 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8154 } else {
8156 avg_contract_value
8157 };
8158 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8160 })
8161 .collect();
8162
8163 if !contract_values.is_empty() {
8164 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8165 snapshot.revenue_records = rev_gen.generate(
8166 &snapshot.projects,
8167 &snapshot.cost_lines,
8168 &contract_values,
8169 start_date,
8170 end_date,
8171 );
8172 debug!(
8173 "Generated {} revenue recognition records for {} customer projects",
8174 snapshot.revenue_records.len(),
8175 contract_values.len()
8176 );
8177 }
8178 }
8179
8180 stats.project_count = snapshot.projects.len();
8181 stats.project_change_order_count = snapshot.change_orders.len();
8182 stats.project_cost_line_count = snapshot.cost_lines.len();
8183
8184 info!(
8185 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8186 snapshot.projects.len(),
8187 snapshot.change_orders.len(),
8188 snapshot.milestones.len(),
8189 snapshot.earned_value_metrics.len()
8190 );
8191 self.check_resources_with_log("post-project-accounting")?;
8192
8193 Ok(snapshot)
8194 }
8195
8196 fn phase_evolution_events(
8198 &mut self,
8199 stats: &mut EnhancedGenerationStatistics,
8200 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8201 if !self.phase_config.generate_evolution_events {
8202 debug!("Phase 24: Skipped (evolution events disabled)");
8203 return Ok((Vec::new(), Vec::new()));
8204 }
8205 info!("Phase 24: Generating Process Evolution + Organizational Events");
8206
8207 let seed = self.seed;
8208 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8209 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8210 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8211
8212 let mut proc_gen =
8214 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8215 seed + 100,
8216 );
8217 let process_events = proc_gen.generate_events(start_date, end_date);
8218
8219 let company_codes: Vec<String> = self
8221 .config
8222 .companies
8223 .iter()
8224 .map(|c| c.code.clone())
8225 .collect();
8226 let mut org_gen =
8227 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8228 seed + 101,
8229 );
8230 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8231
8232 stats.process_evolution_event_count = process_events.len();
8233 stats.organizational_event_count = org_events.len();
8234
8235 info!(
8236 "Evolution events generated: {} process evolution, {} organizational",
8237 process_events.len(),
8238 org_events.len()
8239 );
8240 self.check_resources_with_log("post-evolution-events")?;
8241
8242 Ok((process_events, org_events))
8243 }
8244
8245 fn phase_disruption_events(
8248 &self,
8249 stats: &mut EnhancedGenerationStatistics,
8250 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8251 if !self.config.organizational_events.enabled {
8252 debug!("Phase 24b: Skipped (organizational events disabled)");
8253 return Ok(Vec::new());
8254 }
8255 info!("Phase 24b: Generating Disruption Events");
8256
8257 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8258 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8259 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8260
8261 let company_codes: Vec<String> = self
8262 .config
8263 .companies
8264 .iter()
8265 .map(|c| c.code.clone())
8266 .collect();
8267
8268 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8269 let events = gen.generate(start_date, end_date, &company_codes);
8270
8271 stats.disruption_event_count = events.len();
8272 info!("Disruption events generated: {} events", events.len());
8273 self.check_resources_with_log("post-disruption-events")?;
8274
8275 Ok(events)
8276 }
8277
8278 fn phase_counterfactuals(
8285 &self,
8286 journal_entries: &[JournalEntry],
8287 stats: &mut EnhancedGenerationStatistics,
8288 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8289 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8290 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8291 return Ok(Vec::new());
8292 }
8293 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8294
8295 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8296
8297 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8298
8299 let specs = [
8301 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8302 CounterfactualSpec::ShiftDate { days: -14 },
8303 CounterfactualSpec::SelfApprove,
8304 CounterfactualSpec::SplitTransaction { split_count: 3 },
8305 ];
8306
8307 let pairs: Vec<_> = journal_entries
8308 .iter()
8309 .enumerate()
8310 .map(|(i, je)| {
8311 let spec = &specs[i % specs.len()];
8312 gen.generate(je, spec)
8313 })
8314 .collect();
8315
8316 stats.counterfactual_pair_count = pairs.len();
8317 info!(
8318 "Counterfactual pairs generated: {} pairs from {} journal entries",
8319 pairs.len(),
8320 journal_entries.len()
8321 );
8322 self.check_resources_with_log("post-counterfactuals")?;
8323
8324 Ok(pairs)
8325 }
8326
8327 fn phase_red_flags(
8334 &self,
8335 anomaly_labels: &AnomalyLabels,
8336 document_flows: &DocumentFlowSnapshot,
8337 stats: &mut EnhancedGenerationStatistics,
8338 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8339 if !self.config.fraud.enabled {
8340 debug!("Phase 26: Skipped (fraud generation disabled)");
8341 return Ok(Vec::new());
8342 }
8343 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8344
8345 use datasynth_generators::fraud::RedFlagGenerator;
8346
8347 let generator = RedFlagGenerator::new();
8348 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8349
8350 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8352 .labels
8353 .iter()
8354 .filter(|label| label.anomaly_type.is_intentional())
8355 .map(|label| label.document_id.as_str())
8356 .collect();
8357
8358 let mut flags = Vec::new();
8359
8360 for chain in &document_flows.p2p_chains {
8362 let doc_id = &chain.purchase_order.header.document_id;
8363 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8364 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8365 }
8366
8367 for chain in &document_flows.o2c_chains {
8369 let doc_id = &chain.sales_order.header.document_id;
8370 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8371 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8372 }
8373
8374 stats.red_flag_count = flags.len();
8375 info!(
8376 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8377 flags.len(),
8378 document_flows.p2p_chains.len(),
8379 document_flows.o2c_chains.len(),
8380 fraud_doc_ids.len()
8381 );
8382 self.check_resources_with_log("post-red-flags")?;
8383
8384 Ok(flags)
8385 }
8386
8387 fn phase_collusion_rings(
8393 &mut self,
8394 stats: &mut EnhancedGenerationStatistics,
8395 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8396 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8397 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8398 return Ok(Vec::new());
8399 }
8400 info!("Phase 26b: Generating Collusion Rings");
8401
8402 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8403 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8404 let months = self.config.global.period_months;
8405
8406 let employee_ids: Vec<String> = self
8407 .master_data
8408 .employees
8409 .iter()
8410 .map(|e| e.employee_id.clone())
8411 .collect();
8412 let vendor_ids: Vec<String> = self
8413 .master_data
8414 .vendors
8415 .iter()
8416 .map(|v| v.vendor_id.clone())
8417 .collect();
8418
8419 let mut generator =
8420 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8421 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8422
8423 stats.collusion_ring_count = rings.len();
8424 info!(
8425 "Collusion rings generated: {} rings, total members: {}",
8426 rings.len(),
8427 rings
8428 .iter()
8429 .map(datasynth_generators::fraud::CollusionRing::size)
8430 .sum::<usize>()
8431 );
8432 self.check_resources_with_log("post-collusion-rings")?;
8433
8434 Ok(rings)
8435 }
8436
8437 fn phase_temporal_attributes(
8442 &mut self,
8443 stats: &mut EnhancedGenerationStatistics,
8444 ) -> SynthResult<
8445 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8446 > {
8447 if !self.config.temporal_attributes.enabled {
8448 debug!("Phase 27: Skipped (temporal attributes disabled)");
8449 return Ok(Vec::new());
8450 }
8451 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8452
8453 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8454 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8455
8456 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8460 || self.config.temporal_attributes.enabled;
8461 let temporal_config = {
8462 let ta = &self.config.temporal_attributes;
8463 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8464 .enabled(ta.enabled)
8465 .closed_probability(ta.valid_time.closed_probability)
8466 .avg_validity_days(ta.valid_time.avg_validity_days)
8467 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8468 .with_version_chains(if generate_version_chains {
8469 ta.avg_versions_per_entity
8470 } else {
8471 1.0
8472 })
8473 .build()
8474 };
8475 let temporal_config = if self
8477 .config
8478 .temporal_attributes
8479 .transaction_time
8480 .allow_backdating
8481 {
8482 let mut c = temporal_config;
8483 c.transaction_time.allow_backdating = true;
8484 c.transaction_time.backdating_probability = self
8485 .config
8486 .temporal_attributes
8487 .transaction_time
8488 .backdating_probability;
8489 c.transaction_time.max_backdate_days = self
8490 .config
8491 .temporal_attributes
8492 .transaction_time
8493 .max_backdate_days;
8494 c
8495 } else {
8496 temporal_config
8497 };
8498 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8499 temporal_config,
8500 self.seed + 130,
8501 start_date,
8502 );
8503
8504 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8505 self.seed + 130,
8506 datasynth_core::GeneratorType::Vendor,
8507 );
8508
8509 let chains: Vec<_> = self
8510 .master_data
8511 .vendors
8512 .iter()
8513 .map(|vendor| {
8514 let id = uuid_factory.next();
8515 gen.generate_version_chain(vendor.clone(), id)
8516 })
8517 .collect();
8518
8519 stats.temporal_version_chain_count = chains.len();
8520 info!("Temporal version chains generated: {} chains", chains.len());
8521 self.check_resources_with_log("post-temporal-attributes")?;
8522
8523 Ok(chains)
8524 }
8525
8526 fn phase_entity_relationships(
8536 &self,
8537 journal_entries: &[JournalEntry],
8538 document_flows: &DocumentFlowSnapshot,
8539 stats: &mut EnhancedGenerationStatistics,
8540 ) -> SynthResult<(
8541 Option<datasynth_core::models::EntityGraph>,
8542 Vec<datasynth_core::models::CrossProcessLink>,
8543 )> {
8544 use datasynth_generators::relationships::{
8545 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8546 TransactionSummary,
8547 };
8548
8549 let rs_enabled = self.config.relationship_strength.enabled;
8550 let cpl_enabled = self.config.cross_process_links.enabled
8551 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8552
8553 if !rs_enabled && !cpl_enabled {
8554 debug!(
8555 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8556 );
8557 return Ok((None, Vec::new()));
8558 }
8559
8560 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8561
8562 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8563 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8564
8565 let company_code = self
8566 .config
8567 .companies
8568 .first()
8569 .map(|c| c.code.as_str())
8570 .unwrap_or("1000");
8571
8572 let gen_config = EntityGraphConfig {
8574 enabled: rs_enabled,
8575 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8576 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8577 enable_return_flows: false,
8578 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8579 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8580 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8582 1.0
8583 } else {
8584 0.30
8585 },
8586 ..Default::default()
8587 },
8588 strength_config: datasynth_generators::relationships::StrengthConfig {
8589 transaction_volume_weight: self
8590 .config
8591 .relationship_strength
8592 .calculation
8593 .transaction_volume_weight,
8594 transaction_count_weight: self
8595 .config
8596 .relationship_strength
8597 .calculation
8598 .transaction_count_weight,
8599 duration_weight: self
8600 .config
8601 .relationship_strength
8602 .calculation
8603 .relationship_duration_weight,
8604 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8605 mutual_connections_weight: self
8606 .config
8607 .relationship_strength
8608 .calculation
8609 .mutual_connections_weight,
8610 recency_half_life_days: self
8611 .config
8612 .relationship_strength
8613 .calculation
8614 .recency_half_life_days,
8615 },
8616 ..Default::default()
8617 };
8618
8619 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8620
8621 let entity_graph = if rs_enabled {
8623 let vendor_summaries: Vec<EntitySummary> = self
8625 .master_data
8626 .vendors
8627 .iter()
8628 .map(|v| {
8629 EntitySummary::new(
8630 &v.vendor_id,
8631 &v.name,
8632 datasynth_core::models::GraphEntityType::Vendor,
8633 start_date,
8634 )
8635 })
8636 .collect();
8637
8638 let customer_summaries: Vec<EntitySummary> = self
8639 .master_data
8640 .customers
8641 .iter()
8642 .map(|c| {
8643 EntitySummary::new(
8644 &c.customer_id,
8645 &c.name,
8646 datasynth_core::models::GraphEntityType::Customer,
8647 start_date,
8648 )
8649 })
8650 .collect();
8651
8652 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8657 std::collections::HashMap::new();
8658
8659 for je in journal_entries {
8660 let cc = je.header.company_code.clone();
8661 let posting_date = je.header.posting_date;
8662 for line in &je.lines {
8663 if let Some(ref tp) = line.trading_partner {
8664 let amount = if line.debit_amount > line.credit_amount {
8665 line.debit_amount
8666 } else {
8667 line.credit_amount
8668 };
8669 let entry = txn_summaries
8670 .entry((cc.clone(), tp.clone()))
8671 .or_insert_with(|| TransactionSummary {
8672 total_volume: rust_decimal::Decimal::ZERO,
8673 transaction_count: 0,
8674 first_transaction_date: posting_date,
8675 last_transaction_date: posting_date,
8676 related_entities: std::collections::HashSet::new(),
8677 });
8678 entry.total_volume += amount;
8679 entry.transaction_count += 1;
8680 if posting_date < entry.first_transaction_date {
8681 entry.first_transaction_date = posting_date;
8682 }
8683 if posting_date > entry.last_transaction_date {
8684 entry.last_transaction_date = posting_date;
8685 }
8686 entry.related_entities.insert(cc.clone());
8687 }
8688 }
8689 }
8690
8691 for chain in &document_flows.p2p_chains {
8694 let cc = chain.purchase_order.header.company_code.clone();
8695 let vendor_id = chain.purchase_order.vendor_id.clone();
8696 let po_date = chain.purchase_order.header.document_date;
8697 let amount = chain.purchase_order.total_net_amount;
8698
8699 let entry = txn_summaries
8700 .entry((cc.clone(), vendor_id))
8701 .or_insert_with(|| TransactionSummary {
8702 total_volume: rust_decimal::Decimal::ZERO,
8703 transaction_count: 0,
8704 first_transaction_date: po_date,
8705 last_transaction_date: po_date,
8706 related_entities: std::collections::HashSet::new(),
8707 });
8708 entry.total_volume += amount;
8709 entry.transaction_count += 1;
8710 if po_date < entry.first_transaction_date {
8711 entry.first_transaction_date = po_date;
8712 }
8713 if po_date > entry.last_transaction_date {
8714 entry.last_transaction_date = po_date;
8715 }
8716 entry.related_entities.insert(cc);
8717 }
8718
8719 for chain in &document_flows.o2c_chains {
8721 let cc = chain.sales_order.header.company_code.clone();
8722 let customer_id = chain.sales_order.customer_id.clone();
8723 let so_date = chain.sales_order.header.document_date;
8724 let amount = chain.sales_order.total_net_amount;
8725
8726 let entry = txn_summaries
8727 .entry((cc.clone(), customer_id))
8728 .or_insert_with(|| TransactionSummary {
8729 total_volume: rust_decimal::Decimal::ZERO,
8730 transaction_count: 0,
8731 first_transaction_date: so_date,
8732 last_transaction_date: so_date,
8733 related_entities: std::collections::HashSet::new(),
8734 });
8735 entry.total_volume += amount;
8736 entry.transaction_count += 1;
8737 if so_date < entry.first_transaction_date {
8738 entry.first_transaction_date = so_date;
8739 }
8740 if so_date > entry.last_transaction_date {
8741 entry.last_transaction_date = so_date;
8742 }
8743 entry.related_entities.insert(cc);
8744 }
8745
8746 let as_of_date = journal_entries
8747 .last()
8748 .map(|je| je.header.posting_date)
8749 .unwrap_or(start_date);
8750
8751 let graph = gen.generate_entity_graph(
8752 company_code,
8753 as_of_date,
8754 &vendor_summaries,
8755 &customer_summaries,
8756 &txn_summaries,
8757 );
8758
8759 info!(
8760 "Entity relationship graph: {} nodes, {} edges",
8761 graph.nodes.len(),
8762 graph.edges.len()
8763 );
8764 stats.entity_relationship_node_count = graph.nodes.len();
8765 stats.entity_relationship_edge_count = graph.edges.len();
8766 Some(graph)
8767 } else {
8768 None
8769 };
8770
8771 let cross_process_links = if cpl_enabled {
8773 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8775 .p2p_chains
8776 .iter()
8777 .flat_map(|chain| {
8778 let vendor_id = chain.purchase_order.vendor_id.clone();
8779 let cc = chain.purchase_order.header.company_code.clone();
8780 chain.goods_receipts.iter().flat_map(move |gr| {
8781 gr.items.iter().filter_map({
8782 let doc_id = gr.header.document_id.clone();
8783 let v_id = vendor_id.clone();
8784 let company = cc.clone();
8785 let receipt_date = gr.header.document_date;
8786 move |item| {
8787 item.base
8788 .material_id
8789 .as_ref()
8790 .map(|mat_id| GoodsReceiptRef {
8791 document_id: doc_id.clone(),
8792 material_id: mat_id.clone(),
8793 quantity: item.base.quantity,
8794 receipt_date,
8795 vendor_id: v_id.clone(),
8796 company_code: company.clone(),
8797 })
8798 }
8799 })
8800 })
8801 })
8802 .collect();
8803
8804 let del_refs: Vec<DeliveryRef> = document_flows
8806 .o2c_chains
8807 .iter()
8808 .flat_map(|chain| {
8809 let customer_id = chain.sales_order.customer_id.clone();
8810 let cc = chain.sales_order.header.company_code.clone();
8811 chain.deliveries.iter().flat_map(move |del| {
8812 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8813 del.items.iter().filter_map({
8814 let doc_id = del.header.document_id.clone();
8815 let c_id = customer_id.clone();
8816 let company = cc.clone();
8817 move |item| {
8818 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8819 document_id: doc_id.clone(),
8820 material_id: mat_id.clone(),
8821 quantity: item.base.quantity,
8822 delivery_date,
8823 customer_id: c_id.clone(),
8824 company_code: company.clone(),
8825 })
8826 }
8827 })
8828 })
8829 })
8830 .collect();
8831
8832 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8833 info!("Cross-process links generated: {} links", links.len());
8834 stats.cross_process_link_count = links.len();
8835 links
8836 } else {
8837 Vec::new()
8838 };
8839
8840 self.check_resources_with_log("post-entity-relationships")?;
8841 Ok((entity_graph, cross_process_links))
8842 }
8843
8844 fn phase_industry_data(
8846 &self,
8847 stats: &mut EnhancedGenerationStatistics,
8848 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8849 if !self.config.industry_specific.enabled {
8850 return None;
8851 }
8852 info!("Phase 29: Generating industry-specific data");
8853 let output = datasynth_generators::industry::factory::generate_industry_output(
8854 self.config.global.industry,
8855 );
8856 stats.industry_gl_account_count = output.gl_accounts.len();
8857 info!(
8858 "Industry data generated: {} GL accounts for {:?}",
8859 output.gl_accounts.len(),
8860 self.config.global.industry
8861 );
8862 Some(output)
8863 }
8864
8865 fn phase_opening_balances(
8867 &mut self,
8868 coa: &Arc<ChartOfAccounts>,
8869 stats: &mut EnhancedGenerationStatistics,
8870 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8871 if !self.config.balance.generate_opening_balances {
8872 debug!("Phase 3b: Skipped (opening balance generation disabled)");
8873 return Ok(Vec::new());
8874 }
8875 info!("Phase 3b: Generating Opening Balances");
8876
8877 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8878 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8879 let fiscal_year = start_date.year();
8880
8881 let industry = match self.config.global.industry {
8882 IndustrySector::Manufacturing => IndustryType::Manufacturing,
8883 IndustrySector::Retail => IndustryType::Retail,
8884 IndustrySector::FinancialServices => IndustryType::Financial,
8885 IndustrySector::Healthcare => IndustryType::Healthcare,
8886 IndustrySector::Technology => IndustryType::Technology,
8887 _ => IndustryType::Manufacturing,
8888 };
8889
8890 let config = datasynth_generators::OpeningBalanceConfig {
8891 industry,
8892 ..Default::default()
8893 };
8894 let mut gen =
8895 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8896
8897 let mut results = Vec::new();
8898 for company in &self.config.companies {
8899 let spec = OpeningBalanceSpec::new(
8900 company.code.clone(),
8901 start_date,
8902 fiscal_year,
8903 company.currency.clone(),
8904 rust_decimal::Decimal::new(10_000_000, 0),
8905 industry,
8906 );
8907 let ob = gen.generate(&spec, coa, start_date, &company.code);
8908 results.push(ob);
8909 }
8910
8911 stats.opening_balance_count = results.len();
8912 info!("Opening balances generated: {} companies", results.len());
8913 self.check_resources_with_log("post-opening-balances")?;
8914
8915 Ok(results)
8916 }
8917
8918 fn phase_subledger_reconciliation(
8920 &mut self,
8921 subledger: &SubledgerSnapshot,
8922 entries: &[JournalEntry],
8923 stats: &mut EnhancedGenerationStatistics,
8924 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8925 if !self.config.balance.reconcile_subledgers {
8926 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8927 return Ok(Vec::new());
8928 }
8929 info!("Phase 9b: Reconciling GL to subledger balances");
8930
8931 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8932 .map(|d| d + chrono::Months::new(self.config.global.period_months))
8933 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8934
8935 let tracker_config = BalanceTrackerConfig {
8937 validate_on_each_entry: false,
8938 track_history: false,
8939 fail_on_validation_error: false,
8940 ..Default::default()
8941 };
8942 let recon_currency = self
8943 .config
8944 .companies
8945 .first()
8946 .map(|c| c.currency.clone())
8947 .unwrap_or_else(|| "USD".to_string());
8948 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8949 let validation_errors = tracker.apply_entries(entries);
8950 if !validation_errors.is_empty() {
8951 warn!(
8952 error_count = validation_errors.len(),
8953 "Balance tracker encountered validation errors during subledger reconciliation"
8954 );
8955 for err in &validation_errors {
8956 debug!("Balance validation error: {:?}", err);
8957 }
8958 }
8959
8960 let mut engine = datasynth_generators::ReconciliationEngine::new(
8961 datasynth_generators::ReconciliationConfig::default(),
8962 );
8963
8964 let mut results = Vec::new();
8965 let company_code = self
8966 .config
8967 .companies
8968 .first()
8969 .map(|c| c.code.as_str())
8970 .unwrap_or("1000");
8971
8972 if !subledger.ar_invoices.is_empty() {
8974 let gl_balance = tracker
8975 .get_account_balance(
8976 company_code,
8977 datasynth_core::accounts::control_accounts::AR_CONTROL,
8978 )
8979 .map(|b| b.closing_balance)
8980 .unwrap_or_default();
8981 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8982 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8983 }
8984
8985 if !subledger.ap_invoices.is_empty() {
8987 let gl_balance = tracker
8988 .get_account_balance(
8989 company_code,
8990 datasynth_core::accounts::control_accounts::AP_CONTROL,
8991 )
8992 .map(|b| b.closing_balance)
8993 .unwrap_or_default();
8994 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8995 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8996 }
8997
8998 if !subledger.fa_records.is_empty() {
9000 let gl_asset_balance = tracker
9001 .get_account_balance(
9002 company_code,
9003 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9004 )
9005 .map(|b| b.closing_balance)
9006 .unwrap_or_default();
9007 let gl_accum_depr_balance = tracker
9008 .get_account_balance(
9009 company_code,
9010 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9011 )
9012 .map(|b| b.closing_balance)
9013 .unwrap_or_default();
9014 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9015 subledger.fa_records.iter().collect();
9016 let (asset_recon, depr_recon) = engine.reconcile_fa(
9017 company_code,
9018 end_date,
9019 gl_asset_balance,
9020 gl_accum_depr_balance,
9021 &fa_refs,
9022 );
9023 results.push(asset_recon);
9024 results.push(depr_recon);
9025 }
9026
9027 if !subledger.inventory_positions.is_empty() {
9029 let gl_balance = tracker
9030 .get_account_balance(
9031 company_code,
9032 datasynth_core::accounts::control_accounts::INVENTORY,
9033 )
9034 .map(|b| b.closing_balance)
9035 .unwrap_or_default();
9036 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9037 subledger.inventory_positions.iter().collect();
9038 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9039 }
9040
9041 stats.subledger_reconciliation_count = results.len();
9042 let passed = results.iter().filter(|r| r.is_balanced()).count();
9043 let failed = results.len() - passed;
9044 info!(
9045 "Subledger reconciliation: {} checks, {} passed, {} failed",
9046 results.len(),
9047 passed,
9048 failed
9049 );
9050 self.check_resources_with_log("post-subledger-reconciliation")?;
9051
9052 Ok(results)
9053 }
9054
9055 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9057 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9058
9059 let coa_framework = self.resolve_coa_framework();
9060
9061 let mut gen = ChartOfAccountsGenerator::new(
9062 self.config.chart_of_accounts.complexity,
9063 self.config.global.industry,
9064 self.seed,
9065 )
9066 .with_coa_framework(coa_framework);
9067
9068 let coa = Arc::new(gen.generate());
9069 self.coa = Some(Arc::clone(&coa));
9070
9071 if let Some(pb) = pb {
9072 pb.finish_with_message("Chart of Accounts complete");
9073 }
9074
9075 Ok(coa)
9076 }
9077
9078 fn generate_master_data(&mut self) -> SynthResult<()> {
9080 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9081 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9082 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9083
9084 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9086
9087 let pack = self.primary_pack().clone();
9089
9090 let vendors_per_company = self.phase_config.vendors_per_company;
9092 let customers_per_company = self.phase_config.customers_per_company;
9093 let materials_per_company = self.phase_config.materials_per_company;
9094 let assets_per_company = self.phase_config.assets_per_company;
9095 let coa_framework = self.resolve_coa_framework();
9096
9097 let per_company_results: Vec<_> = self
9100 .config
9101 .companies
9102 .par_iter()
9103 .enumerate()
9104 .map(|(i, company)| {
9105 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9106 let pack = pack.clone();
9107
9108 let mut vendor_gen = VendorGenerator::new(company_seed);
9110 vendor_gen.set_country_pack(pack.clone());
9111 vendor_gen.set_coa_framework(coa_framework);
9112 vendor_gen.set_counter_offset(i * vendors_per_company);
9113 if self.config.vendor_network.enabled {
9115 let vn = &self.config.vendor_network;
9116 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9117 enabled: true,
9118 depth: vn.depth,
9119 tier1_count: datasynth_generators::TierCountConfig::new(
9120 vn.tier1.min,
9121 vn.tier1.max,
9122 ),
9123 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9124 vn.tier2_per_parent.min,
9125 vn.tier2_per_parent.max,
9126 ),
9127 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9128 vn.tier3_per_parent.min,
9129 vn.tier3_per_parent.max,
9130 ),
9131 cluster_distribution: datasynth_generators::ClusterDistribution {
9132 reliable_strategic: vn.clusters.reliable_strategic,
9133 standard_operational: vn.clusters.standard_operational,
9134 transactional: vn.clusters.transactional,
9135 problematic: vn.clusters.problematic,
9136 },
9137 concentration_limits: datasynth_generators::ConcentrationLimits {
9138 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9139 max_top5: vn.dependencies.top_5_concentration,
9140 },
9141 ..datasynth_generators::VendorNetworkConfig::default()
9142 });
9143 }
9144 let vendor_pool =
9145 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9146
9147 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9149 customer_gen.set_country_pack(pack.clone());
9150 customer_gen.set_coa_framework(coa_framework);
9151 customer_gen.set_counter_offset(i * customers_per_company);
9152 if self.config.customer_segmentation.enabled {
9154 let cs = &self.config.customer_segmentation;
9155 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9156 enabled: true,
9157 segment_distribution: datasynth_generators::SegmentDistribution {
9158 enterprise: cs.value_segments.enterprise.customer_share,
9159 mid_market: cs.value_segments.mid_market.customer_share,
9160 smb: cs.value_segments.smb.customer_share,
9161 consumer: cs.value_segments.consumer.customer_share,
9162 },
9163 referral_config: datasynth_generators::ReferralConfig {
9164 enabled: cs.networks.referrals.enabled,
9165 referral_rate: cs.networks.referrals.referral_rate,
9166 ..Default::default()
9167 },
9168 hierarchy_config: datasynth_generators::HierarchyConfig {
9169 enabled: cs.networks.corporate_hierarchies.enabled,
9170 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9171 ..Default::default()
9172 },
9173 ..Default::default()
9174 };
9175 customer_gen.set_segmentation_config(seg_cfg);
9176 }
9177 let customer_pool = customer_gen.generate_customer_pool(
9178 customers_per_company,
9179 &company.code,
9180 start_date,
9181 );
9182
9183 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9185 material_gen.set_country_pack(pack.clone());
9186 material_gen.set_counter_offset(i * materials_per_company);
9187 let material_pool = material_gen.generate_material_pool(
9188 materials_per_company,
9189 &company.code,
9190 start_date,
9191 );
9192
9193 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9195 let asset_pool = asset_gen.generate_asset_pool(
9196 assets_per_company,
9197 &company.code,
9198 (start_date, end_date),
9199 );
9200
9201 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9203 employee_gen.set_country_pack(pack);
9204 let employee_pool =
9205 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9206
9207 let employee_change_history =
9209 employee_gen.generate_all_change_history(&employee_pool, end_date);
9210
9211 let employee_ids: Vec<String> = employee_pool
9213 .employees
9214 .iter()
9215 .map(|e| e.employee_id.clone())
9216 .collect();
9217 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9218 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9219
9220 (
9221 vendor_pool.vendors,
9222 customer_pool.customers,
9223 material_pool.materials,
9224 asset_pool.assets,
9225 employee_pool.employees,
9226 employee_change_history,
9227 cost_centers,
9228 )
9229 })
9230 .collect();
9231
9232 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9234 per_company_results
9235 {
9236 self.master_data.vendors.extend(vendors);
9237 self.master_data.customers.extend(customers);
9238 self.master_data.materials.extend(materials);
9239 self.master_data.assets.extend(assets);
9240 self.master_data.employees.extend(employees);
9241 self.master_data.cost_centers.extend(cost_centers);
9242 self.master_data
9243 .employee_change_history
9244 .extend(change_history);
9245 }
9246
9247 if let Some(pb) = &pb {
9248 pb.inc(total);
9249 }
9250 if let Some(pb) = pb {
9251 pb.finish_with_message("Master data generation complete");
9252 }
9253
9254 Ok(())
9255 }
9256
9257 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9259 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9260 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9261
9262 let months = (self.config.global.period_months as usize).max(1);
9265 let p2p_count = self
9266 .phase_config
9267 .p2p_chains
9268 .min(self.master_data.vendors.len() * 2 * months);
9269 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9270
9271 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9273 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9274 p2p_gen.set_country_pack(self.primary_pack().clone());
9275
9276 for i in 0..p2p_count {
9277 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9278 let materials: Vec<&Material> = self
9279 .master_data
9280 .materials
9281 .iter()
9282 .skip(i % self.master_data.materials.len().max(1))
9283 .take(2.min(self.master_data.materials.len()))
9284 .collect();
9285
9286 if materials.is_empty() {
9287 continue;
9288 }
9289
9290 let company = &self.config.companies[i % self.config.companies.len()];
9291 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9292 let fiscal_period = po_date.month() as u8;
9293 let created_by = if self.master_data.employees.is_empty() {
9294 "SYSTEM"
9295 } else {
9296 self.master_data.employees[i % self.master_data.employees.len()]
9297 .user_id
9298 .as_str()
9299 };
9300
9301 let chain = p2p_gen.generate_chain(
9302 &company.code,
9303 vendor,
9304 &materials,
9305 po_date,
9306 start_date.year() as u16,
9307 fiscal_period,
9308 created_by,
9309 );
9310
9311 flows.purchase_orders.push(chain.purchase_order.clone());
9313 flows.goods_receipts.extend(chain.goods_receipts.clone());
9314 if let Some(vi) = &chain.vendor_invoice {
9315 flows.vendor_invoices.push(vi.clone());
9316 }
9317 if let Some(payment) = &chain.payment {
9318 flows.payments.push(payment.clone());
9319 }
9320 for remainder in &chain.remainder_payments {
9321 flows.payments.push(remainder.clone());
9322 }
9323 flows.p2p_chains.push(chain);
9324
9325 if let Some(pb) = &pb {
9326 pb.inc(1);
9327 }
9328 }
9329
9330 if let Some(pb) = pb {
9331 pb.finish_with_message("P2P document flows complete");
9332 }
9333
9334 let o2c_count = self
9337 .phase_config
9338 .o2c_chains
9339 .min(self.master_data.customers.len() * 2 * months);
9340 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9341
9342 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9344 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9345 o2c_gen.set_country_pack(self.primary_pack().clone());
9346
9347 for i in 0..o2c_count {
9348 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9349 let materials: Vec<&Material> = self
9350 .master_data
9351 .materials
9352 .iter()
9353 .skip(i % self.master_data.materials.len().max(1))
9354 .take(2.min(self.master_data.materials.len()))
9355 .collect();
9356
9357 if materials.is_empty() {
9358 continue;
9359 }
9360
9361 let company = &self.config.companies[i % self.config.companies.len()];
9362 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9363 let fiscal_period = so_date.month() as u8;
9364 let created_by = if self.master_data.employees.is_empty() {
9365 "SYSTEM"
9366 } else {
9367 self.master_data.employees[i % self.master_data.employees.len()]
9368 .user_id
9369 .as_str()
9370 };
9371
9372 let chain = o2c_gen.generate_chain(
9373 &company.code,
9374 customer,
9375 &materials,
9376 so_date,
9377 start_date.year() as u16,
9378 fiscal_period,
9379 created_by,
9380 );
9381
9382 flows.sales_orders.push(chain.sales_order.clone());
9384 flows.deliveries.extend(chain.deliveries.clone());
9385 if let Some(ci) = &chain.customer_invoice {
9386 flows.customer_invoices.push(ci.clone());
9387 }
9388 if let Some(receipt) = &chain.customer_receipt {
9389 flows.payments.push(receipt.clone());
9390 }
9391 for receipt in &chain.remainder_receipts {
9393 flows.payments.push(receipt.clone());
9394 }
9395 flows.o2c_chains.push(chain);
9396
9397 if let Some(pb) = &pb {
9398 pb.inc(1);
9399 }
9400 }
9401
9402 if let Some(pb) = pb {
9403 pb.finish_with_message("O2C document flows complete");
9404 }
9405
9406 {
9410 let mut refs = Vec::new();
9411 for doc in &flows.purchase_orders {
9412 refs.extend(doc.header.document_references.iter().cloned());
9413 }
9414 for doc in &flows.goods_receipts {
9415 refs.extend(doc.header.document_references.iter().cloned());
9416 }
9417 for doc in &flows.vendor_invoices {
9418 refs.extend(doc.header.document_references.iter().cloned());
9419 }
9420 for doc in &flows.sales_orders {
9421 refs.extend(doc.header.document_references.iter().cloned());
9422 }
9423 for doc in &flows.deliveries {
9424 refs.extend(doc.header.document_references.iter().cloned());
9425 }
9426 for doc in &flows.customer_invoices {
9427 refs.extend(doc.header.document_references.iter().cloned());
9428 }
9429 for doc in &flows.payments {
9430 refs.extend(doc.header.document_references.iter().cloned());
9431 }
9432 debug!(
9433 "Collected {} document cross-references from document headers",
9434 refs.len()
9435 );
9436 flows.document_references = refs;
9437 }
9438
9439 Ok(())
9440 }
9441
9442 fn generate_journal_entries(
9444 &mut self,
9445 coa: &Arc<ChartOfAccounts>,
9446 ) -> SynthResult<Vec<JournalEntry>> {
9447 use datasynth_core::traits::ParallelGenerator;
9448
9449 let total = self.calculate_total_transactions();
9450 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9451
9452 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9453 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9454 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9455
9456 let company_codes: Vec<String> = self
9457 .config
9458 .companies
9459 .iter()
9460 .map(|c| c.code.clone())
9461 .collect();
9462
9463 let generator = JournalEntryGenerator::new_with_params(
9464 self.config.transactions.clone(),
9465 Arc::clone(coa),
9466 company_codes,
9467 start_date,
9468 end_date,
9469 self.seed,
9470 );
9471
9472 let je_pack = self.primary_pack();
9476
9477 let mut generator = generator
9478 .with_master_data(
9479 &self.master_data.vendors,
9480 &self.master_data.customers,
9481 &self.master_data.materials,
9482 )
9483 .with_country_pack_names(je_pack)
9484 .with_country_pack_temporal(
9485 self.config.temporal_patterns.clone(),
9486 self.seed + 200,
9487 je_pack,
9488 )
9489 .with_persona_errors(true)
9490 .with_fraud_config(self.config.fraud.clone());
9491
9492 if self.config.temporal.enabled {
9494 let drift_config = self.config.temporal.to_core_config();
9495 generator = generator.with_drift_config(drift_config, self.seed + 100);
9496 }
9497
9498 self.check_memory_limit()?;
9500
9501 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9503
9504 let entries = if total >= 10_000 && num_threads > 1 {
9508 let sub_generators = generator.split(num_threads);
9511 let entries_per_thread = total as usize / num_threads;
9512 let remainder = total as usize % num_threads;
9513
9514 let batches: Vec<Vec<JournalEntry>> = sub_generators
9515 .into_par_iter()
9516 .enumerate()
9517 .map(|(i, mut gen)| {
9518 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9519 gen.generate_batch(count)
9520 })
9521 .collect();
9522
9523 let entries = JournalEntryGenerator::merge_results(batches);
9525
9526 if let Some(pb) = &pb {
9527 pb.inc(total);
9528 }
9529 entries
9530 } else {
9531 let mut entries = Vec::with_capacity(total as usize);
9533 for _ in 0..total {
9534 let entry = generator.generate();
9535 entries.push(entry);
9536 if let Some(pb) = &pb {
9537 pb.inc(1);
9538 }
9539 }
9540 entries
9541 };
9542
9543 if let Some(pb) = pb {
9544 pb.finish_with_message("Journal entries complete");
9545 }
9546
9547 Ok(entries)
9548 }
9549
9550 fn generate_jes_from_document_flows(
9555 &mut self,
9556 flows: &DocumentFlowSnapshot,
9557 ) -> SynthResult<Vec<JournalEntry>> {
9558 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9559 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9560
9561 let je_config = match self.resolve_coa_framework() {
9562 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9563 CoAFramework::GermanSkr04 => {
9564 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9565 DocumentFlowJeConfig::from(&fa)
9566 }
9567 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9568 };
9569
9570 let populate_fec = je_config.populate_fec_fields;
9571 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9572
9573 if populate_fec {
9577 let mut aux_lookup = std::collections::HashMap::new();
9578 for vendor in &self.master_data.vendors {
9579 if let Some(ref aux) = vendor.auxiliary_gl_account {
9580 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9581 }
9582 }
9583 for customer in &self.master_data.customers {
9584 if let Some(ref aux) = customer.auxiliary_gl_account {
9585 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9586 }
9587 }
9588 if !aux_lookup.is_empty() {
9589 generator.set_auxiliary_account_lookup(aux_lookup);
9590 }
9591 }
9592
9593 let mut entries = Vec::new();
9594
9595 for chain in &flows.p2p_chains {
9597 let chain_entries = generator.generate_from_p2p_chain(chain);
9598 entries.extend(chain_entries);
9599 if let Some(pb) = &pb {
9600 pb.inc(1);
9601 }
9602 }
9603
9604 for chain in &flows.o2c_chains {
9606 let chain_entries = generator.generate_from_o2c_chain(chain);
9607 entries.extend(chain_entries);
9608 if let Some(pb) = &pb {
9609 pb.inc(1);
9610 }
9611 }
9612
9613 if let Some(pb) = pb {
9614 pb.finish_with_message(format!(
9615 "Generated {} JEs from document flows",
9616 entries.len()
9617 ));
9618 }
9619
9620 Ok(entries)
9621 }
9622
9623 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9629 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9630
9631 let mut jes = Vec::with_capacity(payroll_runs.len());
9632
9633 for run in payroll_runs {
9634 let mut je = JournalEntry::new_simple(
9635 format!("JE-PAYROLL-{}", run.payroll_id),
9636 run.company_code.clone(),
9637 run.run_date,
9638 format!("Payroll {}", run.payroll_id),
9639 );
9640
9641 je.add_line(JournalEntryLine {
9643 line_number: 1,
9644 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9645 debit_amount: run.total_gross,
9646 reference: Some(run.payroll_id.clone()),
9647 text: Some(format!(
9648 "Payroll {} ({} employees)",
9649 run.payroll_id, run.employee_count
9650 )),
9651 ..Default::default()
9652 });
9653
9654 je.add_line(JournalEntryLine {
9656 line_number: 2,
9657 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9658 credit_amount: run.total_gross,
9659 reference: Some(run.payroll_id.clone()),
9660 ..Default::default()
9661 });
9662
9663 jes.push(je);
9664 }
9665
9666 jes
9667 }
9668
9669 fn link_document_flows_to_subledgers(
9674 &mut self,
9675 flows: &DocumentFlowSnapshot,
9676 ) -> SynthResult<SubledgerSnapshot> {
9677 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9678 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9679
9680 let vendor_names: std::collections::HashMap<String, String> = self
9682 .master_data
9683 .vendors
9684 .iter()
9685 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9686 .collect();
9687 let customer_names: std::collections::HashMap<String, String> = self
9688 .master_data
9689 .customers
9690 .iter()
9691 .map(|c| (c.customer_id.clone(), c.name.clone()))
9692 .collect();
9693
9694 let mut linker = DocumentFlowLinker::new()
9695 .with_vendor_names(vendor_names)
9696 .with_customer_names(customer_names);
9697
9698 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9700 if let Some(pb) = &pb {
9701 pb.inc(flows.vendor_invoices.len() as u64);
9702 }
9703
9704 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9706 if let Some(pb) = &pb {
9707 pb.inc(flows.customer_invoices.len() as u64);
9708 }
9709
9710 if let Some(pb) = pb {
9711 pb.finish_with_message(format!(
9712 "Linked {} AP and {} AR invoices",
9713 ap_invoices.len(),
9714 ar_invoices.len()
9715 ));
9716 }
9717
9718 Ok(SubledgerSnapshot {
9719 ap_invoices,
9720 ar_invoices,
9721 fa_records: Vec::new(),
9722 inventory_positions: Vec::new(),
9723 inventory_movements: Vec::new(),
9724 ar_aging_reports: Vec::new(),
9726 ap_aging_reports: Vec::new(),
9727 depreciation_runs: Vec::new(),
9729 inventory_valuations: Vec::new(),
9730 dunning_runs: Vec::new(),
9732 dunning_letters: Vec::new(),
9733 })
9734 }
9735
9736 #[allow(clippy::too_many_arguments)]
9741 fn generate_ocpm_events(
9742 &mut self,
9743 flows: &DocumentFlowSnapshot,
9744 sourcing: &SourcingSnapshot,
9745 hr: &HrSnapshot,
9746 manufacturing: &ManufacturingSnapshot,
9747 banking: &BankingSnapshot,
9748 audit: &AuditSnapshot,
9749 financial_reporting: &FinancialReportingSnapshot,
9750 ) -> SynthResult<OcpmSnapshot> {
9751 let total_chains = flows.p2p_chains.len()
9752 + flows.o2c_chains.len()
9753 + sourcing.sourcing_projects.len()
9754 + hr.payroll_runs.len()
9755 + manufacturing.production_orders.len()
9756 + banking.customers.len()
9757 + audit.engagements.len()
9758 + financial_reporting.bank_reconciliations.len();
9759 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9760
9761 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9763 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9764
9765 let ocpm_config = OcpmGeneratorConfig {
9767 generate_p2p: true,
9768 generate_o2c: true,
9769 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9770 generate_h2r: !hr.payroll_runs.is_empty(),
9771 generate_mfg: !manufacturing.production_orders.is_empty(),
9772 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9773 generate_bank: !banking.customers.is_empty(),
9774 generate_audit: !audit.engagements.is_empty(),
9775 happy_path_rate: 0.75,
9776 exception_path_rate: 0.20,
9777 error_path_rate: 0.05,
9778 add_duration_variability: true,
9779 duration_std_dev_factor: 0.3,
9780 };
9781 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9782 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9783
9784 let available_users: Vec<String> = self
9786 .master_data
9787 .employees
9788 .iter()
9789 .take(20)
9790 .map(|e| e.user_id.clone())
9791 .collect();
9792
9793 let fallback_date =
9795 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9796 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9797 .unwrap_or(fallback_date);
9798 let base_midnight = base_date
9799 .and_hms_opt(0, 0, 0)
9800 .expect("midnight is always valid");
9801 let base_datetime =
9802 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9803
9804 let add_result = |event_log: &mut OcpmEventLog,
9806 result: datasynth_ocpm::CaseGenerationResult| {
9807 for event in result.events {
9808 event_log.add_event(event);
9809 }
9810 for object in result.objects {
9811 event_log.add_object(object);
9812 }
9813 for relationship in result.relationships {
9814 event_log.add_relationship(relationship);
9815 }
9816 for corr in result.correlation_events {
9817 event_log.add_correlation_event(corr);
9818 }
9819 event_log.add_case(result.case_trace);
9820 };
9821
9822 for chain in &flows.p2p_chains {
9824 let po = &chain.purchase_order;
9825 let documents = P2pDocuments::new(
9826 &po.header.document_id,
9827 &po.vendor_id,
9828 &po.header.company_code,
9829 po.total_net_amount,
9830 &po.header.currency,
9831 &ocpm_uuid_factory,
9832 )
9833 .with_goods_receipt(
9834 chain
9835 .goods_receipts
9836 .first()
9837 .map(|gr| gr.header.document_id.as_str())
9838 .unwrap_or(""),
9839 &ocpm_uuid_factory,
9840 )
9841 .with_invoice(
9842 chain
9843 .vendor_invoice
9844 .as_ref()
9845 .map(|vi| vi.header.document_id.as_str())
9846 .unwrap_or(""),
9847 &ocpm_uuid_factory,
9848 )
9849 .with_payment(
9850 chain
9851 .payment
9852 .as_ref()
9853 .map(|p| p.header.document_id.as_str())
9854 .unwrap_or(""),
9855 &ocpm_uuid_factory,
9856 );
9857
9858 let start_time =
9859 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9860 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9861 add_result(&mut event_log, result);
9862
9863 if let Some(pb) = &pb {
9864 pb.inc(1);
9865 }
9866 }
9867
9868 for chain in &flows.o2c_chains {
9870 let so = &chain.sales_order;
9871 let documents = O2cDocuments::new(
9872 &so.header.document_id,
9873 &so.customer_id,
9874 &so.header.company_code,
9875 so.total_net_amount,
9876 &so.header.currency,
9877 &ocpm_uuid_factory,
9878 )
9879 .with_delivery(
9880 chain
9881 .deliveries
9882 .first()
9883 .map(|d| d.header.document_id.as_str())
9884 .unwrap_or(""),
9885 &ocpm_uuid_factory,
9886 )
9887 .with_invoice(
9888 chain
9889 .customer_invoice
9890 .as_ref()
9891 .map(|ci| ci.header.document_id.as_str())
9892 .unwrap_or(""),
9893 &ocpm_uuid_factory,
9894 )
9895 .with_receipt(
9896 chain
9897 .customer_receipt
9898 .as_ref()
9899 .map(|r| r.header.document_id.as_str())
9900 .unwrap_or(""),
9901 &ocpm_uuid_factory,
9902 );
9903
9904 let start_time =
9905 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9906 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9907 add_result(&mut event_log, result);
9908
9909 if let Some(pb) = &pb {
9910 pb.inc(1);
9911 }
9912 }
9913
9914 for project in &sourcing.sourcing_projects {
9916 let vendor_id = sourcing
9918 .contracts
9919 .iter()
9920 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9921 .map(|c| c.vendor_id.clone())
9922 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9923 .or_else(|| {
9924 self.master_data
9925 .vendors
9926 .first()
9927 .map(|v| v.vendor_id.clone())
9928 })
9929 .unwrap_or_else(|| "V000".to_string());
9930 let mut docs = S2cDocuments::new(
9931 &project.project_id,
9932 &vendor_id,
9933 &project.company_code,
9934 project.estimated_annual_spend,
9935 &ocpm_uuid_factory,
9936 );
9937 if let Some(rfx) = sourcing
9939 .rfx_events
9940 .iter()
9941 .find(|r| r.sourcing_project_id == project.project_id)
9942 {
9943 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9944 if let Some(bid) = sourcing.bids.iter().find(|b| {
9946 b.rfx_id == rfx.rfx_id
9947 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9948 }) {
9949 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9950 }
9951 }
9952 if let Some(contract) = sourcing
9954 .contracts
9955 .iter()
9956 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9957 {
9958 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9959 }
9960 let start_time = base_datetime - chrono::Duration::days(90);
9961 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9962 add_result(&mut event_log, result);
9963
9964 if let Some(pb) = &pb {
9965 pb.inc(1);
9966 }
9967 }
9968
9969 for run in &hr.payroll_runs {
9971 let employee_id = hr
9973 .payroll_line_items
9974 .iter()
9975 .find(|li| li.payroll_id == run.payroll_id)
9976 .map(|li| li.employee_id.as_str())
9977 .unwrap_or("EMP000");
9978 let docs = H2rDocuments::new(
9979 &run.payroll_id,
9980 employee_id,
9981 &run.company_code,
9982 run.total_gross,
9983 &ocpm_uuid_factory,
9984 )
9985 .with_time_entries(
9986 hr.time_entries
9987 .iter()
9988 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9989 .take(5)
9990 .map(|t| t.entry_id.as_str())
9991 .collect(),
9992 );
9993 let start_time = base_datetime - chrono::Duration::days(30);
9994 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9995 add_result(&mut event_log, result);
9996
9997 if let Some(pb) = &pb {
9998 pb.inc(1);
9999 }
10000 }
10001
10002 for order in &manufacturing.production_orders {
10004 let mut docs = MfgDocuments::new(
10005 &order.order_id,
10006 &order.material_id,
10007 &order.company_code,
10008 order.planned_quantity,
10009 &ocpm_uuid_factory,
10010 )
10011 .with_operations(
10012 order
10013 .operations
10014 .iter()
10015 .map(|o| format!("OP-{:04}", o.operation_number))
10016 .collect::<Vec<_>>()
10017 .iter()
10018 .map(std::string::String::as_str)
10019 .collect(),
10020 );
10021 if let Some(insp) = manufacturing
10023 .quality_inspections
10024 .iter()
10025 .find(|i| i.reference_id == order.order_id)
10026 {
10027 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10028 }
10029 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10031 cc.items
10032 .iter()
10033 .any(|item| item.material_id == order.material_id)
10034 }) {
10035 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10036 }
10037 let start_time = base_datetime - chrono::Duration::days(60);
10038 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10039 add_result(&mut event_log, result);
10040
10041 if let Some(pb) = &pb {
10042 pb.inc(1);
10043 }
10044 }
10045
10046 for customer in &banking.customers {
10048 let customer_id_str = customer.customer_id.to_string();
10049 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10050 if let Some(account) = banking
10052 .accounts
10053 .iter()
10054 .find(|a| a.primary_owner_id == customer.customer_id)
10055 {
10056 let account_id_str = account.account_id.to_string();
10057 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10058 let txn_strs: Vec<String> = banking
10060 .transactions
10061 .iter()
10062 .filter(|t| t.account_id == account.account_id)
10063 .take(10)
10064 .map(|t| t.transaction_id.to_string())
10065 .collect();
10066 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10067 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10068 .transactions
10069 .iter()
10070 .filter(|t| t.account_id == account.account_id)
10071 .take(10)
10072 .map(|t| t.amount)
10073 .collect();
10074 if !txn_ids.is_empty() {
10075 docs = docs.with_transactions(txn_ids, txn_amounts);
10076 }
10077 }
10078 let start_time = base_datetime - chrono::Duration::days(180);
10079 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10080 add_result(&mut event_log, result);
10081
10082 if let Some(pb) = &pb {
10083 pb.inc(1);
10084 }
10085 }
10086
10087 for engagement in &audit.engagements {
10089 let engagement_id_str = engagement.engagement_id.to_string();
10090 let docs = AuditDocuments::new(
10091 &engagement_id_str,
10092 &engagement.client_entity_id,
10093 &ocpm_uuid_factory,
10094 )
10095 .with_workpapers(
10096 audit
10097 .workpapers
10098 .iter()
10099 .filter(|w| w.engagement_id == engagement.engagement_id)
10100 .take(10)
10101 .map(|w| w.workpaper_id.to_string())
10102 .collect::<Vec<_>>()
10103 .iter()
10104 .map(std::string::String::as_str)
10105 .collect(),
10106 )
10107 .with_evidence(
10108 audit
10109 .evidence
10110 .iter()
10111 .filter(|e| e.engagement_id == engagement.engagement_id)
10112 .take(10)
10113 .map(|e| e.evidence_id.to_string())
10114 .collect::<Vec<_>>()
10115 .iter()
10116 .map(std::string::String::as_str)
10117 .collect(),
10118 )
10119 .with_risks(
10120 audit
10121 .risk_assessments
10122 .iter()
10123 .filter(|r| r.engagement_id == engagement.engagement_id)
10124 .take(5)
10125 .map(|r| r.risk_id.to_string())
10126 .collect::<Vec<_>>()
10127 .iter()
10128 .map(std::string::String::as_str)
10129 .collect(),
10130 )
10131 .with_findings(
10132 audit
10133 .findings
10134 .iter()
10135 .filter(|f| f.engagement_id == engagement.engagement_id)
10136 .take(5)
10137 .map(|f| f.finding_id.to_string())
10138 .collect::<Vec<_>>()
10139 .iter()
10140 .map(std::string::String::as_str)
10141 .collect(),
10142 )
10143 .with_judgments(
10144 audit
10145 .judgments
10146 .iter()
10147 .filter(|j| j.engagement_id == engagement.engagement_id)
10148 .take(5)
10149 .map(|j| j.judgment_id.to_string())
10150 .collect::<Vec<_>>()
10151 .iter()
10152 .map(std::string::String::as_str)
10153 .collect(),
10154 );
10155 let start_time = base_datetime - chrono::Duration::days(120);
10156 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10157 add_result(&mut event_log, result);
10158
10159 if let Some(pb) = &pb {
10160 pb.inc(1);
10161 }
10162 }
10163
10164 for recon in &financial_reporting.bank_reconciliations {
10166 let docs = BankReconDocuments::new(
10167 &recon.reconciliation_id,
10168 &recon.bank_account_id,
10169 &recon.company_code,
10170 recon.bank_ending_balance,
10171 &ocpm_uuid_factory,
10172 )
10173 .with_statement_lines(
10174 recon
10175 .statement_lines
10176 .iter()
10177 .take(20)
10178 .map(|l| l.line_id.as_str())
10179 .collect(),
10180 )
10181 .with_reconciling_items(
10182 recon
10183 .reconciling_items
10184 .iter()
10185 .take(10)
10186 .map(|i| i.item_id.as_str())
10187 .collect(),
10188 );
10189 let start_time = base_datetime - chrono::Duration::days(30);
10190 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10191 add_result(&mut event_log, result);
10192
10193 if let Some(pb) = &pb {
10194 pb.inc(1);
10195 }
10196 }
10197
10198 event_log.compute_variants();
10200
10201 let summary = event_log.summary();
10202
10203 if let Some(pb) = pb {
10204 pb.finish_with_message(format!(
10205 "Generated {} OCPM events, {} objects",
10206 summary.event_count, summary.object_count
10207 ));
10208 }
10209
10210 Ok(OcpmSnapshot {
10211 event_count: summary.event_count,
10212 object_count: summary.object_count,
10213 case_count: summary.case_count,
10214 event_log: Some(event_log),
10215 })
10216 }
10217
10218 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10220 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10221
10222 let total_rate = if self.config.anomaly_injection.enabled {
10225 self.config.anomaly_injection.rates.total_rate
10226 } else if self.config.fraud.enabled {
10227 self.config.fraud.fraud_rate
10228 } else {
10229 0.02
10230 };
10231
10232 let fraud_rate = if self.config.anomaly_injection.enabled {
10233 self.config.anomaly_injection.rates.fraud_rate
10234 } else {
10235 AnomalyRateConfig::default().fraud_rate
10236 };
10237
10238 let error_rate = if self.config.anomaly_injection.enabled {
10239 self.config.anomaly_injection.rates.error_rate
10240 } else {
10241 AnomalyRateConfig::default().error_rate
10242 };
10243
10244 let process_issue_rate = if self.config.anomaly_injection.enabled {
10245 self.config.anomaly_injection.rates.process_rate
10246 } else {
10247 AnomalyRateConfig::default().process_issue_rate
10248 };
10249
10250 let anomaly_config = AnomalyInjectorConfig {
10251 rates: AnomalyRateConfig {
10252 total_rate,
10253 fraud_rate,
10254 error_rate,
10255 process_issue_rate,
10256 ..Default::default()
10257 },
10258 seed: self.seed + 5000,
10259 ..Default::default()
10260 };
10261
10262 let mut injector = AnomalyInjector::new(anomaly_config);
10263 let result = injector.process_entries(entries);
10264
10265 if let Some(pb) = &pb {
10266 pb.inc(entries.len() as u64);
10267 pb.finish_with_message("Anomaly injection complete");
10268 }
10269
10270 let mut by_type = HashMap::new();
10271 for label in &result.labels {
10272 *by_type
10273 .entry(format!("{:?}", label.anomaly_type))
10274 .or_insert(0) += 1;
10275 }
10276
10277 Ok(AnomalyLabels {
10278 labels: result.labels,
10279 summary: Some(result.summary),
10280 by_type,
10281 })
10282 }
10283
10284 fn validate_journal_entries(
10293 &mut self,
10294 entries: &[JournalEntry],
10295 ) -> SynthResult<BalanceValidationResult> {
10296 let clean_entries: Vec<&JournalEntry> = entries
10298 .iter()
10299 .filter(|e| {
10300 e.header
10301 .header_text
10302 .as_ref()
10303 .map(|t| !t.contains("[HUMAN_ERROR:"))
10304 .unwrap_or(true)
10305 })
10306 .collect();
10307
10308 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10309
10310 let config = BalanceTrackerConfig {
10312 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10316 };
10317 let validation_currency = self
10318 .config
10319 .companies
10320 .first()
10321 .map(|c| c.currency.clone())
10322 .unwrap_or_else(|| "USD".to_string());
10323
10324 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10325
10326 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10328 let errors = tracker.apply_entries(&clean_refs);
10329
10330 if let Some(pb) = &pb {
10331 pb.inc(entries.len() as u64);
10332 }
10333
10334 let has_unbalanced = tracker
10337 .get_validation_errors()
10338 .iter()
10339 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10340
10341 let mut all_errors = errors;
10344 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10345 let company_codes: Vec<String> = self
10346 .config
10347 .companies
10348 .iter()
10349 .map(|c| c.code.clone())
10350 .collect();
10351
10352 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10354 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10355
10356 for company_code in &company_codes {
10357 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10358 all_errors.push(e);
10359 }
10360 }
10361
10362 let stats = tracker.get_statistics();
10364
10365 let is_balanced = all_errors.is_empty();
10367
10368 if let Some(pb) = pb {
10369 let msg = if is_balanced {
10370 "Balance validation passed"
10371 } else {
10372 "Balance validation completed with errors"
10373 };
10374 pb.finish_with_message(msg);
10375 }
10376
10377 Ok(BalanceValidationResult {
10378 validated: true,
10379 is_balanced,
10380 entries_processed: stats.entries_processed,
10381 total_debits: stats.total_debits,
10382 total_credits: stats.total_credits,
10383 accounts_tracked: stats.accounts_tracked,
10384 companies_tracked: stats.companies_tracked,
10385 validation_errors: all_errors,
10386 has_unbalanced_entries: has_unbalanced,
10387 })
10388 }
10389
10390 fn inject_data_quality(
10395 &mut self,
10396 entries: &mut [JournalEntry],
10397 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10398 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10399
10400 let config = if self.config.data_quality.enabled {
10403 let dq = &self.config.data_quality;
10404 DataQualityConfig {
10405 enable_missing_values: dq.missing_values.enabled,
10406 missing_values: datasynth_generators::MissingValueConfig {
10407 global_rate: dq.effective_missing_rate(),
10408 ..Default::default()
10409 },
10410 enable_format_variations: dq.format_variations.enabled,
10411 format_variations: datasynth_generators::FormatVariationConfig {
10412 date_variation_rate: dq.format_variations.dates.rate,
10413 amount_variation_rate: dq.format_variations.amounts.rate,
10414 identifier_variation_rate: dq.format_variations.identifiers.rate,
10415 ..Default::default()
10416 },
10417 enable_duplicates: dq.duplicates.enabled,
10418 duplicates: datasynth_generators::DuplicateConfig {
10419 duplicate_rate: dq.effective_duplicate_rate(),
10420 ..Default::default()
10421 },
10422 enable_typos: dq.typos.enabled,
10423 typos: datasynth_generators::TypoConfig {
10424 char_error_rate: dq.effective_typo_rate(),
10425 ..Default::default()
10426 },
10427 enable_encoding_issues: dq.encoding_issues.enabled,
10428 encoding_issue_rate: dq.encoding_issues.rate,
10429 seed: self.seed.wrapping_add(77), track_statistics: true,
10431 }
10432 } else {
10433 DataQualityConfig::minimal()
10434 };
10435 let mut injector = DataQualityInjector::new(config);
10436
10437 injector.set_country_pack(self.primary_pack().clone());
10439
10440 let context = HashMap::new();
10442
10443 for entry in entries.iter_mut() {
10444 if let Some(text) = &entry.header.header_text {
10446 let processed = injector.process_text_field(
10447 "header_text",
10448 text,
10449 &entry.header.document_id.to_string(),
10450 &context,
10451 );
10452 match processed {
10453 Some(new_text) if new_text != *text => {
10454 entry.header.header_text = Some(new_text);
10455 }
10456 None => {
10457 entry.header.header_text = None; }
10459 _ => {}
10460 }
10461 }
10462
10463 if let Some(ref_text) = &entry.header.reference {
10465 let processed = injector.process_text_field(
10466 "reference",
10467 ref_text,
10468 &entry.header.document_id.to_string(),
10469 &context,
10470 );
10471 match processed {
10472 Some(new_text) if new_text != *ref_text => {
10473 entry.header.reference = Some(new_text);
10474 }
10475 None => {
10476 entry.header.reference = None;
10477 }
10478 _ => {}
10479 }
10480 }
10481
10482 let user_persona = entry.header.user_persona.clone();
10484 if let Some(processed) = injector.process_text_field(
10485 "user_persona",
10486 &user_persona,
10487 &entry.header.document_id.to_string(),
10488 &context,
10489 ) {
10490 if processed != user_persona {
10491 entry.header.user_persona = processed;
10492 }
10493 }
10494
10495 for line in &mut entry.lines {
10497 if let Some(ref text) = line.line_text {
10499 let processed = injector.process_text_field(
10500 "line_text",
10501 text,
10502 &entry.header.document_id.to_string(),
10503 &context,
10504 );
10505 match processed {
10506 Some(new_text) if new_text != *text => {
10507 line.line_text = Some(new_text);
10508 }
10509 None => {
10510 line.line_text = None;
10511 }
10512 _ => {}
10513 }
10514 }
10515
10516 if let Some(cc) = &line.cost_center {
10518 let processed = injector.process_text_field(
10519 "cost_center",
10520 cc,
10521 &entry.header.document_id.to_string(),
10522 &context,
10523 );
10524 match processed {
10525 Some(new_cc) if new_cc != *cc => {
10526 line.cost_center = Some(new_cc);
10527 }
10528 None => {
10529 line.cost_center = None;
10530 }
10531 _ => {}
10532 }
10533 }
10534 }
10535
10536 if let Some(pb) = &pb {
10537 pb.inc(1);
10538 }
10539 }
10540
10541 if let Some(pb) = pb {
10542 pb.finish_with_message("Data quality injection complete");
10543 }
10544
10545 let quality_issues = injector.issues().to_vec();
10546 Ok((injector.stats().clone(), quality_issues))
10547 }
10548
10549 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10560 let use_fsm = self
10562 .config
10563 .audit
10564 .fsm
10565 .as_ref()
10566 .map(|f| f.enabled)
10567 .unwrap_or(false);
10568
10569 if use_fsm {
10570 return self.generate_audit_data_with_fsm(entries);
10571 }
10572
10573 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10575 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10576 let fiscal_year = start_date.year() as u16;
10577 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10578
10579 let total_revenue: rust_decimal::Decimal = entries
10581 .iter()
10582 .flat_map(|e| e.lines.iter())
10583 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10584 .map(|l| l.credit_amount)
10585 .sum();
10586
10587 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10589
10590 let mut snapshot = AuditSnapshot::default();
10591
10592 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10594 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10595 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10596 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10597 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10598 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10599 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10600 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10601 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10602 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10603 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10604 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10605
10606 let accounts: Vec<String> = self
10608 .coa
10609 .as_ref()
10610 .map(|coa| {
10611 coa.get_postable_accounts()
10612 .iter()
10613 .map(|acc| acc.account_code().to_string())
10614 .collect()
10615 })
10616 .unwrap_or_default();
10617
10618 for (i, company) in self.config.companies.iter().enumerate() {
10620 let company_revenue = total_revenue
10622 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10623
10624 let engagements_for_company =
10626 self.phase_config.audit_engagements / self.config.companies.len().max(1);
10627 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10628 1
10629 } else {
10630 0
10631 };
10632
10633 for _eng_idx in 0..(engagements_for_company + extra) {
10634 let mut engagement = engagement_gen.generate_engagement(
10636 &company.code,
10637 &company.name,
10638 fiscal_year,
10639 period_end,
10640 company_revenue,
10641 None, );
10643
10644 if !self.master_data.employees.is_empty() {
10646 let emp_count = self.master_data.employees.len();
10647 let base = (i * 10 + _eng_idx) % emp_count;
10649 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10650 .employee_id
10651 .clone();
10652 engagement.engagement_manager_id = self.master_data.employees
10653 [(base + 1) % emp_count]
10654 .employee_id
10655 .clone();
10656 let real_team: Vec<String> = engagement
10657 .team_member_ids
10658 .iter()
10659 .enumerate()
10660 .map(|(j, _)| {
10661 self.master_data.employees[(base + 2 + j) % emp_count]
10662 .employee_id
10663 .clone()
10664 })
10665 .collect();
10666 engagement.team_member_ids = real_team;
10667 }
10668
10669 if let Some(pb) = &pb {
10670 pb.inc(1);
10671 }
10672
10673 let team_members: Vec<String> = engagement.team_member_ids.clone();
10675
10676 let workpapers =
10678 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10679
10680 for wp in &workpapers {
10681 if let Some(pb) = &pb {
10682 pb.inc(1);
10683 }
10684
10685 let evidence = evidence_gen.generate_evidence_for_workpaper(
10687 wp,
10688 &team_members,
10689 wp.preparer_date,
10690 );
10691
10692 for _ in &evidence {
10693 if let Some(pb) = &pb {
10694 pb.inc(1);
10695 }
10696 }
10697
10698 snapshot.evidence.extend(evidence);
10699 }
10700
10701 let risks =
10703 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10704
10705 for _ in &risks {
10706 if let Some(pb) = &pb {
10707 pb.inc(1);
10708 }
10709 }
10710 snapshot.risk_assessments.extend(risks);
10711
10712 let findings = finding_gen.generate_findings_for_engagement(
10714 &engagement,
10715 &workpapers,
10716 &team_members,
10717 );
10718
10719 for _ in &findings {
10720 if let Some(pb) = &pb {
10721 pb.inc(1);
10722 }
10723 }
10724 snapshot.findings.extend(findings);
10725
10726 let judgments =
10728 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10729
10730 for _ in &judgments {
10731 if let Some(pb) = &pb {
10732 pb.inc(1);
10733 }
10734 }
10735 snapshot.judgments.extend(judgments);
10736
10737 let (confs, resps) =
10739 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10740 snapshot.confirmations.extend(confs);
10741 snapshot.confirmation_responses.extend(resps);
10742
10743 let team_pairs: Vec<(String, String)> = team_members
10745 .iter()
10746 .map(|id| {
10747 let name = self
10748 .master_data
10749 .employees
10750 .iter()
10751 .find(|e| e.employee_id == *id)
10752 .map(|e| e.display_name.clone())
10753 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10754 (id.clone(), name)
10755 })
10756 .collect();
10757 for wp in &workpapers {
10758 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10759 snapshot.procedure_steps.extend(steps);
10760 }
10761
10762 for wp in &workpapers {
10764 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10765 snapshot.samples.push(sample);
10766 }
10767 }
10768
10769 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10771 snapshot.analytical_results.extend(analytical);
10772
10773 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10775 snapshot.ia_functions.push(ia_func);
10776 snapshot.ia_reports.extend(ia_reports);
10777
10778 let vendor_names: Vec<String> = self
10780 .master_data
10781 .vendors
10782 .iter()
10783 .map(|v| v.name.clone())
10784 .collect();
10785 let customer_names: Vec<String> = self
10786 .master_data
10787 .customers
10788 .iter()
10789 .map(|c| c.name.clone())
10790 .collect();
10791 let (parties, rp_txns) =
10792 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10793 snapshot.related_parties.extend(parties);
10794 snapshot.related_party_transactions.extend(rp_txns);
10795
10796 snapshot.workpapers.extend(workpapers);
10798
10799 {
10801 let scope_id = format!(
10802 "SCOPE-{}-{}",
10803 engagement.engagement_id.simple(),
10804 &engagement.client_entity_id
10805 );
10806 let scope = datasynth_core::models::audit::AuditScope::new(
10807 scope_id.clone(),
10808 engagement.engagement_id.to_string(),
10809 engagement.client_entity_id.clone(),
10810 engagement.materiality,
10811 );
10812 let mut eng = engagement;
10814 eng.scope_id = Some(scope_id);
10815 snapshot.audit_scopes.push(scope);
10816 snapshot.engagements.push(eng);
10817 }
10818 }
10819 }
10820
10821 if self.config.companies.len() > 1 {
10825 let group_materiality = snapshot
10828 .engagements
10829 .first()
10830 .map(|e| e.materiality)
10831 .unwrap_or_else(|| {
10832 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10833 total_revenue * pct
10834 });
10835
10836 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10837 let group_engagement_id = snapshot
10838 .engagements
10839 .first()
10840 .map(|e| e.engagement_id.to_string())
10841 .unwrap_or_else(|| "GROUP-ENG".to_string());
10842
10843 let component_snapshot = component_gen.generate(
10844 &self.config.companies,
10845 group_materiality,
10846 &group_engagement_id,
10847 period_end,
10848 );
10849
10850 snapshot.component_auditors = component_snapshot.component_auditors;
10851 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10852 snapshot.component_instructions = component_snapshot.component_instructions;
10853 snapshot.component_reports = component_snapshot.component_reports;
10854
10855 info!(
10856 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10857 snapshot.component_auditors.len(),
10858 snapshot.component_instructions.len(),
10859 snapshot.component_reports.len(),
10860 );
10861 }
10862
10863 {
10867 let applicable_framework = self
10868 .config
10869 .accounting_standards
10870 .framework
10871 .as_ref()
10872 .map(|f| format!("{f:?}"))
10873 .unwrap_or_else(|| "IFRS".to_string());
10874
10875 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10876 let entity_count = self.config.companies.len();
10877
10878 for engagement in &snapshot.engagements {
10879 let company = self
10880 .config
10881 .companies
10882 .iter()
10883 .find(|c| c.code == engagement.client_entity_id);
10884 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10885 let letter_date = engagement.planning_start;
10886 let letter = letter_gen.generate(
10887 &engagement.engagement_id.to_string(),
10888 &engagement.client_name,
10889 entity_count,
10890 engagement.period_end_date,
10891 currency,
10892 &applicable_framework,
10893 letter_date,
10894 );
10895 snapshot.engagement_letters.push(letter);
10896 }
10897
10898 info!(
10899 "ISA 210 engagement letters: {} generated",
10900 snapshot.engagement_letters.len()
10901 );
10902 }
10903
10904 {
10908 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10909 let entity_codes: Vec<String> = self
10910 .config
10911 .companies
10912 .iter()
10913 .map(|c| c.code.clone())
10914 .collect();
10915 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10916 info!(
10917 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10918 subsequent.len(),
10919 subsequent
10920 .iter()
10921 .filter(|e| matches!(
10922 e.classification,
10923 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10924 ))
10925 .count(),
10926 subsequent
10927 .iter()
10928 .filter(|e| matches!(
10929 e.classification,
10930 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10931 ))
10932 .count(),
10933 );
10934 snapshot.subsequent_events = subsequent;
10935 }
10936
10937 {
10941 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10942 let entity_codes: Vec<String> = self
10943 .config
10944 .companies
10945 .iter()
10946 .map(|c| c.code.clone())
10947 .collect();
10948 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10949 info!(
10950 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10951 soc_snapshot.service_organizations.len(),
10952 soc_snapshot.soc_reports.len(),
10953 soc_snapshot.user_entity_controls.len(),
10954 );
10955 snapshot.service_organizations = soc_snapshot.service_organizations;
10956 snapshot.soc_reports = soc_snapshot.soc_reports;
10957 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10958 }
10959
10960 {
10964 use datasynth_generators::audit::going_concern_generator::{
10965 GoingConcernGenerator, GoingConcernInput,
10966 };
10967 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10968 let entity_codes: Vec<String> = self
10969 .config
10970 .companies
10971 .iter()
10972 .map(|c| c.code.clone())
10973 .collect();
10974 let assessment_date = period_end + chrono::Duration::days(75);
10976 let period_label = format!("FY{}", period_end.year());
10977
10978 let gc_inputs: Vec<GoingConcernInput> = self
10989 .config
10990 .companies
10991 .iter()
10992 .map(|company| {
10993 let code = &company.code;
10994 let mut revenue = rust_decimal::Decimal::ZERO;
10995 let mut expenses = rust_decimal::Decimal::ZERO;
10996 let mut current_assets = rust_decimal::Decimal::ZERO;
10997 let mut current_liabs = rust_decimal::Decimal::ZERO;
10998 let mut total_debt = rust_decimal::Decimal::ZERO;
10999
11000 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11001 for line in &je.lines {
11002 let acct = line.gl_account.as_str();
11003 let net = line.debit_amount - line.credit_amount;
11004 if acct.starts_with('4') {
11005 revenue -= net;
11007 } else if acct.starts_with('6') {
11008 expenses += net;
11010 }
11011 if acct.starts_with('1') {
11013 if let Ok(n) = acct.parse::<u32>() {
11015 if (1000..=1499).contains(&n) {
11016 current_assets += net;
11017 }
11018 }
11019 } else if acct.starts_with('2') {
11020 if let Ok(n) = acct.parse::<u32>() {
11021 if (2000..=2499).contains(&n) {
11022 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11025 total_debt -= net;
11027 }
11028 }
11029 }
11030 }
11031 }
11032
11033 let net_income = revenue - expenses;
11034 let working_capital = current_assets - current_liabs;
11035 let operating_cash_flow = net_income;
11038
11039 GoingConcernInput {
11040 entity_code: code.clone(),
11041 net_income,
11042 working_capital,
11043 operating_cash_flow,
11044 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11045 assessment_date,
11046 }
11047 })
11048 .collect();
11049
11050 let assessments = if gc_inputs.is_empty() {
11051 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11052 } else {
11053 gc_gen.generate_for_entities_with_inputs(
11054 &entity_codes,
11055 &gc_inputs,
11056 assessment_date,
11057 &period_label,
11058 )
11059 };
11060 info!(
11061 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11062 assessments.len(),
11063 assessments.iter().filter(|a| matches!(
11064 a.auditor_conclusion,
11065 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11066 )).count(),
11067 assessments.iter().filter(|a| matches!(
11068 a.auditor_conclusion,
11069 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11070 )).count(),
11071 assessments.iter().filter(|a| matches!(
11072 a.auditor_conclusion,
11073 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11074 )).count(),
11075 );
11076 snapshot.going_concern_assessments = assessments;
11077 }
11078
11079 {
11083 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11084 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11085 let entity_codes: Vec<String> = self
11086 .config
11087 .companies
11088 .iter()
11089 .map(|c| c.code.clone())
11090 .collect();
11091 let estimates = est_gen.generate_for_entities(&entity_codes);
11092 info!(
11093 "ISA 540 accounting estimates: {} estimates across {} entities \
11094 ({} with retrospective reviews, {} with auditor point estimates)",
11095 estimates.len(),
11096 entity_codes.len(),
11097 estimates
11098 .iter()
11099 .filter(|e| e.retrospective_review.is_some())
11100 .count(),
11101 estimates
11102 .iter()
11103 .filter(|e| e.auditor_point_estimate.is_some())
11104 .count(),
11105 );
11106 snapshot.accounting_estimates = estimates;
11107 }
11108
11109 {
11113 use datasynth_generators::audit::audit_opinion_generator::{
11114 AuditOpinionGenerator, AuditOpinionInput,
11115 };
11116
11117 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11118
11119 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11121 .engagements
11122 .iter()
11123 .map(|eng| {
11124 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11126 .findings
11127 .iter()
11128 .filter(|f| f.engagement_id == eng.engagement_id)
11129 .cloned()
11130 .collect();
11131
11132 let gc = snapshot
11134 .going_concern_assessments
11135 .iter()
11136 .find(|g| g.entity_code == eng.client_entity_id)
11137 .cloned();
11138
11139 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11141 snapshot.component_reports.clone();
11142
11143 let auditor = self
11144 .master_data
11145 .employees
11146 .first()
11147 .map(|e| e.display_name.clone())
11148 .unwrap_or_else(|| "Global Audit LLP".into());
11149
11150 let partner = self
11151 .master_data
11152 .employees
11153 .get(1)
11154 .map(|e| e.display_name.clone())
11155 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11156
11157 AuditOpinionInput {
11158 entity_code: eng.client_entity_id.clone(),
11159 entity_name: eng.client_name.clone(),
11160 engagement_id: eng.engagement_id,
11161 period_end: eng.period_end_date,
11162 findings: eng_findings,
11163 going_concern: gc,
11164 component_reports: comp_reports,
11165 is_us_listed: {
11167 let fw = &self.config.audit_standards.isa_compliance.framework;
11168 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11169 },
11170 auditor_name: auditor,
11171 engagement_partner: partner,
11172 }
11173 })
11174 .collect();
11175
11176 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11177
11178 for go in &generated_opinions {
11179 snapshot
11180 .key_audit_matters
11181 .extend(go.key_audit_matters.clone());
11182 }
11183 snapshot.audit_opinions = generated_opinions
11184 .into_iter()
11185 .map(|go| go.opinion)
11186 .collect();
11187
11188 info!(
11189 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11190 snapshot.audit_opinions.len(),
11191 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11192 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11193 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11194 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11195 );
11196 }
11197
11198 {
11202 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11203
11204 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11205
11206 for (i, company) in self.config.companies.iter().enumerate() {
11207 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11209 .engagements
11210 .iter()
11211 .filter(|e| e.client_entity_id == company.code)
11212 .map(|e| e.engagement_id)
11213 .collect();
11214
11215 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11216 .findings
11217 .iter()
11218 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11219 .cloned()
11220 .collect();
11221
11222 let emp_count = self.master_data.employees.len();
11224 let ceo_name = if emp_count > 0 {
11225 self.master_data.employees[i % emp_count]
11226 .display_name
11227 .clone()
11228 } else {
11229 format!("CEO of {}", company.name)
11230 };
11231 let cfo_name = if emp_count > 1 {
11232 self.master_data.employees[(i + 1) % emp_count]
11233 .display_name
11234 .clone()
11235 } else {
11236 format!("CFO of {}", company.name)
11237 };
11238
11239 let materiality = snapshot
11241 .engagements
11242 .iter()
11243 .find(|e| e.client_entity_id == company.code)
11244 .map(|e| e.materiality)
11245 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11246
11247 let input = SoxGeneratorInput {
11248 company_code: company.code.clone(),
11249 company_name: company.name.clone(),
11250 fiscal_year,
11251 period_end,
11252 findings: company_findings,
11253 ceo_name,
11254 cfo_name,
11255 materiality_threshold: materiality,
11256 revenue_percent: rust_decimal::Decimal::from(100),
11257 assets_percent: rust_decimal::Decimal::from(100),
11258 significant_accounts: vec![
11259 "Revenue".into(),
11260 "Accounts Receivable".into(),
11261 "Inventory".into(),
11262 "Fixed Assets".into(),
11263 "Accounts Payable".into(),
11264 ],
11265 };
11266
11267 let (certs, assessment) = sox_gen.generate(&input);
11268 snapshot.sox_302_certifications.extend(certs);
11269 snapshot.sox_404_assessments.push(assessment);
11270 }
11271
11272 info!(
11273 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11274 snapshot.sox_302_certifications.len(),
11275 snapshot.sox_404_assessments.len(),
11276 snapshot
11277 .sox_404_assessments
11278 .iter()
11279 .filter(|a| a.icfr_effective)
11280 .count(),
11281 snapshot
11282 .sox_404_assessments
11283 .iter()
11284 .filter(|a| !a.icfr_effective)
11285 .count(),
11286 );
11287 }
11288
11289 {
11293 use datasynth_generators::audit::materiality_generator::{
11294 MaterialityGenerator, MaterialityInput,
11295 };
11296
11297 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11298
11299 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11303
11304 for company in &self.config.companies {
11305 let company_code = company.code.clone();
11306
11307 let company_revenue: rust_decimal::Decimal = entries
11309 .iter()
11310 .filter(|e| e.company_code() == company_code)
11311 .flat_map(|e| e.lines.iter())
11312 .filter(|l| l.account_code.starts_with('4'))
11313 .map(|l| l.credit_amount)
11314 .sum();
11315
11316 let total_assets: rust_decimal::Decimal = entries
11318 .iter()
11319 .filter(|e| e.company_code() == company_code)
11320 .flat_map(|e| e.lines.iter())
11321 .filter(|l| l.account_code.starts_with('1'))
11322 .map(|l| l.debit_amount)
11323 .sum();
11324
11325 let total_expenses: rust_decimal::Decimal = entries
11327 .iter()
11328 .filter(|e| e.company_code() == company_code)
11329 .flat_map(|e| e.lines.iter())
11330 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11331 .map(|l| l.debit_amount)
11332 .sum();
11333
11334 let equity: rust_decimal::Decimal = entries
11336 .iter()
11337 .filter(|e| e.company_code() == company_code)
11338 .flat_map(|e| e.lines.iter())
11339 .filter(|l| l.account_code.starts_with('3'))
11340 .map(|l| l.credit_amount)
11341 .sum();
11342
11343 let pretax_income = company_revenue - total_expenses;
11344
11345 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11347 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11348 .unwrap_or(rust_decimal::Decimal::ONE);
11349 (
11350 total_revenue * w,
11351 total_revenue * w * rust_decimal::Decimal::from(3),
11352 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11353 total_revenue * w * rust_decimal::Decimal::from(2),
11354 )
11355 } else {
11356 (company_revenue, total_assets, pretax_income, equity)
11357 };
11358
11359 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11362 entity_code: company_code,
11363 period: format!("FY{}", fiscal_year),
11364 revenue: rev,
11365 pretax_income: pti,
11366 total_assets: assets,
11367 equity: eq,
11368 gross_profit,
11369 });
11370 }
11371
11372 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11373
11374 info!(
11375 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11376 {} total assets, {} equity benchmarks)",
11377 snapshot.materiality_calculations.len(),
11378 snapshot
11379 .materiality_calculations
11380 .iter()
11381 .filter(|m| matches!(
11382 m.benchmark,
11383 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11384 ))
11385 .count(),
11386 snapshot
11387 .materiality_calculations
11388 .iter()
11389 .filter(|m| matches!(
11390 m.benchmark,
11391 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11392 ))
11393 .count(),
11394 snapshot
11395 .materiality_calculations
11396 .iter()
11397 .filter(|m| matches!(
11398 m.benchmark,
11399 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11400 ))
11401 .count(),
11402 snapshot
11403 .materiality_calculations
11404 .iter()
11405 .filter(|m| matches!(
11406 m.benchmark,
11407 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11408 ))
11409 .count(),
11410 );
11411 }
11412
11413 {
11417 use datasynth_generators::audit::cra_generator::CraGenerator;
11418
11419 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11420
11421 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11423 .audit_scopes
11424 .iter()
11425 .map(|s| (s.entity_code.clone(), s.id.clone()))
11426 .collect();
11427
11428 for company in &self.config.companies {
11429 let cras = cra_gen.generate_for_entity(&company.code, None);
11430 let scope_id = entity_scope_map.get(&company.code).cloned();
11431 let cras_with_scope: Vec<_> = cras
11432 .into_iter()
11433 .map(|mut cra| {
11434 cra.scope_id = scope_id.clone();
11435 cra
11436 })
11437 .collect();
11438 snapshot.combined_risk_assessments.extend(cras_with_scope);
11439 }
11440
11441 let significant_count = snapshot
11442 .combined_risk_assessments
11443 .iter()
11444 .filter(|c| c.significant_risk)
11445 .count();
11446 let high_cra_count = snapshot
11447 .combined_risk_assessments
11448 .iter()
11449 .filter(|c| {
11450 matches!(
11451 c.combined_risk,
11452 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11453 )
11454 })
11455 .count();
11456
11457 info!(
11458 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11459 snapshot.combined_risk_assessments.len(),
11460 significant_count,
11461 high_cra_count,
11462 );
11463 }
11464
11465 {
11469 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11470
11471 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11472
11473 for company in &self.config.companies {
11475 let entity_code = company.code.clone();
11476
11477 let tolerable_error = snapshot
11479 .materiality_calculations
11480 .iter()
11481 .find(|m| m.entity_code == entity_code)
11482 .map(|m| m.tolerable_error);
11483
11484 let entity_cras: Vec<_> = snapshot
11486 .combined_risk_assessments
11487 .iter()
11488 .filter(|c| c.entity_code == entity_code)
11489 .cloned()
11490 .collect();
11491
11492 if !entity_cras.is_empty() {
11493 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11494 snapshot.sampling_plans.extend(plans);
11495 snapshot.sampled_items.extend(items);
11496 }
11497 }
11498
11499 let misstatement_count = snapshot
11500 .sampled_items
11501 .iter()
11502 .filter(|i| i.misstatement_found)
11503 .count();
11504
11505 info!(
11506 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11507 snapshot.sampling_plans.len(),
11508 snapshot.sampled_items.len(),
11509 misstatement_count,
11510 );
11511 }
11512
11513 {
11517 use datasynth_generators::audit::scots_generator::{
11518 ScotsGenerator, ScotsGeneratorConfig,
11519 };
11520
11521 let ic_enabled = self.config.intercompany.enabled;
11522
11523 let config = ScotsGeneratorConfig {
11524 intercompany_enabled: ic_enabled,
11525 ..ScotsGeneratorConfig::default()
11526 };
11527 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11528
11529 for company in &self.config.companies {
11530 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11531 snapshot
11532 .significant_transaction_classes
11533 .extend(entity_scots);
11534 }
11535
11536 let estimation_count = snapshot
11537 .significant_transaction_classes
11538 .iter()
11539 .filter(|s| {
11540 matches!(
11541 s.transaction_type,
11542 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11543 )
11544 })
11545 .count();
11546
11547 info!(
11548 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11549 snapshot.significant_transaction_classes.len(),
11550 estimation_count,
11551 );
11552 }
11553
11554 {
11558 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11559
11560 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11561 let entity_codes: Vec<String> = self
11562 .config
11563 .companies
11564 .iter()
11565 .map(|c| c.code.clone())
11566 .collect();
11567 let unusual_flags =
11568 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11569 info!(
11570 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11571 unusual_flags.len(),
11572 unusual_flags
11573 .iter()
11574 .filter(|f| matches!(
11575 f.severity,
11576 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11577 ))
11578 .count(),
11579 unusual_flags
11580 .iter()
11581 .filter(|f| matches!(
11582 f.severity,
11583 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11584 ))
11585 .count(),
11586 unusual_flags
11587 .iter()
11588 .filter(|f| matches!(
11589 f.severity,
11590 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11591 ))
11592 .count(),
11593 );
11594 snapshot.unusual_items = unusual_flags;
11595 }
11596
11597 {
11601 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11602
11603 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11604 let entity_codes: Vec<String> = self
11605 .config
11606 .companies
11607 .iter()
11608 .map(|c| c.code.clone())
11609 .collect();
11610 let current_period_label = format!("FY{fiscal_year}");
11611 let prior_period_label = format!("FY{}", fiscal_year - 1);
11612 let analytical_rels = ar_gen.generate_for_entities(
11613 &entity_codes,
11614 entries,
11615 ¤t_period_label,
11616 &prior_period_label,
11617 );
11618 let out_of_range = analytical_rels
11619 .iter()
11620 .filter(|r| !r.within_expected_range)
11621 .count();
11622 info!(
11623 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11624 analytical_rels.len(),
11625 out_of_range,
11626 );
11627 snapshot.analytical_relationships = analytical_rels;
11628 }
11629
11630 if let Some(pb) = pb {
11631 pb.finish_with_message(format!(
11632 "Audit data: {} engagements, {} workpapers, {} evidence, \
11633 {} confirmations, {} procedure steps, {} samples, \
11634 {} analytical, {} IA funcs, {} related parties, \
11635 {} component auditors, {} letters, {} subsequent events, \
11636 {} service orgs, {} going concern, {} accounting estimates, \
11637 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11638 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11639 {} unusual items, {} analytical relationships",
11640 snapshot.engagements.len(),
11641 snapshot.workpapers.len(),
11642 snapshot.evidence.len(),
11643 snapshot.confirmations.len(),
11644 snapshot.procedure_steps.len(),
11645 snapshot.samples.len(),
11646 snapshot.analytical_results.len(),
11647 snapshot.ia_functions.len(),
11648 snapshot.related_parties.len(),
11649 snapshot.component_auditors.len(),
11650 snapshot.engagement_letters.len(),
11651 snapshot.subsequent_events.len(),
11652 snapshot.service_organizations.len(),
11653 snapshot.going_concern_assessments.len(),
11654 snapshot.accounting_estimates.len(),
11655 snapshot.audit_opinions.len(),
11656 snapshot.key_audit_matters.len(),
11657 snapshot.sox_302_certifications.len(),
11658 snapshot.sox_404_assessments.len(),
11659 snapshot.materiality_calculations.len(),
11660 snapshot.combined_risk_assessments.len(),
11661 snapshot.sampling_plans.len(),
11662 snapshot.significant_transaction_classes.len(),
11663 snapshot.unusual_items.len(),
11664 snapshot.analytical_relationships.len(),
11665 ));
11666 }
11667
11668 {
11675 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11676 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11677 debug!(
11678 "PCAOB-ISA mappings generated: {} mappings",
11679 snapshot.isa_pcaob_mappings.len()
11680 );
11681 }
11682
11683 {
11690 use datasynth_standards::audit::isa_reference::IsaStandard;
11691 snapshot.isa_mappings = IsaStandard::standard_entries();
11692 debug!(
11693 "ISA standard entries generated: {} standards",
11694 snapshot.isa_mappings.len()
11695 );
11696 }
11697
11698 {
11701 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11702 .engagements
11703 .iter()
11704 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11705 .collect();
11706
11707 for rpt in &mut snapshot.related_party_transactions {
11708 if rpt.journal_entry_id.is_some() {
11709 continue; }
11711 let entity = engagement_by_id
11712 .get(&rpt.engagement_id.to_string())
11713 .copied()
11714 .unwrap_or("");
11715
11716 let best_je = entries
11718 .iter()
11719 .filter(|je| je.header.company_code == entity)
11720 .min_by_key(|je| {
11721 (je.header.posting_date - rpt.transaction_date)
11722 .num_days()
11723 .abs()
11724 });
11725
11726 if let Some(je) = best_je {
11727 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11728 }
11729 }
11730
11731 let linked = snapshot
11732 .related_party_transactions
11733 .iter()
11734 .filter(|t| t.journal_entry_id.is_some())
11735 .count();
11736 debug!(
11737 "Linked {}/{} related party transactions to journal entries",
11738 linked,
11739 snapshot.related_party_transactions.len()
11740 );
11741 }
11742
11743 Ok(snapshot)
11744 }
11745
11746 fn generate_audit_data_with_fsm(
11753 &mut self,
11754 entries: &[JournalEntry],
11755 ) -> SynthResult<AuditSnapshot> {
11756 use datasynth_audit_fsm::{
11757 context::EngagementContext,
11758 engine::AuditFsmEngine,
11759 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11760 };
11761 use rand::SeedableRng;
11762 use rand_chacha::ChaCha8Rng;
11763
11764 info!("Audit FSM: generating audit data via FSM engine");
11765
11766 let fsm_config = self
11767 .config
11768 .audit
11769 .fsm
11770 .as_ref()
11771 .expect("FSM config must be present when FSM is enabled");
11772
11773 let bwp = match fsm_config.blueprint.as_str() {
11775 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11776 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11777 _ => {
11778 warn!(
11779 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11780 fsm_config.blueprint
11781 );
11782 BlueprintWithPreconditions::load_builtin_fsa()
11783 }
11784 }
11785 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11786
11787 let overlay = match fsm_config.overlay.as_str() {
11789 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11790 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11791 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11792 _ => {
11793 warn!(
11794 "Unknown FSM overlay '{}', falling back to builtin:default",
11795 fsm_config.overlay
11796 );
11797 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11798 }
11799 }
11800 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11801
11802 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11804 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11805 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11806
11807 let company = self.config.companies.first();
11809 let company_code = company
11810 .map(|c| c.code.clone())
11811 .unwrap_or_else(|| "UNKNOWN".to_string());
11812 let company_name = company
11813 .map(|c| c.name.clone())
11814 .unwrap_or_else(|| "Unknown Company".to_string());
11815 let currency = company
11816 .map(|c| c.currency.clone())
11817 .unwrap_or_else(|| "USD".to_string());
11818
11819 let entity_entries: Vec<_> = entries
11821 .iter()
11822 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11823 .cloned()
11824 .collect();
11825 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
11829 .iter()
11830 .flat_map(|e| e.lines.iter())
11831 .filter(|l| l.account_code.starts_with('4'))
11832 .map(|l| l.credit_amount - l.debit_amount)
11833 .sum();
11834
11835 let total_assets: rust_decimal::Decimal = entries
11836 .iter()
11837 .flat_map(|e| e.lines.iter())
11838 .filter(|l| l.account_code.starts_with('1'))
11839 .map(|l| l.debit_amount - l.credit_amount)
11840 .sum();
11841
11842 let total_expenses: rust_decimal::Decimal = entries
11843 .iter()
11844 .flat_map(|e| e.lines.iter())
11845 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11846 .map(|l| l.debit_amount)
11847 .sum();
11848
11849 let equity: rust_decimal::Decimal = entries
11850 .iter()
11851 .flat_map(|e| e.lines.iter())
11852 .filter(|l| l.account_code.starts_with('3'))
11853 .map(|l| l.credit_amount - l.debit_amount)
11854 .sum();
11855
11856 let total_debt: rust_decimal::Decimal = entries
11857 .iter()
11858 .flat_map(|e| e.lines.iter())
11859 .filter(|l| l.account_code.starts_with('2'))
11860 .map(|l| l.credit_amount - l.debit_amount)
11861 .sum();
11862
11863 let pretax_income = total_revenue - total_expenses;
11864
11865 let cogs: rust_decimal::Decimal = entries
11866 .iter()
11867 .flat_map(|e| e.lines.iter())
11868 .filter(|l| l.account_code.starts_with('5'))
11869 .map(|l| l.debit_amount)
11870 .sum();
11871 let gross_profit = total_revenue - cogs;
11872
11873 let current_assets: rust_decimal::Decimal = entries
11874 .iter()
11875 .flat_map(|e| e.lines.iter())
11876 .filter(|l| {
11877 l.account_code.starts_with("10")
11878 || l.account_code.starts_with("11")
11879 || l.account_code.starts_with("12")
11880 || l.account_code.starts_with("13")
11881 })
11882 .map(|l| l.debit_amount - l.credit_amount)
11883 .sum();
11884 let current_liabilities: rust_decimal::Decimal = entries
11885 .iter()
11886 .flat_map(|e| e.lines.iter())
11887 .filter(|l| {
11888 l.account_code.starts_with("20")
11889 || l.account_code.starts_with("21")
11890 || l.account_code.starts_with("22")
11891 })
11892 .map(|l| l.credit_amount - l.debit_amount)
11893 .sum();
11894 let working_capital = current_assets - current_liabilities;
11895
11896 let depreciation: rust_decimal::Decimal = entries
11897 .iter()
11898 .flat_map(|e| e.lines.iter())
11899 .filter(|l| l.account_code.starts_with("60"))
11900 .map(|l| l.debit_amount)
11901 .sum();
11902 let operating_cash_flow = pretax_income + depreciation;
11903
11904 let accounts: Vec<String> = self
11906 .coa
11907 .as_ref()
11908 .map(|coa| {
11909 coa.get_postable_accounts()
11910 .iter()
11911 .map(|acc| acc.account_code().to_string())
11912 .collect()
11913 })
11914 .unwrap_or_default();
11915
11916 let team_member_ids: Vec<String> = self
11918 .master_data
11919 .employees
11920 .iter()
11921 .take(8) .map(|e| e.employee_id.clone())
11923 .collect();
11924 let team_member_pairs: Vec<(String, String)> = self
11925 .master_data
11926 .employees
11927 .iter()
11928 .take(8)
11929 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11930 .collect();
11931
11932 let vendor_names: Vec<String> = self
11933 .master_data
11934 .vendors
11935 .iter()
11936 .map(|v| v.name.clone())
11937 .collect();
11938 let customer_names: Vec<String> = self
11939 .master_data
11940 .customers
11941 .iter()
11942 .map(|c| c.name.clone())
11943 .collect();
11944
11945 let entity_codes: Vec<String> = self
11946 .config
11947 .companies
11948 .iter()
11949 .map(|c| c.code.clone())
11950 .collect();
11951
11952 let journal_entry_ids: Vec<String> = entries
11954 .iter()
11955 .take(50)
11956 .map(|e| e.header.document_id.to_string())
11957 .collect();
11958
11959 let mut account_balances = std::collections::HashMap::<String, f64>::new();
11961 for entry in entries {
11962 for line in &entry.lines {
11963 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11964 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11965 *account_balances
11966 .entry(line.account_code.clone())
11967 .or_insert(0.0) += debit_f64 - credit_f64;
11968 }
11969 }
11970
11971 let control_ids: Vec<String> = Vec::new();
11976 let anomaly_refs: Vec<String> = Vec::new();
11977
11978 let mut context = EngagementContext {
11979 company_code,
11980 company_name,
11981 fiscal_year: start_date.year(),
11982 currency,
11983 total_revenue,
11984 total_assets,
11985 engagement_start: start_date,
11986 report_date: period_end,
11987 pretax_income,
11988 equity,
11989 gross_profit,
11990 working_capital,
11991 operating_cash_flow,
11992 total_debt,
11993 team_member_ids,
11994 team_member_pairs,
11995 accounts,
11996 vendor_names,
11997 customer_names,
11998 journal_entry_ids,
11999 account_balances,
12000 control_ids,
12001 anomaly_refs,
12002 journal_entries: entries.to_vec(),
12003 is_us_listed: false,
12004 entity_codes,
12005 auditor_firm_name: "DataSynth Audit LLP".into(),
12006 accounting_framework: self
12007 .config
12008 .accounting_standards
12009 .framework
12010 .map(|f| match f {
12011 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12012 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12013 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12014 "French GAAP"
12015 }
12016 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12017 "German GAAP"
12018 }
12019 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12020 "Dual Reporting"
12021 }
12022 })
12023 .unwrap_or("IFRS")
12024 .into(),
12025 };
12026
12027 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12029 let rng = ChaCha8Rng::seed_from_u64(seed);
12030 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12031
12032 let mut result = engine
12033 .run_engagement(&context)
12034 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12035
12036 info!(
12037 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12038 {} phases completed, duration {:.1}h",
12039 result.event_log.len(),
12040 result.artifacts.total_artifacts(),
12041 result.anomalies.len(),
12042 result.phases_completed.len(),
12043 result.total_duration_hours,
12044 );
12045
12046 let tb_entity = context.company_code.clone();
12048 let tb_fy = context.fiscal_year;
12049 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12050 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12051 entries,
12052 &tb_entity,
12053 tb_fy,
12054 self.coa.as_ref().map(|c| c.as_ref()),
12055 );
12056
12057 let bag = result.artifacts;
12059 let mut snapshot = AuditSnapshot {
12060 engagements: bag.engagements,
12061 engagement_letters: bag.engagement_letters,
12062 materiality_calculations: bag.materiality_calculations,
12063 risk_assessments: bag.risk_assessments,
12064 combined_risk_assessments: bag.combined_risk_assessments,
12065 workpapers: bag.workpapers,
12066 evidence: bag.evidence,
12067 findings: bag.findings,
12068 judgments: bag.judgments,
12069 sampling_plans: bag.sampling_plans,
12070 sampled_items: bag.sampled_items,
12071 analytical_results: bag.analytical_results,
12072 going_concern_assessments: bag.going_concern_assessments,
12073 subsequent_events: bag.subsequent_events,
12074 audit_opinions: bag.audit_opinions,
12075 key_audit_matters: bag.key_audit_matters,
12076 procedure_steps: bag.procedure_steps,
12077 samples: bag.samples,
12078 confirmations: bag.confirmations,
12079 confirmation_responses: bag.confirmation_responses,
12080 fsm_event_trail: Some(result.event_log),
12082 ..Default::default()
12084 };
12085
12086 {
12088 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12089 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12090 }
12091 {
12092 use datasynth_standards::audit::isa_reference::IsaStandard;
12093 snapshot.isa_mappings = IsaStandard::standard_entries();
12094 }
12095
12096 info!(
12097 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12098 {} risk assessments, {} findings, {} materiality calcs",
12099 snapshot.engagements.len(),
12100 snapshot.workpapers.len(),
12101 snapshot.evidence.len(),
12102 snapshot.risk_assessments.len(),
12103 snapshot.findings.len(),
12104 snapshot.materiality_calculations.len(),
12105 );
12106
12107 Ok(snapshot)
12108 }
12109
12110 fn export_graphs(
12117 &mut self,
12118 entries: &[JournalEntry],
12119 _coa: &Arc<ChartOfAccounts>,
12120 stats: &mut EnhancedGenerationStatistics,
12121 ) -> SynthResult<GraphExportSnapshot> {
12122 let pb = self.create_progress_bar(100, "Exporting Graphs");
12123
12124 let mut snapshot = GraphExportSnapshot::default();
12125
12126 let output_dir = self
12128 .output_path
12129 .clone()
12130 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12131 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12132
12133 for graph_type in &self.config.graph_export.graph_types {
12135 if let Some(pb) = &pb {
12136 pb.inc(10);
12137 }
12138
12139 let graph_config = TransactionGraphConfig {
12141 include_vendors: false,
12142 include_customers: false,
12143 create_debit_credit_edges: true,
12144 include_document_nodes: graph_type.include_document_nodes,
12145 min_edge_weight: graph_type.min_edge_weight,
12146 aggregate_parallel_edges: graph_type.aggregate_edges,
12147 framework: None,
12148 };
12149
12150 let mut builder = TransactionGraphBuilder::new(graph_config);
12151 builder.add_journal_entries(entries);
12152 let graph = builder.build();
12153
12154 stats.graph_node_count += graph.node_count();
12156 stats.graph_edge_count += graph.edge_count();
12157
12158 if let Some(pb) = &pb {
12159 pb.inc(40);
12160 }
12161
12162 for format in &self.config.graph_export.formats {
12164 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12165
12166 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12168 warn!("Failed to create graph output directory: {}", e);
12169 continue;
12170 }
12171
12172 match format {
12173 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12174 let pyg_config = PyGExportConfig {
12175 common: datasynth_graph::CommonExportConfig {
12176 export_node_features: true,
12177 export_edge_features: true,
12178 export_node_labels: true,
12179 export_edge_labels: true,
12180 export_masks: true,
12181 train_ratio: self.config.graph_export.train_ratio,
12182 val_ratio: self.config.graph_export.validation_ratio,
12183 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12184 },
12185 one_hot_categoricals: false,
12186 };
12187
12188 let exporter = PyGExporter::new(pyg_config);
12189 match exporter.export(&graph, &format_dir) {
12190 Ok(metadata) => {
12191 snapshot.exports.insert(
12192 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12193 GraphExportInfo {
12194 name: graph_type.name.clone(),
12195 format: "pytorch_geometric".to_string(),
12196 output_path: format_dir.clone(),
12197 node_count: metadata.num_nodes,
12198 edge_count: metadata.num_edges,
12199 },
12200 );
12201 snapshot.graph_count += 1;
12202 }
12203 Err(e) => {
12204 warn!("Failed to export PyTorch Geometric graph: {}", e);
12205 }
12206 }
12207 }
12208 datasynth_config::schema::GraphExportFormat::Neo4j => {
12209 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12210
12211 let neo4j_config = Neo4jExportConfig {
12212 export_node_properties: true,
12213 export_edge_properties: true,
12214 export_features: true,
12215 generate_cypher: true,
12216 generate_admin_import: true,
12217 database_name: "synth".to_string(),
12218 cypher_batch_size: 1000,
12219 };
12220
12221 let exporter = Neo4jExporter::new(neo4j_config);
12222 match exporter.export(&graph, &format_dir) {
12223 Ok(metadata) => {
12224 snapshot.exports.insert(
12225 format!("{}_{}", graph_type.name, "neo4j"),
12226 GraphExportInfo {
12227 name: graph_type.name.clone(),
12228 format: "neo4j".to_string(),
12229 output_path: format_dir.clone(),
12230 node_count: metadata.num_nodes,
12231 edge_count: metadata.num_edges,
12232 },
12233 );
12234 snapshot.graph_count += 1;
12235 }
12236 Err(e) => {
12237 warn!("Failed to export Neo4j graph: {}", e);
12238 }
12239 }
12240 }
12241 datasynth_config::schema::GraphExportFormat::Dgl => {
12242 use datasynth_graph::{DGLExportConfig, DGLExporter};
12243
12244 let dgl_config = DGLExportConfig {
12245 common: datasynth_graph::CommonExportConfig {
12246 export_node_features: true,
12247 export_edge_features: true,
12248 export_node_labels: true,
12249 export_edge_labels: true,
12250 export_masks: true,
12251 train_ratio: self.config.graph_export.train_ratio,
12252 val_ratio: self.config.graph_export.validation_ratio,
12253 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12254 },
12255 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12256 include_pickle_script: true, };
12258
12259 let exporter = DGLExporter::new(dgl_config);
12260 match exporter.export(&graph, &format_dir) {
12261 Ok(metadata) => {
12262 snapshot.exports.insert(
12263 format!("{}_{}", graph_type.name, "dgl"),
12264 GraphExportInfo {
12265 name: graph_type.name.clone(),
12266 format: "dgl".to_string(),
12267 output_path: format_dir.clone(),
12268 node_count: metadata.common.num_nodes,
12269 edge_count: metadata.common.num_edges,
12270 },
12271 );
12272 snapshot.graph_count += 1;
12273 }
12274 Err(e) => {
12275 warn!("Failed to export DGL graph: {}", e);
12276 }
12277 }
12278 }
12279 datasynth_config::schema::GraphExportFormat::RustGraph => {
12280 use datasynth_graph::{
12281 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12282 };
12283
12284 let rustgraph_config = RustGraphExportConfig {
12285 include_features: true,
12286 include_temporal: true,
12287 include_labels: true,
12288 source_name: "datasynth".to_string(),
12289 batch_id: None,
12290 output_format: RustGraphOutputFormat::JsonLines,
12291 export_node_properties: true,
12292 export_edge_properties: true,
12293 pretty_print: false,
12294 };
12295
12296 let exporter = RustGraphExporter::new(rustgraph_config);
12297 match exporter.export(&graph, &format_dir) {
12298 Ok(metadata) => {
12299 snapshot.exports.insert(
12300 format!("{}_{}", graph_type.name, "rustgraph"),
12301 GraphExportInfo {
12302 name: graph_type.name.clone(),
12303 format: "rustgraph".to_string(),
12304 output_path: format_dir.clone(),
12305 node_count: metadata.num_nodes,
12306 edge_count: metadata.num_edges,
12307 },
12308 );
12309 snapshot.graph_count += 1;
12310 }
12311 Err(e) => {
12312 warn!("Failed to export RustGraph: {}", e);
12313 }
12314 }
12315 }
12316 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12317 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12319 }
12320 }
12321 }
12322
12323 if let Some(pb) = &pb {
12324 pb.inc(40);
12325 }
12326 }
12327
12328 stats.graph_export_count = snapshot.graph_count;
12329 snapshot.exported = snapshot.graph_count > 0;
12330
12331 if let Some(pb) = pb {
12332 pb.finish_with_message(format!(
12333 "Graphs exported: {} graphs ({} nodes, {} edges)",
12334 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12335 ));
12336 }
12337
12338 Ok(snapshot)
12339 }
12340
12341 fn build_additional_graphs(
12346 &self,
12347 banking: &BankingSnapshot,
12348 intercompany: &IntercompanySnapshot,
12349 entries: &[JournalEntry],
12350 stats: &mut EnhancedGenerationStatistics,
12351 ) {
12352 let output_dir = self
12353 .output_path
12354 .clone()
12355 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12356 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12357
12358 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12360 info!("Phase 10c: Building banking network graph");
12361 let config = BankingGraphConfig::default();
12362 let mut builder = BankingGraphBuilder::new(config);
12363 builder.add_customers(&banking.customers);
12364 builder.add_accounts(&banking.accounts, &banking.customers);
12365 builder.add_transactions(&banking.transactions);
12366 let graph = builder.build();
12367
12368 let node_count = graph.node_count();
12369 let edge_count = graph.edge_count();
12370 stats.graph_node_count += node_count;
12371 stats.graph_edge_count += edge_count;
12372
12373 for format in &self.config.graph_export.formats {
12375 if matches!(
12376 format,
12377 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12378 ) {
12379 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12380 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12381 warn!("Failed to create banking graph output dir: {}", e);
12382 continue;
12383 }
12384 let pyg_config = PyGExportConfig::default();
12385 let exporter = PyGExporter::new(pyg_config);
12386 if let Err(e) = exporter.export(&graph, &format_dir) {
12387 warn!("Failed to export banking graph as PyG: {}", e);
12388 } else {
12389 info!(
12390 "Banking network graph exported: {} nodes, {} edges",
12391 node_count, edge_count
12392 );
12393 }
12394 }
12395 }
12396 }
12397
12398 let approval_entries: Vec<_> = entries
12400 .iter()
12401 .filter(|je| je.header.approval_workflow.is_some())
12402 .collect();
12403
12404 if !approval_entries.is_empty() {
12405 info!(
12406 "Phase 10c: Building approval network graph ({} entries with approvals)",
12407 approval_entries.len()
12408 );
12409 let config = ApprovalGraphConfig::default();
12410 let mut builder = ApprovalGraphBuilder::new(config);
12411
12412 for je in &approval_entries {
12413 if let Some(ref wf) = je.header.approval_workflow {
12414 for action in &wf.actions {
12415 let record = datasynth_core::models::ApprovalRecord {
12416 approval_id: format!(
12417 "APR-{}-{}",
12418 je.header.document_id, action.approval_level
12419 ),
12420 document_number: je.header.document_id.to_string(),
12421 document_type: "JE".to_string(),
12422 company_code: je.company_code().to_string(),
12423 requester_id: wf.preparer_id.clone(),
12424 requester_name: Some(wf.preparer_name.clone()),
12425 approver_id: action.actor_id.clone(),
12426 approver_name: action.actor_name.clone(),
12427 approval_date: je.posting_date(),
12428 action: format!("{:?}", action.action),
12429 amount: wf.amount,
12430 approval_limit: None,
12431 comments: action.comments.clone(),
12432 delegation_from: None,
12433 is_auto_approved: false,
12434 };
12435 builder.add_approval(&record);
12436 }
12437 }
12438 }
12439
12440 let graph = builder.build();
12441 let node_count = graph.node_count();
12442 let edge_count = graph.edge_count();
12443 stats.graph_node_count += node_count;
12444 stats.graph_edge_count += edge_count;
12445
12446 for format in &self.config.graph_export.formats {
12448 if matches!(
12449 format,
12450 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12451 ) {
12452 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12453 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12454 warn!("Failed to create approval graph output dir: {}", e);
12455 continue;
12456 }
12457 let pyg_config = PyGExportConfig::default();
12458 let exporter = PyGExporter::new(pyg_config);
12459 if let Err(e) = exporter.export(&graph, &format_dir) {
12460 warn!("Failed to export approval graph as PyG: {}", e);
12461 } else {
12462 info!(
12463 "Approval network graph exported: {} nodes, {} edges",
12464 node_count, edge_count
12465 );
12466 }
12467 }
12468 }
12469 }
12470
12471 if self.config.companies.len() >= 2 {
12473 info!(
12474 "Phase 10c: Building entity relationship graph ({} companies)",
12475 self.config.companies.len()
12476 );
12477
12478 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12479 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12480
12481 let parent_code = &self.config.companies[0].code;
12483 let mut companies: Vec<datasynth_core::models::Company> =
12484 Vec::with_capacity(self.config.companies.len());
12485
12486 let first = &self.config.companies[0];
12488 companies.push(datasynth_core::models::Company::parent(
12489 &first.code,
12490 &first.name,
12491 &first.country,
12492 &first.currency,
12493 ));
12494
12495 for cc in self.config.companies.iter().skip(1) {
12497 companies.push(datasynth_core::models::Company::subsidiary(
12498 &cc.code,
12499 &cc.name,
12500 &cc.country,
12501 &cc.currency,
12502 parent_code,
12503 rust_decimal::Decimal::from(100),
12504 ));
12505 }
12506
12507 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12509 self.config
12510 .companies
12511 .iter()
12512 .skip(1)
12513 .enumerate()
12514 .map(|(i, cc)| {
12515 let mut rel =
12516 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12517 format!("REL{:03}", i + 1),
12518 parent_code.clone(),
12519 cc.code.clone(),
12520 rust_decimal::Decimal::from(100),
12521 start_date,
12522 );
12523 rel.functional_currency = cc.currency.clone();
12524 rel
12525 })
12526 .collect();
12527
12528 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12529 builder.add_companies(&companies);
12530 builder.add_ownership_relationships(&relationships);
12531
12532 for pair in &intercompany.matched_pairs {
12534 builder.add_intercompany_edge(
12535 &pair.seller_company,
12536 &pair.buyer_company,
12537 pair.amount,
12538 &format!("{:?}", pair.transaction_type),
12539 );
12540 }
12541
12542 let graph = builder.build();
12543 let node_count = graph.node_count();
12544 let edge_count = graph.edge_count();
12545 stats.graph_node_count += node_count;
12546 stats.graph_edge_count += edge_count;
12547
12548 for format in &self.config.graph_export.formats {
12550 if matches!(
12551 format,
12552 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12553 ) {
12554 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12555 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12556 warn!("Failed to create entity graph output dir: {}", e);
12557 continue;
12558 }
12559 let pyg_config = PyGExportConfig::default();
12560 let exporter = PyGExporter::new(pyg_config);
12561 if let Err(e) = exporter.export(&graph, &format_dir) {
12562 warn!("Failed to export entity graph as PyG: {}", e);
12563 } else {
12564 info!(
12565 "Entity relationship graph exported: {} nodes, {} edges",
12566 node_count, edge_count
12567 );
12568 }
12569 }
12570 }
12571 } else {
12572 debug!(
12573 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12574 self.config.companies.len()
12575 );
12576 }
12577 }
12578
12579 #[allow(clippy::too_many_arguments)]
12586 fn export_hypergraph(
12587 &self,
12588 coa: &Arc<ChartOfAccounts>,
12589 entries: &[JournalEntry],
12590 document_flows: &DocumentFlowSnapshot,
12591 sourcing: &SourcingSnapshot,
12592 hr: &HrSnapshot,
12593 manufacturing: &ManufacturingSnapshot,
12594 banking: &BankingSnapshot,
12595 audit: &AuditSnapshot,
12596 financial_reporting: &FinancialReportingSnapshot,
12597 ocpm: &OcpmSnapshot,
12598 compliance: &ComplianceRegulationsSnapshot,
12599 stats: &mut EnhancedGenerationStatistics,
12600 ) -> SynthResult<HypergraphExportInfo> {
12601 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12602 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12603 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12604 use datasynth_graph::models::hypergraph::AggregationStrategy;
12605
12606 let hg_settings = &self.config.graph_export.hypergraph;
12607
12608 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12610 "truncate" => AggregationStrategy::Truncate,
12611 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12612 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12613 "importance_sample" => AggregationStrategy::ImportanceSample,
12614 _ => AggregationStrategy::PoolByCounterparty,
12615 };
12616
12617 let builder_config = HypergraphConfig {
12618 max_nodes: hg_settings.max_nodes,
12619 aggregation_strategy,
12620 include_coso: hg_settings.governance_layer.include_coso,
12621 include_controls: hg_settings.governance_layer.include_controls,
12622 include_sox: hg_settings.governance_layer.include_sox,
12623 include_vendors: hg_settings.governance_layer.include_vendors,
12624 include_customers: hg_settings.governance_layer.include_customers,
12625 include_employees: hg_settings.governance_layer.include_employees,
12626 include_p2p: hg_settings.process_layer.include_p2p,
12627 include_o2c: hg_settings.process_layer.include_o2c,
12628 include_s2c: hg_settings.process_layer.include_s2c,
12629 include_h2r: hg_settings.process_layer.include_h2r,
12630 include_mfg: hg_settings.process_layer.include_mfg,
12631 include_bank: hg_settings.process_layer.include_bank,
12632 include_audit: hg_settings.process_layer.include_audit,
12633 include_r2r: hg_settings.process_layer.include_r2r,
12634 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12635 docs_per_counterparty_threshold: hg_settings
12636 .process_layer
12637 .docs_per_counterparty_threshold,
12638 include_accounts: hg_settings.accounting_layer.include_accounts,
12639 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12640 include_cross_layer_edges: hg_settings.cross_layer.enabled,
12641 include_compliance: self.config.compliance_regulations.enabled,
12642 include_tax: true,
12643 include_treasury: true,
12644 include_esg: true,
12645 include_project: true,
12646 include_intercompany: true,
12647 include_temporal_events: true,
12648 };
12649
12650 let mut builder = HypergraphBuilder::new(builder_config);
12651
12652 builder.add_coso_framework();
12654
12655 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12658 let controls = InternalControl::standard_controls();
12659 builder.add_controls(&controls);
12660 }
12661
12662 builder.add_vendors(&self.master_data.vendors);
12664 builder.add_customers(&self.master_data.customers);
12665 builder.add_employees(&self.master_data.employees);
12666
12667 builder.add_p2p_documents(
12669 &document_flows.purchase_orders,
12670 &document_flows.goods_receipts,
12671 &document_flows.vendor_invoices,
12672 &document_flows.payments,
12673 );
12674 builder.add_o2c_documents(
12675 &document_flows.sales_orders,
12676 &document_flows.deliveries,
12677 &document_flows.customer_invoices,
12678 );
12679 builder.add_s2c_documents(
12680 &sourcing.sourcing_projects,
12681 &sourcing.qualifications,
12682 &sourcing.rfx_events,
12683 &sourcing.bids,
12684 &sourcing.bid_evaluations,
12685 &sourcing.contracts,
12686 );
12687 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12688 builder.add_mfg_documents(
12689 &manufacturing.production_orders,
12690 &manufacturing.quality_inspections,
12691 &manufacturing.cycle_counts,
12692 );
12693 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12694 builder.add_audit_documents(
12695 &audit.engagements,
12696 &audit.workpapers,
12697 &audit.findings,
12698 &audit.evidence,
12699 &audit.risk_assessments,
12700 &audit.judgments,
12701 &audit.materiality_calculations,
12702 &audit.audit_opinions,
12703 &audit.going_concern_assessments,
12704 );
12705 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12706
12707 if let Some(ref event_log) = ocpm.event_log {
12709 builder.add_ocpm_events(event_log);
12710 }
12711
12712 if self.config.compliance_regulations.enabled
12714 && hg_settings.governance_layer.include_controls
12715 {
12716 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12718 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12719 .standard_records
12720 .iter()
12721 .filter_map(|r| {
12722 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12723 registry.get(&sid).cloned()
12724 })
12725 .collect();
12726
12727 builder.add_compliance_regulations(
12728 &standards,
12729 &compliance.findings,
12730 &compliance.filings,
12731 );
12732 }
12733
12734 builder.add_accounts(coa);
12736 builder.add_journal_entries_as_hyperedges(entries);
12737
12738 let hypergraph = builder.build();
12740
12741 let output_dir = self
12743 .output_path
12744 .clone()
12745 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12746 let hg_dir = output_dir
12747 .join(&self.config.graph_export.output_subdirectory)
12748 .join(&hg_settings.output_subdirectory);
12749
12750 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12752 "unified" => {
12753 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12754 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12755 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12756 })?;
12757 (
12758 metadata.num_nodes,
12759 metadata.num_edges,
12760 metadata.num_hyperedges,
12761 )
12762 }
12763 _ => {
12764 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12766 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12767 SynthError::generation(format!("Hypergraph export failed: {e}"))
12768 })?;
12769 (
12770 metadata.num_nodes,
12771 metadata.num_edges,
12772 metadata.num_hyperedges,
12773 )
12774 }
12775 };
12776
12777 #[cfg(feature = "streaming")]
12779 if let Some(ref target_url) = hg_settings.stream_target {
12780 use crate::stream_client::{StreamClient, StreamConfig};
12781 use std::io::Write as _;
12782
12783 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12784 let stream_config = StreamConfig {
12785 target_url: target_url.clone(),
12786 batch_size: hg_settings.stream_batch_size,
12787 api_key,
12788 ..StreamConfig::default()
12789 };
12790
12791 match StreamClient::new(stream_config) {
12792 Ok(mut client) => {
12793 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12794 match exporter.export_to_writer(&hypergraph, &mut client) {
12795 Ok(_) => {
12796 if let Err(e) = client.flush() {
12797 warn!("Failed to flush stream client: {}", e);
12798 } else {
12799 info!("Streamed {} records to {}", client.total_sent(), target_url);
12800 }
12801 }
12802 Err(e) => {
12803 warn!("Streaming export failed: {}", e);
12804 }
12805 }
12806 }
12807 Err(e) => {
12808 warn!("Failed to create stream client: {}", e);
12809 }
12810 }
12811 }
12812
12813 stats.graph_node_count += num_nodes;
12815 stats.graph_edge_count += num_edges;
12816 stats.graph_export_count += 1;
12817
12818 Ok(HypergraphExportInfo {
12819 node_count: num_nodes,
12820 edge_count: num_edges,
12821 hyperedge_count: num_hyperedges,
12822 output_path: hg_dir,
12823 })
12824 }
12825
12826 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12831 let pb = self.create_progress_bar(100, "Generating Banking Data");
12832
12833 let orchestrator = BankingOrchestratorBuilder::new()
12835 .config(self.config.banking.clone())
12836 .seed(self.seed + 9000)
12837 .country_pack(self.primary_pack().clone())
12838 .build();
12839
12840 if let Some(pb) = &pb {
12841 pb.inc(10);
12842 }
12843
12844 let result = orchestrator.generate();
12846
12847 if let Some(pb) = &pb {
12848 pb.inc(90);
12849 pb.finish_with_message(format!(
12850 "Banking: {} customers, {} transactions",
12851 result.customers.len(),
12852 result.transactions.len()
12853 ));
12854 }
12855
12856 let mut banking_customers = result.customers;
12861 let core_customers = &self.master_data.customers;
12862 if !core_customers.is_empty() {
12863 for (i, bc) in banking_customers.iter_mut().enumerate() {
12864 let core = &core_customers[i % core_customers.len()];
12865 bc.name = CustomerName::business(&core.name);
12866 bc.residence_country = core.country.clone();
12867 bc.enterprise_customer_id = Some(core.customer_id.clone());
12868 }
12869 debug!(
12870 "Cross-referenced {} banking customers with {} core customers",
12871 banking_customers.len(),
12872 core_customers.len()
12873 );
12874 }
12875
12876 Ok(BankingSnapshot {
12877 customers: banking_customers,
12878 accounts: result.accounts,
12879 transactions: result.transactions,
12880 transaction_labels: result.transaction_labels,
12881 customer_labels: result.customer_labels,
12882 account_labels: result.account_labels,
12883 relationship_labels: result.relationship_labels,
12884 narratives: result.narratives,
12885 suspicious_count: result.stats.suspicious_count,
12886 scenario_count: result.scenarios.len(),
12887 })
12888 }
12889
12890 fn calculate_total_transactions(&self) -> u64 {
12892 let months = self.config.global.period_months as f64;
12893 self.config
12894 .companies
12895 .iter()
12896 .map(|c| {
12897 let annual = c.annual_transaction_volume.count() as f64;
12898 let weighted = annual * c.volume_weight;
12899 (weighted * months / 12.0) as u64
12900 })
12901 .sum()
12902 }
12903
12904 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12906 if !self.phase_config.show_progress {
12907 return None;
12908 }
12909
12910 let pb = if let Some(mp) = &self.multi_progress {
12911 mp.add(ProgressBar::new(total))
12912 } else {
12913 ProgressBar::new(total)
12914 };
12915
12916 pb.set_style(
12917 ProgressStyle::default_bar()
12918 .template(&format!(
12919 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12920 ))
12921 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12922 .progress_chars("#>-"),
12923 );
12924
12925 Some(pb)
12926 }
12927
12928 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12930 self.coa.clone()
12931 }
12932
12933 pub fn get_master_data(&self) -> &MasterDataSnapshot {
12935 &self.master_data
12936 }
12937
12938 fn phase_compliance_regulations(
12940 &mut self,
12941 _stats: &mut EnhancedGenerationStatistics,
12942 ) -> SynthResult<ComplianceRegulationsSnapshot> {
12943 if !self.phase_config.generate_compliance_regulations {
12944 return Ok(ComplianceRegulationsSnapshot::default());
12945 }
12946
12947 info!("Phase: Generating Compliance Regulations Data");
12948
12949 let cr_config = &self.config.compliance_regulations;
12950
12951 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12953 self.config
12954 .companies
12955 .iter()
12956 .map(|c| c.country.clone())
12957 .collect::<std::collections::HashSet<_>>()
12958 .into_iter()
12959 .collect()
12960 } else {
12961 cr_config.jurisdictions.clone()
12962 };
12963
12964 let fallback_date =
12966 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12967 let reference_date = cr_config
12968 .reference_date
12969 .as_ref()
12970 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12971 .unwrap_or_else(|| {
12972 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12973 .unwrap_or(fallback_date)
12974 });
12975
12976 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12978 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12979 let cross_reference_records = reg_gen.generate_cross_reference_records();
12980 let jurisdiction_records =
12981 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12982
12983 info!(
12984 " Standards: {} records, {} cross-references, {} jurisdictions",
12985 standard_records.len(),
12986 cross_reference_records.len(),
12987 jurisdiction_records.len()
12988 );
12989
12990 let audit_procedures = if cr_config.audit_procedures.enabled {
12992 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12993 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12994 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12995 confidence_level: cr_config.audit_procedures.confidence_level,
12996 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12997 };
12998 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12999 self.seed + 9000,
13000 proc_config,
13001 );
13002 let registry = reg_gen.registry();
13003 let mut all_procs = Vec::new();
13004 for jurisdiction in &jurisdictions {
13005 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13006 all_procs.extend(procs);
13007 }
13008 info!(" Audit procedures: {}", all_procs.len());
13009 all_procs
13010 } else {
13011 Vec::new()
13012 };
13013
13014 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13016 let finding_config =
13017 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13018 finding_rate: cr_config.findings.finding_rate,
13019 material_weakness_rate: cr_config.findings.material_weakness_rate,
13020 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13021 generate_remediation: cr_config.findings.generate_remediation,
13022 };
13023 let mut finding_gen =
13024 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13025 self.seed + 9100,
13026 finding_config,
13027 );
13028 let mut all_findings = Vec::new();
13029 for company in &self.config.companies {
13030 let company_findings =
13031 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13032 all_findings.extend(company_findings);
13033 }
13034 info!(" Compliance findings: {}", all_findings.len());
13035 all_findings
13036 } else {
13037 Vec::new()
13038 };
13039
13040 let filings = if cr_config.filings.enabled {
13042 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13043 filing_types: cr_config.filings.filing_types.clone(),
13044 generate_status_progression: cr_config.filings.generate_status_progression,
13045 };
13046 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13047 self.seed + 9200,
13048 filing_config,
13049 );
13050 let company_codes: Vec<String> = self
13051 .config
13052 .companies
13053 .iter()
13054 .map(|c| c.code.clone())
13055 .collect();
13056 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13057 .unwrap_or(fallback_date);
13058 let filings = filing_gen.generate_filings(
13059 &company_codes,
13060 &jurisdictions,
13061 start_date,
13062 self.config.global.period_months,
13063 );
13064 info!(" Regulatory filings: {}", filings.len());
13065 filings
13066 } else {
13067 Vec::new()
13068 };
13069
13070 let compliance_graph = if cr_config.graph.enabled {
13072 let graph_config = datasynth_graph::ComplianceGraphConfig {
13073 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13074 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13075 include_cross_references: cr_config.graph.include_cross_references,
13076 include_supersession_edges: cr_config.graph.include_supersession_edges,
13077 include_account_links: cr_config.graph.include_account_links,
13078 include_control_links: cr_config.graph.include_control_links,
13079 include_company_links: cr_config.graph.include_company_links,
13080 };
13081 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13082
13083 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13085 .iter()
13086 .map(|r| datasynth_graph::StandardNodeInput {
13087 standard_id: r.standard_id.clone(),
13088 title: r.title.clone(),
13089 category: r.category.clone(),
13090 domain: r.domain.clone(),
13091 is_active: r.is_active,
13092 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13093 applicable_account_types: r.applicable_account_types.clone(),
13094 applicable_processes: r.applicable_processes.clone(),
13095 })
13096 .collect();
13097 builder.add_standards(&standard_inputs);
13098
13099 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13101 jurisdiction_records
13102 .iter()
13103 .map(|r| datasynth_graph::JurisdictionNodeInput {
13104 country_code: r.country_code.clone(),
13105 country_name: r.country_name.clone(),
13106 framework: r.accounting_framework.clone(),
13107 standard_count: r.standard_count,
13108 tax_rate: r.statutory_tax_rate,
13109 })
13110 .collect();
13111 builder.add_jurisdictions(&jurisdiction_inputs);
13112
13113 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13115 cross_reference_records
13116 .iter()
13117 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13118 from_standard: r.from_standard.clone(),
13119 to_standard: r.to_standard.clone(),
13120 relationship: r.relationship.clone(),
13121 convergence_level: r.convergence_level,
13122 })
13123 .collect();
13124 builder.add_cross_references(&xref_inputs);
13125
13126 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13128 .iter()
13129 .map(|r| datasynth_graph::JurisdictionMappingInput {
13130 country_code: r.jurisdiction.clone(),
13131 standard_id: r.standard_id.clone(),
13132 })
13133 .collect();
13134 builder.add_jurisdiction_mappings(&mapping_inputs);
13135
13136 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13138 .iter()
13139 .map(|p| datasynth_graph::ProcedureNodeInput {
13140 procedure_id: p.procedure_id.clone(),
13141 standard_id: p.standard_id.clone(),
13142 procedure_type: p.procedure_type.clone(),
13143 sample_size: p.sample_size,
13144 confidence_level: p.confidence_level,
13145 })
13146 .collect();
13147 builder.add_procedures(&proc_inputs);
13148
13149 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13151 .iter()
13152 .map(|f| datasynth_graph::FindingNodeInput {
13153 finding_id: f.finding_id.to_string(),
13154 standard_id: f
13155 .related_standards
13156 .first()
13157 .map(|s| s.as_str().to_string())
13158 .unwrap_or_default(),
13159 severity: f.severity.to_string(),
13160 deficiency_level: f.deficiency_level.to_string(),
13161 severity_score: f.deficiency_level.severity_score(),
13162 control_id: f.control_id.clone(),
13163 affected_accounts: f.affected_accounts.clone(),
13164 })
13165 .collect();
13166 builder.add_findings(&finding_inputs);
13167
13168 if cr_config.graph.include_account_links {
13170 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13171 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13172 for std_record in &standard_records {
13173 if let Some(std_obj) =
13174 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13175 &std_record.standard_id,
13176 ))
13177 {
13178 for acct_type in &std_obj.applicable_account_types {
13179 account_links.push(datasynth_graph::AccountLinkInput {
13180 standard_id: std_record.standard_id.clone(),
13181 account_code: acct_type.clone(),
13182 account_name: acct_type.clone(),
13183 });
13184 }
13185 }
13186 }
13187 builder.add_account_links(&account_links);
13188 }
13189
13190 if cr_config.graph.include_control_links {
13192 let mut control_links = Vec::new();
13193 let sox_like_ids: Vec<String> = standard_records
13195 .iter()
13196 .filter(|r| {
13197 r.standard_id.starts_with("SOX")
13198 || r.standard_id.starts_with("PCAOB-AS-2201")
13199 })
13200 .map(|r| r.standard_id.clone())
13201 .collect();
13202 let control_ids = [
13204 ("C001", "Cash Controls"),
13205 ("C002", "Large Transaction Approval"),
13206 ("C010", "PO Approval"),
13207 ("C011", "Three-Way Match"),
13208 ("C020", "Revenue Recognition"),
13209 ("C021", "Credit Check"),
13210 ("C030", "Manual JE Approval"),
13211 ("C031", "Period Close Review"),
13212 ("C032", "Account Reconciliation"),
13213 ("C040", "Payroll Processing"),
13214 ("C050", "Fixed Asset Capitalization"),
13215 ("C060", "Intercompany Elimination"),
13216 ];
13217 for sox_id in &sox_like_ids {
13218 for (ctrl_id, ctrl_name) in &control_ids {
13219 control_links.push(datasynth_graph::ControlLinkInput {
13220 standard_id: sox_id.clone(),
13221 control_id: ctrl_id.to_string(),
13222 control_name: ctrl_name.to_string(),
13223 });
13224 }
13225 }
13226 builder.add_control_links(&control_links);
13227 }
13228
13229 if cr_config.graph.include_company_links {
13231 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13232 .iter()
13233 .enumerate()
13234 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13235 filing_id: format!("F{:04}", i + 1),
13236 filing_type: f.filing_type.to_string(),
13237 company_code: f.company_code.clone(),
13238 jurisdiction: f.jurisdiction.clone(),
13239 status: format!("{:?}", f.status),
13240 })
13241 .collect();
13242 builder.add_filings(&filing_inputs);
13243 }
13244
13245 let graph = builder.build();
13246 info!(
13247 " Compliance graph: {} nodes, {} edges",
13248 graph.nodes.len(),
13249 graph.edges.len()
13250 );
13251 Some(graph)
13252 } else {
13253 None
13254 };
13255
13256 self.check_resources_with_log("post-compliance-regulations")?;
13257
13258 Ok(ComplianceRegulationsSnapshot {
13259 standard_records,
13260 cross_reference_records,
13261 jurisdiction_records,
13262 audit_procedures,
13263 findings,
13264 filings,
13265 compliance_graph,
13266 })
13267 }
13268
13269 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13271 use super::lineage::LineageGraphBuilder;
13272
13273 let mut builder = LineageGraphBuilder::new();
13274
13275 builder.add_config_section("config:global", "Global Config");
13277 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13278 builder.add_config_section("config:transactions", "Transaction Config");
13279
13280 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13282 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13283
13284 builder.configured_by("phase:coa", "config:chart_of_accounts");
13286 builder.configured_by("phase:je", "config:transactions");
13287
13288 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13290 builder.produced_by("output:je", "phase:je");
13291
13292 if self.phase_config.generate_master_data {
13294 builder.add_config_section("config:master_data", "Master Data Config");
13295 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13296 builder.configured_by("phase:master_data", "config:master_data");
13297 builder.input_to("phase:master_data", "phase:je");
13298 }
13299
13300 if self.phase_config.generate_document_flows {
13301 builder.add_config_section("config:document_flows", "Document Flow Config");
13302 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13303 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13304 builder.configured_by("phase:p2p", "config:document_flows");
13305 builder.configured_by("phase:o2c", "config:document_flows");
13306
13307 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13308 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13309 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13310 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13311 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13312
13313 builder.produced_by("output:po", "phase:p2p");
13314 builder.produced_by("output:gr", "phase:p2p");
13315 builder.produced_by("output:vi", "phase:p2p");
13316 builder.produced_by("output:so", "phase:o2c");
13317 builder.produced_by("output:ci", "phase:o2c");
13318 }
13319
13320 if self.phase_config.inject_anomalies {
13321 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13322 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13323 builder.configured_by("phase:anomaly", "config:fraud");
13324 builder.add_output_file(
13325 "output:labels",
13326 "Anomaly Labels",
13327 "labels/anomaly_labels.csv",
13328 );
13329 builder.produced_by("output:labels", "phase:anomaly");
13330 }
13331
13332 if self.phase_config.generate_audit {
13333 builder.add_config_section("config:audit", "Audit Config");
13334 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13335 builder.configured_by("phase:audit", "config:audit");
13336 }
13337
13338 if self.phase_config.generate_banking {
13339 builder.add_config_section("config:banking", "Banking Config");
13340 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13341 builder.configured_by("phase:banking", "config:banking");
13342 }
13343
13344 if self.config.llm.enabled {
13345 builder.add_config_section("config:llm", "LLM Enrichment Config");
13346 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13347 builder.configured_by("phase:llm_enrichment", "config:llm");
13348 }
13349
13350 if self.config.diffusion.enabled {
13351 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13352 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13353 builder.configured_by("phase:diffusion", "config:diffusion");
13354 }
13355
13356 if self.config.causal.enabled {
13357 builder.add_config_section("config:causal", "Causal Generation Config");
13358 builder.add_generator_phase("phase:causal", "Causal Overlay");
13359 builder.configured_by("phase:causal", "config:causal");
13360 }
13361
13362 builder.build()
13363 }
13364
13365 fn compute_company_revenue(
13374 entries: &[JournalEntry],
13375 company_code: &str,
13376 ) -> rust_decimal::Decimal {
13377 use rust_decimal::Decimal;
13378 let mut revenue = Decimal::ZERO;
13379 for je in entries {
13380 if je.header.company_code != company_code {
13381 continue;
13382 }
13383 for line in &je.lines {
13384 if line.gl_account.starts_with('4') {
13385 revenue += line.credit_amount - line.debit_amount;
13387 }
13388 }
13389 }
13390 revenue.max(Decimal::ZERO)
13391 }
13392
13393 fn compute_entity_net_assets(
13397 entries: &[JournalEntry],
13398 entity_code: &str,
13399 ) -> rust_decimal::Decimal {
13400 use rust_decimal::Decimal;
13401 let mut asset_net = Decimal::ZERO;
13402 let mut liability_net = Decimal::ZERO;
13403 for je in entries {
13404 if je.header.company_code != entity_code {
13405 continue;
13406 }
13407 for line in &je.lines {
13408 if line.gl_account.starts_with('1') {
13409 asset_net += line.debit_amount - line.credit_amount;
13410 } else if line.gl_account.starts_with('2') {
13411 liability_net += line.credit_amount - line.debit_amount;
13412 }
13413 }
13414 }
13415 asset_net - liability_net
13416 }
13417}
13418
13419fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13421 match format {
13422 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13423 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13424 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13425 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13426 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13427 }
13428}
13429
13430fn compute_trial_balance_entries(
13435 entries: &[JournalEntry],
13436 entity_code: &str,
13437 fiscal_year: i32,
13438 coa: Option<&ChartOfAccounts>,
13439) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13440 use std::collections::BTreeMap;
13441
13442 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13443 BTreeMap::new();
13444
13445 for je in entries {
13446 for line in &je.lines {
13447 let entry = balances.entry(line.account_code.clone()).or_default();
13448 entry.0 += line.debit_amount;
13449 entry.1 += line.credit_amount;
13450 }
13451 }
13452
13453 balances
13454 .into_iter()
13455 .map(
13456 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13457 account_description: coa
13458 .and_then(|c| c.get_account(&account_code))
13459 .map(|a| a.description().to_string())
13460 .unwrap_or_else(|| account_code.clone()),
13461 account_code,
13462 debit_balance: debit,
13463 credit_balance: credit,
13464 net_balance: debit - credit,
13465 entity_code: entity_code.to_string(),
13466 period: format!("FY{}", fiscal_year),
13467 },
13468 )
13469 .collect()
13470}
13471
13472#[cfg(test)]
13473#[allow(clippy::unwrap_used)]
13474mod tests {
13475 use super::*;
13476 use datasynth_config::schema::*;
13477
13478 fn create_test_config() -> GeneratorConfig {
13479 GeneratorConfig {
13480 global: GlobalConfig {
13481 industry: IndustrySector::Manufacturing,
13482 start_date: "2024-01-01".to_string(),
13483 period_months: 1,
13484 seed: Some(42),
13485 parallel: false,
13486 group_currency: "USD".to_string(),
13487 presentation_currency: None,
13488 worker_threads: 0,
13489 memory_limit_mb: 0,
13490 fiscal_year_months: None,
13491 },
13492 companies: vec![CompanyConfig {
13493 code: "1000".to_string(),
13494 name: "Test Company".to_string(),
13495 currency: "USD".to_string(),
13496 functional_currency: None,
13497 country: "US".to_string(),
13498 annual_transaction_volume: TransactionVolume::TenK,
13499 volume_weight: 1.0,
13500 fiscal_year_variant: "K4".to_string(),
13501 }],
13502 chart_of_accounts: ChartOfAccountsConfig {
13503 complexity: CoAComplexity::Small,
13504 industry_specific: true,
13505 custom_accounts: None,
13506 min_hierarchy_depth: 2,
13507 max_hierarchy_depth: 4,
13508 },
13509 transactions: TransactionConfig::default(),
13510 output: OutputConfig::default(),
13511 fraud: FraudConfig::default(),
13512 internal_controls: InternalControlsConfig::default(),
13513 business_processes: BusinessProcessConfig::default(),
13514 user_personas: UserPersonaConfig::default(),
13515 templates: TemplateConfig::default(),
13516 approval: ApprovalConfig::default(),
13517 departments: DepartmentConfig::default(),
13518 master_data: MasterDataConfig::default(),
13519 document_flows: DocumentFlowConfig::default(),
13520 intercompany: IntercompanyConfig::default(),
13521 balance: BalanceConfig::default(),
13522 ocpm: OcpmConfig::default(),
13523 audit: AuditGenerationConfig::default(),
13524 banking: datasynth_banking::BankingConfig::default(),
13525 data_quality: DataQualitySchemaConfig::default(),
13526 scenario: ScenarioConfig::default(),
13527 temporal: TemporalDriftConfig::default(),
13528 graph_export: GraphExportConfig::default(),
13529 streaming: StreamingSchemaConfig::default(),
13530 rate_limit: RateLimitSchemaConfig::default(),
13531 temporal_attributes: TemporalAttributeSchemaConfig::default(),
13532 relationships: RelationshipSchemaConfig::default(),
13533 accounting_standards: AccountingStandardsConfig::default(),
13534 audit_standards: AuditStandardsConfig::default(),
13535 distributions: Default::default(),
13536 temporal_patterns: Default::default(),
13537 vendor_network: VendorNetworkSchemaConfig::default(),
13538 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13539 relationship_strength: RelationshipStrengthSchemaConfig::default(),
13540 cross_process_links: CrossProcessLinksSchemaConfig::default(),
13541 organizational_events: OrganizationalEventsSchemaConfig::default(),
13542 behavioral_drift: BehavioralDriftSchemaConfig::default(),
13543 market_drift: MarketDriftSchemaConfig::default(),
13544 drift_labeling: DriftLabelingSchemaConfig::default(),
13545 anomaly_injection: Default::default(),
13546 industry_specific: Default::default(),
13547 fingerprint_privacy: Default::default(),
13548 quality_gates: Default::default(),
13549 compliance: Default::default(),
13550 webhooks: Default::default(),
13551 llm: Default::default(),
13552 diffusion: Default::default(),
13553 causal: Default::default(),
13554 source_to_pay: Default::default(),
13555 financial_reporting: Default::default(),
13556 hr: Default::default(),
13557 manufacturing: Default::default(),
13558 sales_quotes: Default::default(),
13559 tax: Default::default(),
13560 treasury: Default::default(),
13561 project_accounting: Default::default(),
13562 esg: Default::default(),
13563 country_packs: None,
13564 scenarios: Default::default(),
13565 session: Default::default(),
13566 compliance_regulations: Default::default(),
13567 }
13568 }
13569
13570 #[test]
13571 fn test_enhanced_orchestrator_creation() {
13572 let config = create_test_config();
13573 let orchestrator = EnhancedOrchestrator::with_defaults(config);
13574 assert!(orchestrator.is_ok());
13575 }
13576
13577 #[test]
13578 fn test_minimal_generation() {
13579 let config = create_test_config();
13580 let phase_config = PhaseConfig {
13581 generate_master_data: false,
13582 generate_document_flows: false,
13583 generate_journal_entries: true,
13584 inject_anomalies: false,
13585 show_progress: false,
13586 ..Default::default()
13587 };
13588
13589 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13590 let result = orchestrator.generate();
13591
13592 assert!(result.is_ok());
13593 let result = result.unwrap();
13594 assert!(!result.journal_entries.is_empty());
13595 }
13596
13597 #[test]
13598 fn test_master_data_generation() {
13599 let config = create_test_config();
13600 let phase_config = PhaseConfig {
13601 generate_master_data: true,
13602 generate_document_flows: false,
13603 generate_journal_entries: false,
13604 inject_anomalies: false,
13605 show_progress: false,
13606 vendors_per_company: 5,
13607 customers_per_company: 5,
13608 materials_per_company: 10,
13609 assets_per_company: 5,
13610 employees_per_company: 10,
13611 ..Default::default()
13612 };
13613
13614 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13615 let result = orchestrator.generate().unwrap();
13616
13617 assert!(!result.master_data.vendors.is_empty());
13618 assert!(!result.master_data.customers.is_empty());
13619 assert!(!result.master_data.materials.is_empty());
13620 }
13621
13622 #[test]
13623 fn test_document_flow_generation() {
13624 let config = create_test_config();
13625 let phase_config = PhaseConfig {
13626 generate_master_data: true,
13627 generate_document_flows: true,
13628 generate_journal_entries: false,
13629 inject_anomalies: false,
13630 inject_data_quality: false,
13631 validate_balances: false,
13632 generate_ocpm_events: false,
13633 show_progress: false,
13634 vendors_per_company: 5,
13635 customers_per_company: 5,
13636 materials_per_company: 10,
13637 assets_per_company: 5,
13638 employees_per_company: 10,
13639 p2p_chains: 5,
13640 o2c_chains: 5,
13641 ..Default::default()
13642 };
13643
13644 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13645 let result = orchestrator.generate().unwrap();
13646
13647 assert!(!result.document_flows.p2p_chains.is_empty());
13649 assert!(!result.document_flows.o2c_chains.is_empty());
13650
13651 assert!(!result.document_flows.purchase_orders.is_empty());
13653 assert!(!result.document_flows.sales_orders.is_empty());
13654 }
13655
13656 #[test]
13657 fn test_anomaly_injection() {
13658 let config = create_test_config();
13659 let phase_config = PhaseConfig {
13660 generate_master_data: false,
13661 generate_document_flows: false,
13662 generate_journal_entries: true,
13663 inject_anomalies: true,
13664 show_progress: false,
13665 ..Default::default()
13666 };
13667
13668 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13669 let result = orchestrator.generate().unwrap();
13670
13671 assert!(!result.journal_entries.is_empty());
13673
13674 assert!(result.anomaly_labels.summary.is_some());
13677 }
13678
13679 #[test]
13680 fn test_full_generation_pipeline() {
13681 let config = create_test_config();
13682 let phase_config = PhaseConfig {
13683 generate_master_data: true,
13684 generate_document_flows: true,
13685 generate_journal_entries: true,
13686 inject_anomalies: false,
13687 inject_data_quality: false,
13688 validate_balances: true,
13689 generate_ocpm_events: false,
13690 show_progress: false,
13691 vendors_per_company: 3,
13692 customers_per_company: 3,
13693 materials_per_company: 5,
13694 assets_per_company: 3,
13695 employees_per_company: 5,
13696 p2p_chains: 3,
13697 o2c_chains: 3,
13698 ..Default::default()
13699 };
13700
13701 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13702 let result = orchestrator.generate().unwrap();
13703
13704 assert!(!result.master_data.vendors.is_empty());
13706 assert!(!result.master_data.customers.is_empty());
13707 assert!(!result.document_flows.p2p_chains.is_empty());
13708 assert!(!result.document_flows.o2c_chains.is_empty());
13709 assert!(!result.journal_entries.is_empty());
13710 assert!(result.statistics.accounts_count > 0);
13711
13712 assert!(!result.subledger.ap_invoices.is_empty());
13714 assert!(!result.subledger.ar_invoices.is_empty());
13715
13716 assert!(result.balance_validation.validated);
13718 assert!(result.balance_validation.entries_processed > 0);
13719 }
13720
13721 #[test]
13722 fn test_subledger_linking() {
13723 let config = create_test_config();
13724 let phase_config = PhaseConfig {
13725 generate_master_data: true,
13726 generate_document_flows: true,
13727 generate_journal_entries: false,
13728 inject_anomalies: false,
13729 inject_data_quality: false,
13730 validate_balances: false,
13731 generate_ocpm_events: false,
13732 show_progress: false,
13733 vendors_per_company: 5,
13734 customers_per_company: 5,
13735 materials_per_company: 10,
13736 assets_per_company: 3,
13737 employees_per_company: 5,
13738 p2p_chains: 5,
13739 o2c_chains: 5,
13740 ..Default::default()
13741 };
13742
13743 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13744 let result = orchestrator.generate().unwrap();
13745
13746 assert!(!result.document_flows.vendor_invoices.is_empty());
13748 assert!(!result.document_flows.customer_invoices.is_empty());
13749
13750 assert!(!result.subledger.ap_invoices.is_empty());
13752 assert!(!result.subledger.ar_invoices.is_empty());
13753
13754 assert_eq!(
13756 result.subledger.ap_invoices.len(),
13757 result.document_flows.vendor_invoices.len()
13758 );
13759
13760 assert_eq!(
13762 result.subledger.ar_invoices.len(),
13763 result.document_flows.customer_invoices.len()
13764 );
13765
13766 assert_eq!(
13768 result.statistics.ap_invoice_count,
13769 result.subledger.ap_invoices.len()
13770 );
13771 assert_eq!(
13772 result.statistics.ar_invoice_count,
13773 result.subledger.ar_invoices.len()
13774 );
13775 }
13776
13777 #[test]
13778 fn test_balance_validation() {
13779 let config = create_test_config();
13780 let phase_config = PhaseConfig {
13781 generate_master_data: false,
13782 generate_document_flows: false,
13783 generate_journal_entries: true,
13784 inject_anomalies: false,
13785 validate_balances: true,
13786 show_progress: false,
13787 ..Default::default()
13788 };
13789
13790 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13791 let result = orchestrator.generate().unwrap();
13792
13793 assert!(result.balance_validation.validated);
13795 assert!(result.balance_validation.entries_processed > 0);
13796
13797 assert!(!result.balance_validation.has_unbalanced_entries);
13799
13800 assert_eq!(
13802 result.balance_validation.total_debits,
13803 result.balance_validation.total_credits
13804 );
13805 }
13806
13807 #[test]
13808 fn test_statistics_accuracy() {
13809 let config = create_test_config();
13810 let phase_config = PhaseConfig {
13811 generate_master_data: true,
13812 generate_document_flows: false,
13813 generate_journal_entries: true,
13814 inject_anomalies: false,
13815 show_progress: false,
13816 vendors_per_company: 10,
13817 customers_per_company: 20,
13818 materials_per_company: 15,
13819 assets_per_company: 5,
13820 employees_per_company: 8,
13821 ..Default::default()
13822 };
13823
13824 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13825 let result = orchestrator.generate().unwrap();
13826
13827 assert_eq!(
13829 result.statistics.vendor_count,
13830 result.master_data.vendors.len()
13831 );
13832 assert_eq!(
13833 result.statistics.customer_count,
13834 result.master_data.customers.len()
13835 );
13836 assert_eq!(
13837 result.statistics.material_count,
13838 result.master_data.materials.len()
13839 );
13840 assert_eq!(
13841 result.statistics.total_entries as usize,
13842 result.journal_entries.len()
13843 );
13844 }
13845
13846 #[test]
13847 fn test_phase_config_defaults() {
13848 let config = PhaseConfig::default();
13849 assert!(config.generate_master_data);
13850 assert!(config.generate_document_flows);
13851 assert!(config.generate_journal_entries);
13852 assert!(!config.inject_anomalies);
13853 assert!(config.validate_balances);
13854 assert!(config.show_progress);
13855 assert!(config.vendors_per_company > 0);
13856 assert!(config.customers_per_company > 0);
13857 }
13858
13859 #[test]
13860 fn test_get_coa_before_generation() {
13861 let config = create_test_config();
13862 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
13863
13864 assert!(orchestrator.get_coa().is_none());
13866 }
13867
13868 #[test]
13869 fn test_get_coa_after_generation() {
13870 let config = create_test_config();
13871 let phase_config = PhaseConfig {
13872 generate_master_data: false,
13873 generate_document_flows: false,
13874 generate_journal_entries: true,
13875 inject_anomalies: false,
13876 show_progress: false,
13877 ..Default::default()
13878 };
13879
13880 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13881 let _ = orchestrator.generate().unwrap();
13882
13883 assert!(orchestrator.get_coa().is_some());
13885 }
13886
13887 #[test]
13888 fn test_get_master_data() {
13889 let config = create_test_config();
13890 let phase_config = PhaseConfig {
13891 generate_master_data: true,
13892 generate_document_flows: false,
13893 generate_journal_entries: false,
13894 inject_anomalies: false,
13895 show_progress: false,
13896 vendors_per_company: 5,
13897 customers_per_company: 5,
13898 materials_per_company: 5,
13899 assets_per_company: 5,
13900 employees_per_company: 5,
13901 ..Default::default()
13902 };
13903
13904 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13905 let result = orchestrator.generate().unwrap();
13906
13907 assert!(!result.master_data.vendors.is_empty());
13909 }
13910
13911 #[test]
13912 fn test_with_progress_builder() {
13913 let config = create_test_config();
13914 let orchestrator = EnhancedOrchestrator::with_defaults(config)
13915 .unwrap()
13916 .with_progress(false);
13917
13918 assert!(!orchestrator.phase_config.show_progress);
13920 }
13921
13922 #[test]
13923 fn test_multi_company_generation() {
13924 let mut config = create_test_config();
13925 config.companies.push(CompanyConfig {
13926 code: "2000".to_string(),
13927 name: "Subsidiary".to_string(),
13928 currency: "EUR".to_string(),
13929 functional_currency: None,
13930 country: "DE".to_string(),
13931 annual_transaction_volume: TransactionVolume::TenK,
13932 volume_weight: 0.5,
13933 fiscal_year_variant: "K4".to_string(),
13934 });
13935
13936 let phase_config = PhaseConfig {
13937 generate_master_data: true,
13938 generate_document_flows: false,
13939 generate_journal_entries: true,
13940 inject_anomalies: false,
13941 show_progress: false,
13942 vendors_per_company: 5,
13943 customers_per_company: 5,
13944 materials_per_company: 5,
13945 assets_per_company: 5,
13946 employees_per_company: 5,
13947 ..Default::default()
13948 };
13949
13950 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13951 let result = orchestrator.generate().unwrap();
13952
13953 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
13956 assert!(result.statistics.companies_count == 2);
13957 }
13958
13959 #[test]
13960 fn test_empty_master_data_skips_document_flows() {
13961 let config = create_test_config();
13962 let phase_config = PhaseConfig {
13963 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
13966 inject_anomalies: false,
13967 show_progress: false,
13968 ..Default::default()
13969 };
13970
13971 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13972 let result = orchestrator.generate().unwrap();
13973
13974 assert!(result.document_flows.p2p_chains.is_empty());
13976 assert!(result.document_flows.o2c_chains.is_empty());
13977 }
13978
13979 #[test]
13980 fn test_journal_entry_line_item_count() {
13981 let config = create_test_config();
13982 let phase_config = PhaseConfig {
13983 generate_master_data: false,
13984 generate_document_flows: false,
13985 generate_journal_entries: true,
13986 inject_anomalies: false,
13987 show_progress: false,
13988 ..Default::default()
13989 };
13990
13991 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13992 let result = orchestrator.generate().unwrap();
13993
13994 let calculated_line_items: u64 = result
13996 .journal_entries
13997 .iter()
13998 .map(|e| e.line_count() as u64)
13999 .sum();
14000 assert_eq!(result.statistics.total_line_items, calculated_line_items);
14001 }
14002
14003 #[test]
14004 fn test_audit_generation() {
14005 let config = create_test_config();
14006 let phase_config = PhaseConfig {
14007 generate_master_data: false,
14008 generate_document_flows: false,
14009 generate_journal_entries: true,
14010 inject_anomalies: false,
14011 show_progress: false,
14012 generate_audit: true,
14013 audit_engagements: 2,
14014 workpapers_per_engagement: 5,
14015 evidence_per_workpaper: 2,
14016 risks_per_engagement: 3,
14017 findings_per_engagement: 2,
14018 judgments_per_engagement: 2,
14019 ..Default::default()
14020 };
14021
14022 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14023 let result = orchestrator.generate().unwrap();
14024
14025 assert_eq!(result.audit.engagements.len(), 2);
14027 assert!(!result.audit.workpapers.is_empty());
14028 assert!(!result.audit.evidence.is_empty());
14029 assert!(!result.audit.risk_assessments.is_empty());
14030 assert!(!result.audit.findings.is_empty());
14031 assert!(!result.audit.judgments.is_empty());
14032
14033 assert!(
14035 !result.audit.confirmations.is_empty(),
14036 "ISA 505 confirmations should be generated"
14037 );
14038 assert!(
14039 !result.audit.confirmation_responses.is_empty(),
14040 "ISA 505 confirmation responses should be generated"
14041 );
14042 assert!(
14043 !result.audit.procedure_steps.is_empty(),
14044 "ISA 330 procedure steps should be generated"
14045 );
14046 assert!(
14048 !result.audit.analytical_results.is_empty(),
14049 "ISA 520 analytical procedures should be generated"
14050 );
14051 assert!(
14052 !result.audit.ia_functions.is_empty(),
14053 "ISA 610 IA functions should be generated (one per engagement)"
14054 );
14055 assert!(
14056 !result.audit.related_parties.is_empty(),
14057 "ISA 550 related parties should be generated"
14058 );
14059
14060 assert_eq!(
14062 result.statistics.audit_engagement_count,
14063 result.audit.engagements.len()
14064 );
14065 assert_eq!(
14066 result.statistics.audit_workpaper_count,
14067 result.audit.workpapers.len()
14068 );
14069 assert_eq!(
14070 result.statistics.audit_evidence_count,
14071 result.audit.evidence.len()
14072 );
14073 assert_eq!(
14074 result.statistics.audit_risk_count,
14075 result.audit.risk_assessments.len()
14076 );
14077 assert_eq!(
14078 result.statistics.audit_finding_count,
14079 result.audit.findings.len()
14080 );
14081 assert_eq!(
14082 result.statistics.audit_judgment_count,
14083 result.audit.judgments.len()
14084 );
14085 assert_eq!(
14086 result.statistics.audit_confirmation_count,
14087 result.audit.confirmations.len()
14088 );
14089 assert_eq!(
14090 result.statistics.audit_confirmation_response_count,
14091 result.audit.confirmation_responses.len()
14092 );
14093 assert_eq!(
14094 result.statistics.audit_procedure_step_count,
14095 result.audit.procedure_steps.len()
14096 );
14097 assert_eq!(
14098 result.statistics.audit_sample_count,
14099 result.audit.samples.len()
14100 );
14101 assert_eq!(
14102 result.statistics.audit_analytical_result_count,
14103 result.audit.analytical_results.len()
14104 );
14105 assert_eq!(
14106 result.statistics.audit_ia_function_count,
14107 result.audit.ia_functions.len()
14108 );
14109 assert_eq!(
14110 result.statistics.audit_ia_report_count,
14111 result.audit.ia_reports.len()
14112 );
14113 assert_eq!(
14114 result.statistics.audit_related_party_count,
14115 result.audit.related_parties.len()
14116 );
14117 assert_eq!(
14118 result.statistics.audit_related_party_transaction_count,
14119 result.audit.related_party_transactions.len()
14120 );
14121 }
14122
14123 #[test]
14124 fn test_new_phases_disabled_by_default() {
14125 let config = create_test_config();
14126 assert!(!config.llm.enabled);
14128 assert!(!config.diffusion.enabled);
14129 assert!(!config.causal.enabled);
14130
14131 let phase_config = PhaseConfig {
14132 generate_master_data: false,
14133 generate_document_flows: false,
14134 generate_journal_entries: true,
14135 inject_anomalies: false,
14136 show_progress: false,
14137 ..Default::default()
14138 };
14139
14140 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14141 let result = orchestrator.generate().unwrap();
14142
14143 assert_eq!(result.statistics.llm_enrichment_ms, 0);
14145 assert_eq!(result.statistics.llm_vendors_enriched, 0);
14146 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14147 assert_eq!(result.statistics.diffusion_samples_generated, 0);
14148 assert_eq!(result.statistics.causal_generation_ms, 0);
14149 assert_eq!(result.statistics.causal_samples_generated, 0);
14150 assert!(result.statistics.causal_validation_passed.is_none());
14151 assert_eq!(result.statistics.counterfactual_pair_count, 0);
14152 assert!(result.counterfactual_pairs.is_empty());
14153 }
14154
14155 #[test]
14156 fn test_counterfactual_generation_enabled() {
14157 let config = create_test_config();
14158 let phase_config = PhaseConfig {
14159 generate_master_data: false,
14160 generate_document_flows: false,
14161 generate_journal_entries: true,
14162 inject_anomalies: false,
14163 show_progress: false,
14164 generate_counterfactuals: true,
14165 generate_period_close: false, ..Default::default()
14167 };
14168
14169 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14170 let result = orchestrator.generate().unwrap();
14171
14172 if !result.journal_entries.is_empty() {
14174 assert_eq!(
14175 result.counterfactual_pairs.len(),
14176 result.journal_entries.len()
14177 );
14178 assert_eq!(
14179 result.statistics.counterfactual_pair_count,
14180 result.journal_entries.len()
14181 );
14182 let ids: std::collections::HashSet<_> = result
14184 .counterfactual_pairs
14185 .iter()
14186 .map(|p| p.pair_id.clone())
14187 .collect();
14188 assert_eq!(ids.len(), result.counterfactual_pairs.len());
14189 }
14190 }
14191
14192 #[test]
14193 fn test_llm_enrichment_enabled() {
14194 let mut config = create_test_config();
14195 config.llm.enabled = true;
14196 config.llm.max_vendor_enrichments = 3;
14197
14198 let phase_config = PhaseConfig {
14199 generate_master_data: true,
14200 generate_document_flows: false,
14201 generate_journal_entries: false,
14202 inject_anomalies: false,
14203 show_progress: false,
14204 vendors_per_company: 5,
14205 customers_per_company: 3,
14206 materials_per_company: 3,
14207 assets_per_company: 3,
14208 employees_per_company: 3,
14209 ..Default::default()
14210 };
14211
14212 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14213 let result = orchestrator.generate().unwrap();
14214
14215 assert!(result.statistics.llm_vendors_enriched > 0);
14217 assert!(result.statistics.llm_vendors_enriched <= 3);
14218 }
14219
14220 #[test]
14221 fn test_diffusion_enhancement_enabled() {
14222 let mut config = create_test_config();
14223 config.diffusion.enabled = true;
14224 config.diffusion.n_steps = 50;
14225 config.diffusion.sample_size = 20;
14226
14227 let phase_config = PhaseConfig {
14228 generate_master_data: false,
14229 generate_document_flows: false,
14230 generate_journal_entries: true,
14231 inject_anomalies: false,
14232 show_progress: false,
14233 ..Default::default()
14234 };
14235
14236 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14237 let result = orchestrator.generate().unwrap();
14238
14239 assert_eq!(result.statistics.diffusion_samples_generated, 20);
14241 }
14242
14243 #[test]
14244 fn test_causal_overlay_enabled() {
14245 let mut config = create_test_config();
14246 config.causal.enabled = true;
14247 config.causal.template = "fraud_detection".to_string();
14248 config.causal.sample_size = 100;
14249 config.causal.validate = true;
14250
14251 let phase_config = PhaseConfig {
14252 generate_master_data: false,
14253 generate_document_flows: false,
14254 generate_journal_entries: true,
14255 inject_anomalies: false,
14256 show_progress: false,
14257 ..Default::default()
14258 };
14259
14260 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14261 let result = orchestrator.generate().unwrap();
14262
14263 assert_eq!(result.statistics.causal_samples_generated, 100);
14265 assert!(result.statistics.causal_validation_passed.is_some());
14267 }
14268
14269 #[test]
14270 fn test_causal_overlay_revenue_cycle_template() {
14271 let mut config = create_test_config();
14272 config.causal.enabled = true;
14273 config.causal.template = "revenue_cycle".to_string();
14274 config.causal.sample_size = 50;
14275 config.causal.validate = false;
14276
14277 let phase_config = PhaseConfig {
14278 generate_master_data: false,
14279 generate_document_flows: false,
14280 generate_journal_entries: true,
14281 inject_anomalies: false,
14282 show_progress: false,
14283 ..Default::default()
14284 };
14285
14286 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14287 let result = orchestrator.generate().unwrap();
14288
14289 assert_eq!(result.statistics.causal_samples_generated, 50);
14291 assert!(result.statistics.causal_validation_passed.is_none());
14293 }
14294
14295 #[test]
14296 fn test_all_new_phases_enabled_together() {
14297 let mut config = create_test_config();
14298 config.llm.enabled = true;
14299 config.llm.max_vendor_enrichments = 2;
14300 config.diffusion.enabled = true;
14301 config.diffusion.n_steps = 20;
14302 config.diffusion.sample_size = 10;
14303 config.causal.enabled = true;
14304 config.causal.sample_size = 50;
14305 config.causal.validate = true;
14306
14307 let phase_config = PhaseConfig {
14308 generate_master_data: true,
14309 generate_document_flows: false,
14310 generate_journal_entries: true,
14311 inject_anomalies: false,
14312 show_progress: false,
14313 vendors_per_company: 5,
14314 customers_per_company: 3,
14315 materials_per_company: 3,
14316 assets_per_company: 3,
14317 employees_per_company: 3,
14318 ..Default::default()
14319 };
14320
14321 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14322 let result = orchestrator.generate().unwrap();
14323
14324 assert!(result.statistics.llm_vendors_enriched > 0);
14326 assert_eq!(result.statistics.diffusion_samples_generated, 10);
14327 assert_eq!(result.statistics.causal_samples_generated, 50);
14328 assert!(result.statistics.causal_validation_passed.is_some());
14329 }
14330
14331 #[test]
14332 fn test_statistics_serialization_with_new_fields() {
14333 let stats = EnhancedGenerationStatistics {
14334 total_entries: 100,
14335 total_line_items: 500,
14336 llm_enrichment_ms: 42,
14337 llm_vendors_enriched: 10,
14338 diffusion_enhancement_ms: 100,
14339 diffusion_samples_generated: 50,
14340 causal_generation_ms: 200,
14341 causal_samples_generated: 100,
14342 causal_validation_passed: Some(true),
14343 ..Default::default()
14344 };
14345
14346 let json = serde_json::to_string(&stats).unwrap();
14347 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14348
14349 assert_eq!(deserialized.llm_enrichment_ms, 42);
14350 assert_eq!(deserialized.llm_vendors_enriched, 10);
14351 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14352 assert_eq!(deserialized.diffusion_samples_generated, 50);
14353 assert_eq!(deserialized.causal_generation_ms, 200);
14354 assert_eq!(deserialized.causal_samples_generated, 100);
14355 assert_eq!(deserialized.causal_validation_passed, Some(true));
14356 }
14357
14358 #[test]
14359 fn test_statistics_backward_compat_deserialization() {
14360 let old_json = r#"{
14362 "total_entries": 100,
14363 "total_line_items": 500,
14364 "accounts_count": 50,
14365 "companies_count": 1,
14366 "period_months": 12,
14367 "vendor_count": 10,
14368 "customer_count": 20,
14369 "material_count": 15,
14370 "asset_count": 5,
14371 "employee_count": 8,
14372 "p2p_chain_count": 5,
14373 "o2c_chain_count": 5,
14374 "ap_invoice_count": 5,
14375 "ar_invoice_count": 5,
14376 "ocpm_event_count": 0,
14377 "ocpm_object_count": 0,
14378 "ocpm_case_count": 0,
14379 "audit_engagement_count": 0,
14380 "audit_workpaper_count": 0,
14381 "audit_evidence_count": 0,
14382 "audit_risk_count": 0,
14383 "audit_finding_count": 0,
14384 "audit_judgment_count": 0,
14385 "anomalies_injected": 0,
14386 "data_quality_issues": 0,
14387 "banking_customer_count": 0,
14388 "banking_account_count": 0,
14389 "banking_transaction_count": 0,
14390 "banking_suspicious_count": 0,
14391 "graph_export_count": 0,
14392 "graph_node_count": 0,
14393 "graph_edge_count": 0
14394 }"#;
14395
14396 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14397
14398 assert_eq!(stats.llm_enrichment_ms, 0);
14400 assert_eq!(stats.llm_vendors_enriched, 0);
14401 assert_eq!(stats.diffusion_enhancement_ms, 0);
14402 assert_eq!(stats.diffusion_samples_generated, 0);
14403 assert_eq!(stats.causal_generation_ms, 0);
14404 assert_eq!(stats.causal_samples_generated, 0);
14405 assert!(stats.causal_validation_passed.is_none());
14406 }
14407}