1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Per-phase on/off flags plus entity-count settings.
///
/// The `generate_*`, `inject_*`, `validate_*` and `show_progress` fields are
/// booleans; the `*_per_company`, `*_chains`, `audit_engagements` and
/// `*_per_engagement` fields are counts. Baseline values come from the
/// `Default` impl, and `PhaseConfig::from_config` derives the flags from a
/// `GeneratorConfig`. How each field is consumed is not visible in this part
/// of the file.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Whether to generate master data.
    pub generate_master_data: bool,
    /// Whether to generate document flows.
    pub generate_document_flows: bool,
    /// Whether to generate OCPM events.
    pub generate_ocpm_events: bool,
    /// Whether to generate journal entries.
    pub generate_journal_entries: bool,
    /// Whether to inject anomalies.
    pub inject_anomalies: bool,
    /// Whether to inject data-quality variations.
    pub inject_data_quality: bool,
    /// Whether to validate balances.
    pub validate_balances: bool,
    /// Whether to show progress output.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// P2P chain count.
    pub p2p_chains: usize,
    /// O2C chain count.
    pub o2c_chains: usize,
    /// Whether to generate audit data.
    pub generate_audit: bool,
    /// Audit engagement count.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Whether to generate banking data.
    pub generate_banking: bool,
    /// Whether to generate graph exports.
    pub generate_graph_export: bool,
    /// Whether to generate sourcing data.
    pub generate_sourcing: bool,
    /// Whether to generate bank reconciliations.
    pub generate_bank_reconciliation: bool,
    /// Whether to generate financial statements.
    pub generate_financial_statements: bool,
    /// Whether to generate accounting-standards data.
    pub generate_accounting_standards: bool,
    /// Whether to generate manufacturing data.
    pub generate_manufacturing: bool,
    /// Whether to generate sales quotes, KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Whether to generate tax data.
    pub generate_tax: bool,
    /// Whether to generate ESG data.
    pub generate_esg: bool,
    /// Whether to generate intercompany data.
    pub generate_intercompany: bool,
    /// Whether to generate evolution events.
    pub generate_evolution_events: bool,
    /// Whether to generate counterfactuals.
    pub generate_counterfactuals: bool,
    /// Whether to generate compliance-regulations data.
    pub generate_compliance_regulations: bool,
    /// Whether to generate period-close data.
    pub generate_period_close: bool,
    /// Whether to generate HR data.
    pub generate_hr: bool,
    /// Whether to generate treasury data.
    pub generate_treasury: bool,
    /// Whether to generate project-accounting data.
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338 fn default() -> Self {
339 Self {
340 generate_master_data: true,
341 generate_document_flows: true,
342 generate_ocpm_events: false, generate_journal_entries: true,
344 inject_anomalies: false,
345 inject_data_quality: false, validate_balances: true,
347 show_progress: true,
348 vendors_per_company: 50,
349 customers_per_company: 100,
350 materials_per_company: 200,
351 assets_per_company: 50,
352 employees_per_company: 100,
353 p2p_chains: 100,
354 o2c_chains: 100,
355 generate_audit: false, audit_engagements: 5,
357 workpapers_per_engagement: 20,
358 evidence_per_workpaper: 5,
359 risks_per_engagement: 15,
360 findings_per_engagement: 8,
361 judgments_per_engagement: 10,
362 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, }
381 }
382}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
/// Container for the master-data record sets.
///
/// Derives `Default`, so an empty snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Size figures and output location describing a hypergraph export.
///
/// Does not derive `Default`; all four fields must be supplied.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
474
/// Container for document-flow data: the P2P/O2C chains plus flat lists of
/// individual document types.
///
/// NOTE(review): whether the flat lists duplicate the documents inside the
/// chains is not visible from this definition — confirm against the producer.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Container for subledger data across AP, AR, fixed assets and inventory.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// Container for an optional OCPM event log plus three summary counts.
///
/// The counts are stored separately from `event_log`; nothing in this
/// definition ties them to the log's contents.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Event count.
    pub event_count: usize,
    /// Object count.
    pub object_count: usize,
    /// Case count.
    pub case_count: usize,
}
540
/// Container for audit-related data.
///
/// Most fields are plain vectors of one model type; `group_audit_plan` and
/// `fsm_event_trail` are optional. Derives `Default`, so an empty snapshot has
/// every vector empty and both options `None`.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mapping entries.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, if one is present.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
/// Container for banking data: entities, transactions, label sets and two counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious count (what exactly is counted is not visible here).
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
678
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Export flag; `false` in the derived default.
    pub exported: bool,
    /// Graph count.
    pub graph_count: usize,
    /// Per-export details keyed by a string (presumably the export name — confirm with the producer).
    pub exports: HashMap<String, GraphExportInfo>,
}
689
/// Serializable description of a single graph export.
///
/// Does not derive `Default`; all fields must be supplied.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Format identifier, stored as free text.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
704
/// Container for sourcing data.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
727
/// Trial-balance entries for one fiscal period, serializable in both directions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// Period start date.
    pub period_start: NaiveDate,
    /// Period end date.
    pub period_end: NaiveDate,
    /// Trial-balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
742
/// Container for financial-reporting data.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key
    /// (presumably an entity/company code — confirm with the producer).
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
767
/// Container for HR data: payroll, time, expenses, benefits, pensions and
/// stock compensation, plus summary counts.
///
/// The `*_count` fields are stored separately from the vectors; nothing in
/// this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries related to pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries related to stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
812
/// Container for accounting-standards data: revenue contracts, impairment,
/// business combinations, expected credit loss, provisions and currency
/// translation, plus summary counts.
///
/// The `*_count` fields are stored separately from the vectors; nothing in
/// this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries related to business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// Expected-credit-loss provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries related to expected credit loss.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries related to provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
}
856
/// Container for compliance-regulations data and an optional graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
875
/// Container for manufacturing data plus summary counts.
///
/// The `*_count` fields are stored separately from the vectors; nothing in
/// this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
900
/// Container for sales quotes, management KPIs and budgets, plus summary counts.
///
/// The count fields are stored separately from the vectors. Note that the
/// budget counter is named `budget_line_count`, not `budget_count`, so it
/// presumably counts lines rather than `budgets.len()` — confirm with the producer.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    pub budget_line_count: usize,
}
917
/// Anomaly labels with an optional summary and a per-key tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string (presumably the anomaly type name — confirm with the producer).
    pub by_type: HashMap<String, usize>,
}
928
/// Outcome of balance validation.
///
/// Derives `Default`, which leaves both `validated` and `is_balanced` at
/// `false`; check `validated` before reading the other fields as a verdict.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Validation flag; `false` in the derived default.
    pub validated: bool,
    /// Balanced flag.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Unbalanced-entries flag.
    pub has_unbalanced_entries: bool,
}
951
/// Container for tax data.
///
/// `jurisdiction_count` and `code_count` are stored separately from the
/// vectors; nothing in this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred-tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
978
/// Container for intercompany data, serializable in both directions.
///
/// `ic_document_chains` is marked `#[serde(skip)]`: it is left out when
/// serializing and comes back as its `Default` (`None`) when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains; excluded from (de)serialization.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate (scale — fraction vs. percent — is not visible here; confirm with the producer).
    pub match_rate: f64,
}
1004
/// Container for ESG data plus two summary counts.
///
/// `emission_count` and `disclosure_count` are stored separately from the
/// vectors; nothing in this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1041
/// Container for treasury data.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries held with the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1069
/// Container for project-accounting data.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1086
/// Top-level aggregate that bundles every snapshot and record set in one value.
///
/// Derives `Debug` and `Default` only — it is not `Clone`. A default value has
/// every snapshot at its own default, every vector empty and every option `None`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph-export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes / KPI / budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate result, if one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// SoD violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains of vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, if one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if one is present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1178
/// Counters and timings collected over one enhanced generation run.
///
/// `generate` creates the value with `companies_count` and `period_months`
/// taken from the config and everything else at its `Default`, then hands
/// `&mut stats` to the individual phases.
///
/// Fields carrying `#[serde(default)]` may be missing from serialized input;
/// they then deserialize to their type's default (`0`, or `None` for the
/// `Option`). Fields without the attribute are required when deserializing.
///
/// NOTE(review): apart from the fields explicitly described below, the
/// meaning of each counter is inferred from its name only — the code that
/// fills them lives outside this section of the file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // ---- Journal entries and run dimensions ----
    /// Set by `generate` to `entries.len()` once the journal-entry producing
    /// steps it runs before internal controls have appended their entries.
    pub total_entries: u64,
    /// Set by `generate` to the sum of `line_count()` over those entries.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Initialised by `generate` from `config.companies.len()`.
    pub companies_count: usize,
    /// Initialised by `generate` from `config.global.period_months`.
    pub period_months: u32,

    // ---- Master data ----
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // ---- Document flows and subledger invoices ----
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // ---- OCPM (presumably object-centric process mining — TODO confirm) ----
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // ---- Audit ----
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // ---- Anomaly and data-quality injection ----
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // ---- Banking ----
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    /// Re-assigned by `generate` to `banking.transactions.len()` after bridged
    /// payment transactions have been appended.
    pub banking_transaction_count: usize,
    /// Re-assigned by `generate` to the number of transactions with
    /// `is_suspicious` set, after the payment bridge has run.
    pub banking_suspicious_count: usize,

    // ---- Graph export ----
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // ---- LLM enrichment / diffusion / causal phases ----
    // The `_ms` suffix suggests elapsed milliseconds — TODO confirm at the write sites.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` when absent from serialized input (via `#[serde(default)]`).
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // ---- Sourcing ----
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // ---- Financial reporting ----
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // ---- HR ----
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // ---- Accounting standards ----
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // ---- Manufacturing ----
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // ---- Sales, KPIs, budgets ----
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // ---- Tax master data ----
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // ---- ESG ----
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // ---- Intercompany ----
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // ---- Additional subledgers ----
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // ---- Treasury instruments ----
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // ---- Projects ----
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // ---- Tax provisions, opening balances, reconciliation ----
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // ---- Cash management ----
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // ---- Process / organizational events ----
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // ---- Fraud analytics artefacts ----
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // ---- Temporal chains, entity graph, cross-process links ----
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // ---- Disruptions, industry extras, period close ----
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1429
/// Drives the enhanced generation workflow (`generate`) over a validated
/// `GeneratorConfig`.
///
/// Instances are created through `new`, `with_defaults`, `from_fingerprint`
/// or `from_fingerprint_data`, and customised with the `with_*` builder
/// methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before being stored.
    config: GeneratorConfig,
    /// Phase switches; `with_progress` writes `show_progress` on it.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` on construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when unset.
    seed: u64,
    /// Progress-bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`; rebuilt when an output
    /// path is supplied via `with_output_path`.
    resource_guard: ResourceGuard,
    /// Output location set by `with_output_path`; `None` on construction.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless populated by
    /// `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built in `new` from `config.country_packs`, or
    /// the built-in packs when that section is absent.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink that `emit_phase_items` forwards items to.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453 datasynth_config::validate_config(&config)?;
1454
1455 let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457 let resource_guard = Self::build_resource_guard(&config, None);
1459
1460 let country_pack_registry = match &config.country_packs {
1462 Some(cp) => {
1463 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464 .map_err(|e| SynthError::config(e.to_string()))?
1465 }
1466 None => datasynth_core::CountryPackRegistry::builtin_only()
1467 .map_err(|e| SynthError::config(e.to_string()))?,
1468 };
1469
1470 Ok(Self {
1471 config,
1472 phase_config,
1473 coa: None,
1474 master_data: MasterDataSnapshot::default(),
1475 seed,
1476 multi_progress: None,
1477 resource_guard,
1478 output_path: None,
1479 copula_generators: Vec::new(),
1480 country_pack_registry,
1481 phase_sink: None,
1482 })
1483 }
1484
1485 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487 Self::new(config, PhaseConfig::default())
1488 }
1489
1490 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492 self.phase_sink = Some(sink);
1493 self
1494 }
1495
1496 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498 self.phase_sink = Some(sink);
1499 }
1500
1501 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503 if let Some(ref sink) = self.phase_sink {
1504 for item in items {
1505 if let Ok(value) = serde_json::to_value(item) {
1506 if let Err(e) = sink.emit(phase, type_name, &value) {
1507 warn!(
1508 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509 );
1510 }
1511 }
1512 }
1513 if let Err(e) = sink.phase_complete(phase) {
1514 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515 }
1516 }
1517 }
1518
1519 pub fn with_progress(mut self, show: bool) -> Self {
1521 self.phase_config.show_progress = show;
1522 if show {
1523 self.multi_progress = Some(MultiProgress::new());
1524 }
1525 self
1526 }
1527
1528 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530 let path = path.into();
1531 self.output_path = Some(path.clone());
1532 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534 self
1535 }
1536
1537 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539 &self.country_pack_registry
1540 }
1541
1542 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544 self.country_pack_registry.get_by_str(country)
1545 }
1546
1547 fn primary_country_code(&self) -> &str {
1550 self.config
1551 .companies
1552 .first()
1553 .map(|c| c.country.as_str())
1554 .unwrap_or("US")
1555 }
1556
1557 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559 self.country_pack_for(self.primary_country_code())
1560 }
1561
1562 fn resolve_coa_framework(&self) -> CoAFramework {
1564 if self.config.accounting_standards.enabled {
1565 match self.config.accounting_standards.framework {
1566 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567 return CoAFramework::FrenchPcg;
1568 }
1569 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570 return CoAFramework::GermanSkr04;
1571 }
1572 _ => {}
1573 }
1574 }
1575 let pack = self.primary_pack();
1577 match pack.accounting.framework.as_str() {
1578 "french_gaap" => CoAFramework::FrenchPcg,
1579 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580 _ => CoAFramework::UsGaap,
1581 }
1582 }
1583
1584 pub fn has_copulas(&self) -> bool {
1589 !self.copula_generators.is_empty()
1590 }
1591
1592 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598 &self.copula_generators
1599 }
1600
1601 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605 &mut self.copula_generators
1606 }
1607
1608 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612 self.copula_generators
1613 .iter_mut()
1614 .find(|c| c.name == copula_name)
1615 .map(|c| c.generator.sample())
1616 }
1617
1618 pub fn from_fingerprint(
1641 fingerprint_path: &std::path::Path,
1642 phase_config: PhaseConfig,
1643 scale: f64,
1644 ) -> SynthResult<Self> {
1645 info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647 let reader = FingerprintReader::new();
1649 let fingerprint = reader
1650 .read_from_file(fingerprint_path)
1651 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654 }
1655
1656 pub fn from_fingerprint_data(
1663 fingerprint: Fingerprint,
1664 phase_config: PhaseConfig,
1665 scale: f64,
1666 ) -> SynthResult<Self> {
1667 info!(
1668 "Synthesizing config from fingerprint (version: {}, tables: {})",
1669 fingerprint.manifest.version,
1670 fingerprint.schema.tables.len()
1671 );
1672
1673 let seed: u64 = rand::random();
1675 info!("Fingerprint synthesis seed: {}", seed);
1676
1677 let options = SynthesisOptions {
1679 scale,
1680 seed: Some(seed),
1681 preserve_correlations: true,
1682 inject_anomalies: true,
1683 };
1684 let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686 let synthesis_result = synthesizer
1688 .synthesize_full(&fingerprint, seed)
1689 .map_err(|e| {
1690 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691 })?;
1692
1693 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695 Self::base_config_for_industry(industry)
1696 } else {
1697 Self::base_config_for_industry("manufacturing")
1698 };
1699
1700 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703 info!(
1705 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706 fingerprint.schema.tables.len(),
1707 scale,
1708 synthesis_result.copula_generators.len()
1709 );
1710
1711 if !synthesis_result.copula_generators.is_empty() {
1712 for spec in &synthesis_result.copula_generators {
1713 info!(
1714 " Copula '{}' for table '{}': {} columns",
1715 spec.name,
1716 spec.table,
1717 spec.columns.len()
1718 );
1719 }
1720 }
1721
1722 let mut orchestrator = Self::new(config, phase_config)?;
1724
1725 orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728 Ok(orchestrator)
1729 }
1730
1731 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733 use datasynth_config::presets::create_preset;
1734 use datasynth_config::TransactionVolume;
1735 use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737 let sector = match industry.to_lowercase().as_str() {
1738 "manufacturing" => IndustrySector::Manufacturing,
1739 "retail" => IndustrySector::Retail,
1740 "financial" | "financial_services" => IndustrySector::FinancialServices,
1741 "healthcare" => IndustrySector::Healthcare,
1742 "technology" | "tech" => IndustrySector::Technology,
1743 _ => IndustrySector::Manufacturing,
1744 };
1745
1746 create_preset(
1748 sector,
1749 1, 12, CoAComplexity::Medium,
1752 TransactionVolume::TenK,
1753 )
1754 }
1755
1756 fn apply_config_patch(
1758 mut config: GeneratorConfig,
1759 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760 ) -> GeneratorConfig {
1761 use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763 for (key, value) in patch.values() {
1764 match (key.as_str(), value) {
1765 ("transactions.count", ConfigValue::Integer(n)) => {
1768 info!(
1769 "Fingerprint suggests {} transactions (apply via company volumes)",
1770 n
1771 );
1772 }
1773 ("global.period_months", ConfigValue::Integer(n)) => {
1774 config.global.period_months = (*n).clamp(1, 120) as u32;
1775 }
1776 ("global.start_date", ConfigValue::String(s)) => {
1777 config.global.start_date = s.clone();
1778 }
1779 ("global.seed", ConfigValue::Integer(n)) => {
1780 config.global.seed = Some(*n as u64);
1781 }
1782 ("fraud.enabled", ConfigValue::Bool(b)) => {
1783 config.fraud.enabled = *b;
1784 }
1785 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786 config.fraud.fraud_rate = *f;
1787 }
1788 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789 config.data_quality.enabled = *b;
1790 }
1791 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 _ => {
1799 debug!("Ignoring unknown config patch key: {}", key);
1800 }
1801 }
1802 }
1803
1804 config
1805 }
1806
1807 fn build_resource_guard(
1809 config: &GeneratorConfig,
1810 output_path: Option<PathBuf>,
1811 ) -> ResourceGuard {
1812 let mut builder = ResourceGuardBuilder::new();
1813
1814 if config.global.memory_limit_mb > 0 {
1816 builder = builder.memory_limit(config.global.memory_limit_mb);
1817 }
1818
1819 if let Some(path) = output_path {
1821 builder = builder.output_path(path).min_free_disk(100); }
1823
1824 builder = builder.conservative();
1826
1827 builder.build()
1828 }
1829
1830 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835 self.resource_guard.check()
1836 }
1837
1838 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840 let level = self.resource_guard.check()?;
1841
1842 if level != DegradationLevel::Normal {
1843 warn!(
1844 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845 phase,
1846 level,
1847 self.resource_guard.current_memory_mb(),
1848 self.resource_guard.available_disk_mb()
1849 );
1850 }
1851
1852 Ok(level)
1853 }
1854
1855 fn get_degradation_actions(&self) -> DegradationActions {
1857 self.resource_guard.get_actions()
1858 }
1859
1860 fn check_memory_limit(&self) -> SynthResult<()> {
1862 self.check_resources()?;
1863 Ok(())
1864 }
1865
1866 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868 info!("Starting enhanced generation workflow");
1869 info!(
1870 "Config: industry={:?}, period_months={}, companies={}",
1871 self.config.global.industry,
1872 self.config.global.period_months,
1873 self.config.companies.len()
1874 );
1875
1876 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879 datasynth_core::serde_decimal::set_numeric_native(is_native);
1880 struct NumericModeGuard;
1881 impl Drop for NumericModeGuard {
1882 fn drop(&mut self) {
1883 datasynth_core::serde_decimal::set_numeric_native(false);
1884 }
1885 }
1886 let _numeric_guard = if is_native {
1887 Some(NumericModeGuard)
1888 } else {
1889 None
1890 };
1891
1892 let initial_level = self.check_resources_with_log("initial")?;
1894 if initial_level == DegradationLevel::Emergency {
1895 return Err(SynthError::resource(
1896 "Insufficient resources to start generation",
1897 ));
1898 }
1899
1900 let mut stats = EnhancedGenerationStatistics {
1901 companies_count: self.config.companies.len(),
1902 period_months: self.config.global.period_months,
1903 ..Default::default()
1904 };
1905
1906 let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909 self.phase_master_data(&mut stats)?;
1911
1912 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917 let (mut document_flows, mut subledger, fa_journal_entries) =
1919 self.phase_document_flows(&mut stats)?;
1920
1921 self.emit_phase_items(
1923 "document_flows",
1924 "PurchaseOrder",
1925 &document_flows.purchase_orders,
1926 );
1927 self.emit_phase_items(
1928 "document_flows",
1929 "GoodsReceipt",
1930 &document_flows.goods_receipts,
1931 );
1932 self.emit_phase_items(
1933 "document_flows",
1934 "VendorInvoice",
1935 &document_flows.vendor_invoices,
1936 );
1937 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948 .iter()
1949 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950 .collect();
1951 if !opening_balance_jes.is_empty() {
1952 debug!(
1953 "Prepending {} opening balance JEs to entries",
1954 opening_balance_jes.len()
1955 );
1956 }
1957
1958 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961 if !opening_balance_jes.is_empty() {
1964 let mut combined = opening_balance_jes;
1965 combined.extend(entries);
1966 entries = combined;
1967 }
1968
1969 if !fa_journal_entries.is_empty() {
1971 debug!(
1972 "Appending {} FA acquisition JEs to main entries",
1973 fa_journal_entries.len()
1974 );
1975 entries.extend(fa_journal_entries);
1976 }
1977
1978 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981 let actions = self.get_degradation_actions();
1983
1984 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987 if !sourcing.contracts.is_empty() {
1990 let mut linked_count = 0usize;
1991 let po_vendor_pairs: Vec<(String, String)> = document_flows
1993 .p2p_chains
1994 .iter()
1995 .map(|chain| {
1996 (
1997 chain.purchase_order.vendor_id.clone(),
1998 chain.purchase_order.header.document_id.clone(),
1999 )
2000 })
2001 .collect();
2002
2003 for chain in &mut document_flows.p2p_chains {
2004 if chain.purchase_order.contract_id.is_none() {
2005 if let Some(contract) = sourcing
2006 .contracts
2007 .iter()
2008 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009 {
2010 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011 linked_count += 1;
2012 }
2013 }
2014 }
2015
2016 for contract in &mut sourcing.contracts {
2018 let po_ids: Vec<String> = po_vendor_pairs
2019 .iter()
2020 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021 .map(|(_, po_id)| po_id.clone())
2022 .collect();
2023 if !po_ids.is_empty() {
2024 contract.purchase_order_ids = po_ids;
2025 }
2026 }
2027
2028 if linked_count > 0 {
2029 debug!(
2030 "Linked {} purchase orders to S2C contracts by vendor match",
2031 linked_count
2032 );
2033 }
2034 }
2035
2036 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039 if !intercompany.seller_journal_entries.is_empty()
2041 || !intercompany.buyer_journal_entries.is_empty()
2042 {
2043 let ic_je_count = intercompany.seller_journal_entries.len()
2044 + intercompany.buyer_journal_entries.len();
2045 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047 debug!(
2048 "Appended {} IC journal entries to main entries",
2049 ic_je_count
2050 );
2051 }
2052
2053 if !intercompany.elimination_entries.is_empty() {
2055 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056 &intercompany.elimination_entries,
2057 );
2058 if !elim_jes.is_empty() {
2059 debug!(
2060 "Appended {} elimination journal entries to main entries",
2061 elim_jes.len()
2062 );
2063 let elim_debit: rust_decimal::Decimal =
2065 elim_jes.iter().map(|je| je.total_debit()).sum();
2066 let elim_credit: rust_decimal::Decimal =
2067 elim_jes.iter().map(|je| je.total_credit()).sum();
2068 if elim_debit != elim_credit {
2069 warn!(
2070 "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2071 elim_debit,
2072 elim_credit,
2073 elim_debit - elim_credit
2074 );
2075 }
2076 entries.extend(elim_jes);
2077 }
2078 }
2079
2080 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2082 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2083 document_flows
2084 .customer_invoices
2085 .extend(ic_docs.seller_invoices.iter().cloned());
2086 document_flows
2087 .purchase_orders
2088 .extend(ic_docs.buyer_orders.iter().cloned());
2089 document_flows
2090 .goods_receipts
2091 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2092 document_flows
2093 .vendor_invoices
2094 .extend(ic_docs.buyer_invoices.iter().cloned());
2095 debug!(
2096 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2097 ic_docs.seller_invoices.len(),
2098 ic_docs.buyer_orders.len(),
2099 ic_docs.buyer_goods_receipts.len(),
2100 ic_docs.buyer_invoices.len(),
2101 );
2102 }
2103 }
2104
2105 let hr = self.phase_hr_data(&mut stats)?;
2107
2108 if !hr.payroll_runs.is_empty() {
2110 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2111 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2112 entries.extend(payroll_jes);
2113 }
2114
2115 if !hr.pension_journal_entries.is_empty() {
2117 debug!(
2118 "Generated {} JEs from pension plans",
2119 hr.pension_journal_entries.len()
2120 );
2121 entries.extend(hr.pension_journal_entries.iter().cloned());
2122 }
2123
2124 if !hr.stock_comp_journal_entries.is_empty() {
2126 debug!(
2127 "Generated {} JEs from stock-based compensation",
2128 hr.stock_comp_journal_entries.len()
2129 );
2130 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2131 }
2132
2133 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2135
2136 if !manufacturing_snap.production_orders.is_empty() {
2138 let currency = self
2139 .config
2140 .companies
2141 .first()
2142 .map(|c| c.currency.as_str())
2143 .unwrap_or("USD");
2144 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2145 &manufacturing_snap.production_orders,
2146 &manufacturing_snap.quality_inspections,
2147 currency,
2148 );
2149 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2150 entries.extend(mfg_jes);
2151 }
2152
2153 if !manufacturing_snap.quality_inspections.is_empty() {
2155 let framework = match self.config.accounting_standards.framework {
2156 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2157 _ => "US_GAAP",
2158 };
2159 for company in &self.config.companies {
2160 let company_orders: Vec<_> = manufacturing_snap
2161 .production_orders
2162 .iter()
2163 .filter(|o| o.company_code == company.code)
2164 .cloned()
2165 .collect();
2166 let company_inspections: Vec<_> = manufacturing_snap
2167 .quality_inspections
2168 .iter()
2169 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2170 .cloned()
2171 .collect();
2172 if company_inspections.is_empty() {
2173 continue;
2174 }
2175 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2176 let warranty_result = warranty_gen.generate(
2177 &company.code,
2178 &company_orders,
2179 &company_inspections,
2180 &company.currency,
2181 framework,
2182 );
2183 if !warranty_result.journal_entries.is_empty() {
2184 debug!(
2185 "Generated {} warranty provision JEs for {}",
2186 warranty_result.journal_entries.len(),
2187 company.code
2188 );
2189 entries.extend(warranty_result.journal_entries);
2190 }
2191 }
2192 }
2193
2194 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2196 {
2197 let cogs_currency = self
2198 .config
2199 .companies
2200 .first()
2201 .map(|c| c.currency.as_str())
2202 .unwrap_or("USD");
2203 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2204 &document_flows.deliveries,
2205 &manufacturing_snap.production_orders,
2206 cogs_currency,
2207 );
2208 if !cogs_jes.is_empty() {
2209 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2210 entries.extend(cogs_jes);
2211 }
2212 }
2213
2214 if !manufacturing_snap.inventory_movements.is_empty()
2220 && !subledger.inventory_positions.is_empty()
2221 {
2222 use datasynth_core::models::MovementType as MfgMovementType;
2223 let mut receipt_count = 0usize;
2224 let mut issue_count = 0usize;
2225 for movement in &manufacturing_snap.inventory_movements {
2226 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2228 p.material_id == movement.material_code
2229 && p.company_code == movement.entity_code
2230 }) {
2231 match movement.movement_type {
2232 MfgMovementType::GoodsReceipt => {
2233 pos.add_quantity(
2235 movement.quantity,
2236 movement.value,
2237 movement.movement_date,
2238 );
2239 receipt_count += 1;
2240 }
2241 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2242 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2244 issue_count += 1;
2245 }
2246 _ => {}
2247 }
2248 }
2249 }
2250 debug!(
2251 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2252 manufacturing_snap.inventory_movements.len(),
2253 receipt_count,
2254 issue_count,
2255 );
2256 }
2257
2258 if !entries.is_empty() {
2261 stats.total_entries = entries.len() as u64;
2262 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2263 debug!(
2264 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2265 stats.total_entries, stats.total_line_items
2266 );
2267 }
2268
2269 if self.config.internal_controls.enabled && !entries.is_empty() {
2271 info!("Phase 7b: Applying internal controls to journal entries");
2272 let control_config = ControlGeneratorConfig {
2273 exception_rate: self.config.internal_controls.exception_rate,
2274 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2275 enable_sox_marking: true,
2276 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2277 self.config.internal_controls.sox_materiality_threshold,
2278 )
2279 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2280 ..Default::default()
2281 };
2282 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2283 for entry in &mut entries {
2284 control_gen.apply_controls(entry, &coa);
2285 }
2286 let with_controls = entries
2287 .iter()
2288 .filter(|e| !e.header.control_ids.is_empty())
2289 .count();
2290 info!(
2291 "Applied controls to {} entries ({} with control IDs assigned)",
2292 entries.len(),
2293 with_controls
2294 );
2295 }
2296
2297 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2301 .iter()
2302 .filter(|e| e.header.sod_violation)
2303 .filter_map(|e| {
2304 e.header.sod_conflict_type.map(|ct| {
2305 use datasynth_core::models::{RiskLevel, SodViolation};
2306 let severity = match ct {
2307 datasynth_core::models::SodConflictType::PaymentReleaser
2308 | datasynth_core::models::SodConflictType::RequesterApprover => {
2309 RiskLevel::Critical
2310 }
2311 datasynth_core::models::SodConflictType::PreparerApprover
2312 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2313 | datasynth_core::models::SodConflictType::JournalEntryPoster
2314 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2315 RiskLevel::High
2316 }
2317 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2318 RiskLevel::Medium
2319 }
2320 };
2321 let action = format!(
2322 "SoD conflict {:?} on entry {} ({})",
2323 ct, e.header.document_id, e.header.company_code
2324 );
2325 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2326 })
2327 })
2328 .collect();
2329 if !sod_violations.is_empty() {
2330 info!(
2331 "Phase 7c: Extracted {} SoD violations from {} entries",
2332 sod_violations.len(),
2333 entries.len()
2334 );
2335 }
2336
2337 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2339
2340 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2342
2343 self.emit_phase_items(
2345 "anomaly_injection",
2346 "LabeledAnomaly",
2347 &anomaly_labels.labels,
2348 );
2349
2350 {
2354 use std::collections::HashMap;
2355 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2357 for je in &entries {
2358 if je.header.is_fraud {
2359 if let Some(ref fraud_type) = je.header.fraud_type {
2360 if let Some(ref reference) = je.header.reference {
2362 fraud_map.insert(reference.clone(), *fraud_type);
2363 }
2364 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2366 }
2367 }
2368 }
2369 if !fraud_map.is_empty() {
2370 let mut propagated = 0usize;
2371 macro_rules! propagate_to {
2373 ($collection:expr) => {
2374 for doc in &mut $collection {
2375 if doc.header.propagate_fraud(&fraud_map) {
2376 propagated += 1;
2377 }
2378 }
2379 };
2380 }
2381 propagate_to!(document_flows.purchase_orders);
2382 propagate_to!(document_flows.goods_receipts);
2383 propagate_to!(document_flows.vendor_invoices);
2384 propagate_to!(document_flows.payments);
2385 propagate_to!(document_flows.sales_orders);
2386 propagate_to!(document_flows.deliveries);
2387 propagate_to!(document_flows.customer_invoices);
2388 if propagated > 0 {
2389 info!(
2390 "Propagated fraud labels to {} document flow records",
2391 propagated
2392 );
2393 }
2394 }
2395 }
2396
2397 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2399
2400 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2402
2403 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2405
2406 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2408
2409 let balance_validation = self.phase_balance_validation(&entries)?;
2411
2412 let subledger_reconciliation =
2414 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2415
2416 let (data_quality_stats, quality_issues) =
2418 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2419
2420 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2422
2423 let audit = self.phase_audit_data(&entries, &mut stats)?;
2425
2426 let mut banking = self.phase_banking_data(&mut stats)?;
2428
2429 if self.phase_config.generate_banking
2434 && !document_flows.payments.is_empty()
2435 && !banking.accounts.is_empty()
2436 {
2437 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2438 if bridge_rate > 0.0 {
2439 let mut bridge =
2440 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2441 self.seed,
2442 );
2443 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2444 &document_flows.payments,
2445 &banking.customers,
2446 &banking.accounts,
2447 bridge_rate,
2448 );
2449 info!(
2450 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2451 bridge_stats.bridged_count,
2452 bridge_stats.transactions_emitted,
2453 bridge_stats.fraud_propagated,
2454 );
2455 let bridged_count = bridged_txns.len();
2456 banking.transactions.extend(bridged_txns);
2457
2458 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2461 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2462 &mut banking.transactions,
2463 );
2464 }
2465
2466 banking.suspicious_count = banking
2468 .transactions
2469 .iter()
2470 .filter(|t| t.is_suspicious)
2471 .count();
2472 stats.banking_transaction_count = banking.transactions.len();
2473 stats.banking_suspicious_count = banking.suspicious_count;
2474 }
2475 }
2476
2477 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2479
2480 self.phase_llm_enrichment(&mut stats);
2482
2483 self.phase_diffusion_enhancement(&mut stats);
2485
2486 self.phase_causal_overlay(&mut stats);
2488
2489 let mut financial_reporting = self.phase_financial_reporting(
2493 &document_flows,
2494 &entries,
2495 &coa,
2496 &hr,
2497 &audit,
2498 &mut stats,
2499 )?;
2500
2501 {
2503 use datasynth_core::models::StatementType;
2504 for stmt in &financial_reporting.consolidated_statements {
2505 if stmt.statement_type == StatementType::BalanceSheet {
2506 let total_assets: rust_decimal::Decimal = stmt
2507 .line_items
2508 .iter()
2509 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2510 .map(|li| li.amount)
2511 .sum();
2512 let total_le: rust_decimal::Decimal = stmt
2513 .line_items
2514 .iter()
2515 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2516 .map(|li| li.amount)
2517 .sum();
2518 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2519 warn!(
2520 "BS equation imbalance: assets={}, L+E={}",
2521 total_assets, total_le
2522 );
2523 }
2524 }
2525 }
2526 }
2527
2528 let accounting_standards =
2530 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2531
2532 if !accounting_standards.ecl_journal_entries.is_empty() {
2534 debug!(
2535 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2536 accounting_standards.ecl_journal_entries.len()
2537 );
2538 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2539 }
2540
2541 if !accounting_standards.provision_journal_entries.is_empty() {
2543 debug!(
2544 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2545 accounting_standards.provision_journal_entries.len()
2546 );
2547 entries.extend(
2548 accounting_standards
2549 .provision_journal_entries
2550 .iter()
2551 .cloned(),
2552 );
2553 }
2554
2555 let ocpm = self.phase_ocpm_events(
2557 &document_flows,
2558 &sourcing,
2559 &hr,
2560 &manufacturing_snap,
2561 &banking,
2562 &audit,
2563 &financial_reporting,
2564 &mut stats,
2565 )?;
2566
2567 if let Some(ref event_log) = ocpm.event_log {
2569 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2570 }
2571
2572 let sales_kpi_budgets =
2574 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2575
2576 let treasury =
2580 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2581
2582 if !treasury.journal_entries.is_empty() {
2584 debug!(
2585 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2586 treasury.journal_entries.len()
2587 );
2588 entries.extend(treasury.journal_entries.iter().cloned());
2589 }
2590
2591 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2593
2594 if !tax.tax_posting_journal_entries.is_empty() {
2596 debug!(
2597 "Merging {} tax posting JEs into GL",
2598 tax.tax_posting_journal_entries.len()
2599 );
2600 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2601 }
2602
2603 {
2607 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2608
2609 let framework_str = {
2610 use datasynth_config::schema::AccountingFrameworkConfig;
2611 match self
2612 .config
2613 .accounting_standards
2614 .framework
2615 .unwrap_or_default()
2616 {
2617 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2618 "IFRS"
2619 }
2620 _ => "US_GAAP",
2621 }
2622 };
2623
2624 let depreciation_total: rust_decimal::Decimal = entries
2626 .iter()
2627 .filter(|je| je.header.document_type == "CL")
2628 .flat_map(|je| je.lines.iter())
2629 .filter(|l| l.gl_account.starts_with("6000"))
2630 .map(|l| l.debit_amount)
2631 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2632
2633 let interest_paid: rust_decimal::Decimal = entries
2635 .iter()
2636 .flat_map(|je| je.lines.iter())
2637 .filter(|l| l.gl_account.starts_with("7100"))
2638 .map(|l| l.debit_amount)
2639 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2640
2641 let tax_paid: rust_decimal::Decimal = entries
2643 .iter()
2644 .flat_map(|je| je.lines.iter())
2645 .filter(|l| l.gl_account.starts_with("8000"))
2646 .map(|l| l.debit_amount)
2647 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2648
2649 let capex: rust_decimal::Decimal = entries
2651 .iter()
2652 .flat_map(|je| je.lines.iter())
2653 .filter(|l| l.gl_account.starts_with("1500"))
2654 .map(|l| l.debit_amount)
2655 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2656
2657 let dividends_paid: rust_decimal::Decimal = entries
2659 .iter()
2660 .flat_map(|je| je.lines.iter())
2661 .filter(|l| l.gl_account == "2170")
2662 .map(|l| l.debit_amount)
2663 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2664
2665 let cf_data = CashFlowSourceData {
2666 depreciation_total,
2667 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
2669 delta_ap: rust_decimal::Decimal::ZERO,
2670 delta_inventory: rust_decimal::Decimal::ZERO,
2671 capex,
2672 debt_issuance: rust_decimal::Decimal::ZERO,
2673 debt_repayment: rust_decimal::Decimal::ZERO,
2674 interest_paid,
2675 tax_paid,
2676 dividends_paid,
2677 framework: framework_str.to_string(),
2678 };
2679
2680 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2681 if !enhanced_cf_items.is_empty() {
2682 use datasynth_core::models::StatementType;
2684 let merge_count = enhanced_cf_items.len();
2685 for stmt in financial_reporting
2686 .financial_statements
2687 .iter_mut()
2688 .chain(financial_reporting.consolidated_statements.iter_mut())
2689 .chain(
2690 financial_reporting
2691 .standalone_statements
2692 .values_mut()
2693 .flat_map(|v| v.iter_mut()),
2694 )
2695 {
2696 if stmt.statement_type == StatementType::CashFlowStatement {
2697 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2698 }
2699 }
2700 info!(
2701 "Enhanced cash flow: {} supplementary items merged into CF statements",
2702 merge_count
2703 );
2704 }
2705 }
2706
2707 self.generate_notes_to_financial_statements(
2710 &mut financial_reporting,
2711 &accounting_standards,
2712 &tax,
2713 &hr,
2714 &audit,
2715 &treasury,
2716 );
2717
2718 if self.config.companies.len() >= 2 && !entries.is_empty() {
2722 let companies: Vec<(String, String)> = self
2723 .config
2724 .companies
2725 .iter()
2726 .map(|c| (c.code.clone(), c.name.clone()))
2727 .collect();
2728 let ic_elim: rust_decimal::Decimal =
2729 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2730 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2731 .unwrap_or(NaiveDate::MIN);
2732 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2733 let period_label = format!(
2734 "{}-{:02}",
2735 end_date.year(),
2736 (end_date - chrono::Days::new(1)).month()
2737 );
2738
2739 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2740 let (je_segments, je_recon) =
2741 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2742 if !je_segments.is_empty() {
2743 info!(
2744 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2745 je_segments.len(),
2746 ic_elim,
2747 );
2748 if financial_reporting.segment_reports.is_empty() {
2750 financial_reporting.segment_reports = je_segments;
2751 financial_reporting.segment_reconciliations = vec![je_recon];
2752 } else {
2753 financial_reporting.segment_reports.extend(je_segments);
2754 financial_reporting.segment_reconciliations.push(je_recon);
2755 }
2756 }
2757 }
2758
2759 let esg_snap =
2761 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2762
2763 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2765
2766 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2768
2769 let disruption_events = self.phase_disruption_events(&mut stats)?;
2771
2772 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2774
2775 let (entity_relationship_graph, cross_process_links) =
2777 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2778
2779 let industry_output = self.phase_industry_data(&mut stats);
2781
2782 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2784
2785 self.phase_hypergraph_export(
2787 &coa,
2788 &entries,
2789 &document_flows,
2790 &sourcing,
2791 &hr,
2792 &manufacturing_snap,
2793 &banking,
2794 &audit,
2795 &financial_reporting,
2796 &ocpm,
2797 &compliance_regulations,
2798 &mut stats,
2799 )?;
2800
2801 if self.phase_config.generate_graph_export {
2804 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2805 }
2806
2807 if self.config.streaming.enabled {
2809 info!("Note: streaming config is enabled but batch mode does not use it");
2810 }
2811 if self.config.vendor_network.enabled {
2812 debug!("Vendor network config available; relationship graph generation is partial");
2813 }
2814 if self.config.customer_segmentation.enabled {
2815 debug!("Customer segmentation config available; segment-aware generation is partial");
2816 }
2817
2818 let resource_stats = self.resource_guard.stats();
2820 info!(
2821 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2822 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2823 resource_stats.disk.estimated_bytes_written,
2824 resource_stats.degradation_level
2825 );
2826
2827 if let Some(ref sink) = self.phase_sink {
2829 if let Err(e) = sink.flush() {
2830 warn!("Stream sink flush failed: {e}");
2831 }
2832 }
2833
2834 let lineage = self.build_lineage_graph();
2836
2837 let gate_result = if self.config.quality_gates.enabled {
2839 let profile_name = &self.config.quality_gates.profile;
2840 match datasynth_eval::gates::get_profile(profile_name) {
2841 Some(profile) => {
2842 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2844
2845 if balance_validation.validated {
2847 eval.coherence.balance =
2848 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2849 equation_balanced: balance_validation.is_balanced,
2850 max_imbalance: (balance_validation.total_debits
2851 - balance_validation.total_credits)
2852 .abs(),
2853 periods_evaluated: 1,
2854 periods_imbalanced: if balance_validation.is_balanced {
2855 0
2856 } else {
2857 1
2858 },
2859 period_results: Vec::new(),
2860 companies_evaluated: self.config.companies.len(),
2861 });
2862 }
2863
2864 eval.coherence.passes = balance_validation.is_balanced;
2866 if !balance_validation.is_balanced {
2867 eval.coherence
2868 .failures
2869 .push("Balance sheet equation not satisfied".to_string());
2870 }
2871
2872 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2874 eval.statistical.passes = !entries.is_empty();
2875
2876 eval.quality.overall_score = 0.9; eval.quality.passes = true;
2879
2880 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2881 info!(
2882 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2883 profile_name, result.gates_passed, result.gates_total, result.summary
2884 );
2885 Some(result)
2886 }
2887 None => {
2888 warn!(
2889 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2890 profile_name
2891 );
2892 None
2893 }
2894 }
2895 } else {
2896 None
2897 };
2898
2899 let internal_controls = if self.config.internal_controls.enabled {
2901 InternalControl::standard_controls()
2902 } else {
2903 Vec::new()
2904 };
2905
2906 Ok(EnhancedGenerationResult {
2907 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2908 master_data: std::mem::take(&mut self.master_data),
2909 document_flows,
2910 subledger,
2911 ocpm,
2912 audit,
2913 banking,
2914 graph_export,
2915 sourcing,
2916 financial_reporting,
2917 hr,
2918 accounting_standards,
2919 manufacturing: manufacturing_snap,
2920 sales_kpi_budgets,
2921 tax,
2922 esg: esg_snap,
2923 treasury,
2924 project_accounting,
2925 process_evolution,
2926 organizational_events,
2927 disruption_events,
2928 intercompany,
2929 journal_entries: entries,
2930 anomaly_labels,
2931 balance_validation,
2932 data_quality_stats,
2933 quality_issues,
2934 statistics: stats,
2935 lineage: Some(lineage),
2936 gate_result,
2937 internal_controls,
2938 sod_violations,
2939 opening_balances,
2940 subledger_reconciliation,
2941 counterfactual_pairs,
2942 red_flags,
2943 collusion_rings,
2944 temporal_vendor_chains,
2945 entity_relationship_graph,
2946 cross_process_links,
2947 industry_output,
2948 compliance_regulations,
2949 })
2950 }
2951
2952 fn phase_chart_of_accounts(
2958 &mut self,
2959 stats: &mut EnhancedGenerationStatistics,
2960 ) -> SynthResult<Arc<ChartOfAccounts>> {
2961 info!("Phase 1: Generating Chart of Accounts");
2962 let coa = self.generate_coa()?;
2963 stats.accounts_count = coa.account_count();
2964 info!(
2965 "Chart of Accounts generated: {} accounts",
2966 stats.accounts_count
2967 );
2968 self.check_resources_with_log("post-coa")?;
2969 Ok(coa)
2970 }
2971
2972 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2974 if self.phase_config.generate_master_data {
2975 info!("Phase 2: Generating Master Data");
2976 self.generate_master_data()?;
2977 stats.vendor_count = self.master_data.vendors.len();
2978 stats.customer_count = self.master_data.customers.len();
2979 stats.material_count = self.master_data.materials.len();
2980 stats.asset_count = self.master_data.assets.len();
2981 stats.employee_count = self.master_data.employees.len();
2982 info!(
2983 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2984 stats.vendor_count, stats.customer_count, stats.material_count,
2985 stats.asset_count, stats.employee_count
2986 );
2987 self.check_resources_with_log("post-master-data")?;
2988 } else {
2989 debug!("Phase 2: Skipped (master data generation disabled)");
2990 }
2991 Ok(())
2992 }
2993
2994 fn phase_document_flows(
2996 &mut self,
2997 stats: &mut EnhancedGenerationStatistics,
2998 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2999 let mut document_flows = DocumentFlowSnapshot::default();
3000 let mut subledger = SubledgerSnapshot::default();
3001 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3004
3005 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3006 info!("Phase 3: Generating Document Flows");
3007 self.generate_document_flows(&mut document_flows)?;
3008 stats.p2p_chain_count = document_flows.p2p_chains.len();
3009 stats.o2c_chain_count = document_flows.o2c_chains.len();
3010 info!(
3011 "Document flows generated: {} P2P chains, {} O2C chains",
3012 stats.p2p_chain_count, stats.o2c_chain_count
3013 );
3014
3015 debug!("Phase 3b: Linking document flows to subledgers");
3017 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3018 stats.ap_invoice_count = subledger.ap_invoices.len();
3019 stats.ar_invoice_count = subledger.ar_invoices.len();
3020 debug!(
3021 "Subledgers linked: {} AP invoices, {} AR invoices",
3022 stats.ap_invoice_count, stats.ar_invoice_count
3023 );
3024
3025 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3030 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3031 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3032 debug!("Payment settlements applied to AP and AR subledgers");
3033
3034 if let Ok(start_date) =
3037 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3038 {
3039 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3040 - chrono::Days::new(1);
3041 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3042 for company in &self.config.companies {
3049 let ar_report = ARAgingReport::from_invoices(
3050 company.code.clone(),
3051 &subledger.ar_invoices,
3052 as_of_date,
3053 );
3054 subledger.ar_aging_reports.push(ar_report);
3055
3056 let ap_report = APAgingReport::from_invoices(
3057 company.code.clone(),
3058 &subledger.ap_invoices,
3059 as_of_date,
3060 );
3061 subledger.ap_aging_reports.push(ap_report);
3062 }
3063 debug!(
3064 "AR/AP aging reports built: {} AR, {} AP",
3065 subledger.ar_aging_reports.len(),
3066 subledger.ap_aging_reports.len()
3067 );
3068
3069 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3071 {
3072 use datasynth_generators::DunningGenerator;
3073 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3074 for company in &self.config.companies {
3075 let currency = company.currency.as_str();
3076 let mut company_invoices: Vec<
3079 datasynth_core::models::subledger::ar::ARInvoice,
3080 > = subledger
3081 .ar_invoices
3082 .iter()
3083 .filter(|inv| inv.company_code == company.code)
3084 .cloned()
3085 .collect();
3086
3087 if company_invoices.is_empty() {
3088 continue;
3089 }
3090
3091 let result = dunning_gen.execute_dunning_run(
3092 &company.code,
3093 as_of_date,
3094 &mut company_invoices,
3095 currency,
3096 );
3097
3098 for updated in &company_invoices {
3100 if let Some(orig) = subledger
3101 .ar_invoices
3102 .iter_mut()
3103 .find(|i| i.invoice_number == updated.invoice_number)
3104 {
3105 orig.dunning_info = updated.dunning_info.clone();
3106 }
3107 }
3108
3109 subledger.dunning_runs.push(result.dunning_run);
3110 subledger.dunning_letters.extend(result.letters);
3111 dunning_journal_entries.extend(result.journal_entries);
3113 }
3114 debug!(
3115 "Dunning runs complete: {} runs, {} letters",
3116 subledger.dunning_runs.len(),
3117 subledger.dunning_letters.len()
3118 );
3119 }
3120 }
3121
3122 self.check_resources_with_log("post-document-flows")?;
3123 } else {
3124 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3125 }
3126
3127 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3129 if !self.master_data.assets.is_empty() {
3130 debug!("Generating FA subledger records");
3131 let company_code = self
3132 .config
3133 .companies
3134 .first()
3135 .map(|c| c.code.as_str())
3136 .unwrap_or("1000");
3137 let currency = self
3138 .config
3139 .companies
3140 .first()
3141 .map(|c| c.currency.as_str())
3142 .unwrap_or("USD");
3143
3144 let mut fa_gen = datasynth_generators::FAGenerator::new(
3145 datasynth_generators::FAGeneratorConfig::default(),
3146 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3147 );
3148
3149 for asset in &self.master_data.assets {
3150 let (record, je) = fa_gen.generate_asset_acquisition(
3151 company_code,
3152 &format!("{:?}", asset.asset_class),
3153 &asset.description,
3154 asset.acquisition_date,
3155 currency,
3156 asset.cost_center.as_deref(),
3157 );
3158 subledger.fa_records.push(record);
3159 fa_journal_entries.push(je);
3160 }
3161
3162 stats.fa_subledger_count = subledger.fa_records.len();
3163 debug!(
3164 "FA subledger records generated: {} (with {} acquisition JEs)",
3165 stats.fa_subledger_count,
3166 fa_journal_entries.len()
3167 );
3168 }
3169
3170 if !self.master_data.materials.is_empty() {
3172 debug!("Generating Inventory subledger records");
3173 let first_company = self.config.companies.first();
3174 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3175 let inv_currency = first_company
3176 .map(|c| c.currency.clone())
3177 .unwrap_or_else(|| "USD".to_string());
3178
3179 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3180 datasynth_generators::InventoryGeneratorConfig::default(),
3181 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3182 inv_currency.clone(),
3183 );
3184
3185 for (i, material) in self.master_data.materials.iter().enumerate() {
3186 let plant = format!("PLANT{:02}", (i % 3) + 1);
3187 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3188 let initial_qty = rust_decimal::Decimal::from(
3189 material
3190 .safety_stock
3191 .to_string()
3192 .parse::<i64>()
3193 .unwrap_or(100),
3194 );
3195
3196 let position = inv_gen.generate_position(
3197 company_code,
3198 &plant,
3199 &storage_loc,
3200 &material.material_id,
3201 &material.description,
3202 initial_qty,
3203 Some(material.standard_cost),
3204 &inv_currency,
3205 );
3206 subledger.inventory_positions.push(position);
3207 }
3208
3209 stats.inventory_subledger_count = subledger.inventory_positions.len();
3210 debug!(
3211 "Inventory subledger records generated: {}",
3212 stats.inventory_subledger_count
3213 );
3214 }
3215
3216 if !subledger.fa_records.is_empty() {
3218 if let Ok(start_date) =
3219 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3220 {
3221 let company_code = self
3222 .config
3223 .companies
3224 .first()
3225 .map(|c| c.code.as_str())
3226 .unwrap_or("1000");
3227 let fiscal_year = start_date.year();
3228 let start_period = start_date.month();
3229 let end_period =
3230 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3231
3232 let depr_cfg = FaDepreciationScheduleConfig {
3233 fiscal_year,
3234 start_period,
3235 end_period,
3236 seed_offset: 800,
3237 };
3238 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3239 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3240 let run_count = runs.len();
3241 subledger.depreciation_runs = runs;
3242 debug!(
3243 "Depreciation runs generated: {} runs for {} periods",
3244 run_count, self.config.global.period_months
3245 );
3246 }
3247 }
3248
3249 if !subledger.inventory_positions.is_empty() {
3251 if let Ok(start_date) =
3252 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3253 {
3254 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3255 - chrono::Days::new(1);
3256
3257 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3258 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3259
3260 for company in &self.config.companies {
3261 let result = inv_val_gen.generate(
3262 &company.code,
3263 &subledger.inventory_positions,
3264 as_of_date,
3265 );
3266 subledger.inventory_valuations.push(result);
3267 }
3268 debug!(
3269 "Inventory valuations generated: {} company reports",
3270 subledger.inventory_valuations.len()
3271 );
3272 }
3273 }
3274
3275 Ok((document_flows, subledger, fa_journal_entries))
3276 }
3277
3278 #[allow(clippy::too_many_arguments)]
3280 fn phase_ocpm_events(
3281 &mut self,
3282 document_flows: &DocumentFlowSnapshot,
3283 sourcing: &SourcingSnapshot,
3284 hr: &HrSnapshot,
3285 manufacturing: &ManufacturingSnapshot,
3286 banking: &BankingSnapshot,
3287 audit: &AuditSnapshot,
3288 financial_reporting: &FinancialReportingSnapshot,
3289 stats: &mut EnhancedGenerationStatistics,
3290 ) -> SynthResult<OcpmSnapshot> {
3291 let degradation = self.check_resources()?;
3292 if degradation >= DegradationLevel::Reduced {
3293 debug!(
3294 "Phase skipped due to resource pressure (degradation: {:?})",
3295 degradation
3296 );
3297 return Ok(OcpmSnapshot::default());
3298 }
3299 if self.phase_config.generate_ocpm_events {
3300 info!("Phase 3c: Generating OCPM Events");
3301 let ocpm_snapshot = self.generate_ocpm_events(
3302 document_flows,
3303 sourcing,
3304 hr,
3305 manufacturing,
3306 banking,
3307 audit,
3308 financial_reporting,
3309 )?;
3310 stats.ocpm_event_count = ocpm_snapshot.event_count;
3311 stats.ocpm_object_count = ocpm_snapshot.object_count;
3312 stats.ocpm_case_count = ocpm_snapshot.case_count;
3313 info!(
3314 "OCPM events generated: {} events, {} objects, {} cases",
3315 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3316 );
3317 self.check_resources_with_log("post-ocpm")?;
3318 Ok(ocpm_snapshot)
3319 } else {
3320 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3321 Ok(OcpmSnapshot::default())
3322 }
3323 }
3324
3325 fn phase_journal_entries(
3327 &mut self,
3328 coa: &Arc<ChartOfAccounts>,
3329 document_flows: &DocumentFlowSnapshot,
3330 _stats: &mut EnhancedGenerationStatistics,
3331 ) -> SynthResult<Vec<JournalEntry>> {
3332 let mut entries = Vec::new();
3333
3334 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3336 debug!("Phase 4a: Generating JEs from document flows");
3337 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3338 debug!("Generated {} JEs from document flows", flow_entries.len());
3339 entries.extend(flow_entries);
3340 }
3341
3342 if self.phase_config.generate_journal_entries {
3344 info!("Phase 4: Generating Journal Entries");
3345 let je_entries = self.generate_journal_entries(coa)?;
3346 info!("Generated {} standalone journal entries", je_entries.len());
3347 entries.extend(je_entries);
3348 } else {
3349 debug!("Phase 4: Skipped (journal entry generation disabled)");
3350 }
3351
3352 if !entries.is_empty() {
3353 self.check_resources_with_log("post-journal-entries")?;
3356 }
3357
3358 Ok(entries)
3359 }
3360
3361 fn phase_anomaly_injection(
3363 &mut self,
3364 entries: &mut [JournalEntry],
3365 actions: &DegradationActions,
3366 stats: &mut EnhancedGenerationStatistics,
3367 ) -> SynthResult<AnomalyLabels> {
3368 if self.phase_config.inject_anomalies
3369 && !entries.is_empty()
3370 && !actions.skip_anomaly_injection
3371 {
3372 info!("Phase 5: Injecting Anomalies");
3373 let result = self.inject_anomalies(entries)?;
3374 stats.anomalies_injected = result.labels.len();
3375 info!("Injected {} anomalies", stats.anomalies_injected);
3376 self.check_resources_with_log("post-anomaly-injection")?;
3377 Ok(result)
3378 } else if actions.skip_anomaly_injection {
3379 warn!("Phase 5: Skipped due to resource degradation");
3380 Ok(AnomalyLabels::default())
3381 } else {
3382 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3383 Ok(AnomalyLabels::default())
3384 }
3385 }
3386
3387 fn phase_balance_validation(
3389 &mut self,
3390 entries: &[JournalEntry],
3391 ) -> SynthResult<BalanceValidationResult> {
3392 if self.phase_config.validate_balances && !entries.is_empty() {
3393 debug!("Phase 6: Validating Balances");
3394 let balance_validation = self.validate_journal_entries(entries)?;
3395 if balance_validation.is_balanced {
3396 debug!("Balance validation passed");
3397 } else {
3398 warn!(
3399 "Balance validation found {} errors",
3400 balance_validation.validation_errors.len()
3401 );
3402 }
3403 Ok(balance_validation)
3404 } else {
3405 Ok(BalanceValidationResult::default())
3406 }
3407 }
3408
3409 fn phase_data_quality_injection(
3411 &mut self,
3412 entries: &mut [JournalEntry],
3413 actions: &DegradationActions,
3414 stats: &mut EnhancedGenerationStatistics,
3415 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3416 if self.phase_config.inject_data_quality
3417 && !entries.is_empty()
3418 && !actions.skip_data_quality
3419 {
3420 info!("Phase 7: Injecting Data Quality Variations");
3421 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3422 stats.data_quality_issues = dq_stats.records_with_issues;
3423 info!("Injected {} data quality issues", stats.data_quality_issues);
3424 self.check_resources_with_log("post-data-quality")?;
3425 Ok((dq_stats, quality_issues))
3426 } else if actions.skip_data_quality {
3427 warn!("Phase 7: Skipped due to resource degradation");
3428 Ok((DataQualityStats::default(), Vec::new()))
3429 } else {
3430 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3431 Ok((DataQualityStats::default(), Vec::new()))
3432 }
3433 }
3434
3435 fn phase_period_close(
3445 &mut self,
3446 entries: &mut Vec<JournalEntry>,
3447 subledger: &SubledgerSnapshot,
3448 stats: &mut EnhancedGenerationStatistics,
3449 ) -> SynthResult<()> {
3450 if !self.phase_config.generate_period_close || entries.is_empty() {
3451 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3452 return Ok(());
3453 }
3454
3455 info!("Phase 10b: Generating period-close journal entries");
3456
3457 use datasynth_core::accounts::{
3458 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3459 };
3460 use rust_decimal::Decimal;
3461
3462 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3463 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3464 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3465 let close_date = end_date - chrono::Days::new(1);
3467
3468 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3473 .config
3474 .companies
3475 .iter()
3476 .map(|c| c.code.clone())
3477 .collect();
3478
3479 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3481 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3482
3483 let period_months = self.config.global.period_months;
3487 for asset in &subledger.fa_records {
3488 use datasynth_core::models::subledger::fa::AssetStatus;
3490 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3491 continue;
3492 }
3493 let useful_life_months = asset.useful_life_months();
3494 if useful_life_months == 0 {
3495 continue;
3497 }
3498 let salvage_value = asset.salvage_value();
3499 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3500 if depreciable_base == Decimal::ZERO {
3501 continue;
3502 }
3503 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3504 * Decimal::from(period_months))
3505 .round_dp(2);
3506 if period_depr <= Decimal::ZERO {
3507 continue;
3508 }
3509
3510 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3511 depr_header.document_type = "CL".to_string();
3512 depr_header.header_text = Some(format!(
3513 "Depreciation - {} {}",
3514 asset.asset_number, asset.description
3515 ));
3516 depr_header.created_by = "CLOSE_ENGINE".to_string();
3517 depr_header.source = TransactionSource::Automated;
3518 depr_header.business_process = Some(BusinessProcess::R2R);
3519
3520 let doc_id = depr_header.document_id;
3521 let mut depr_je = JournalEntry::new(depr_header);
3522
3523 depr_je.add_line(JournalEntryLine::debit(
3525 doc_id,
3526 1,
3527 expense_accounts::DEPRECIATION.to_string(),
3528 period_depr,
3529 ));
3530 depr_je.add_line(JournalEntryLine::credit(
3532 doc_id,
3533 2,
3534 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3535 period_depr,
3536 ));
3537
3538 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3539 close_jes.push(depr_je);
3540 }
3541
3542 if !subledger.fa_records.is_empty() {
3543 debug!(
3544 "Generated {} depreciation JEs from {} FA records",
3545 close_jes.len(),
3546 subledger.fa_records.len()
3547 );
3548 }
3549
3550 {
3554 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3555 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3556
3557 let accrual_items: &[(&str, &str, &str)] = &[
3559 ("Accrued Utilities", "6200", "2100"),
3560 ("Accrued Rent", "6300", "2100"),
3561 ("Accrued Interest", "6100", "2150"),
3562 ];
3563
3564 for company_code in &company_codes {
3565 let company_revenue: Decimal = entries
3567 .iter()
3568 .filter(|e| e.header.company_code == *company_code)
3569 .flat_map(|e| e.lines.iter())
3570 .filter(|l| l.gl_account.starts_with('4'))
3571 .map(|l| l.credit_amount - l.debit_amount)
3572 .fold(Decimal::ZERO, |acc, v| acc + v);
3573
3574 if company_revenue <= Decimal::ZERO {
3575 continue;
3576 }
3577
3578 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3580 if accrual_base <= Decimal::ZERO {
3581 continue;
3582 }
3583
3584 for (description, expense_acct, liability_acct) in accrual_items {
3585 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3586 company_code,
3587 description,
3588 accrual_base,
3589 expense_acct,
3590 liability_acct,
3591 close_date,
3592 None,
3593 );
3594 close_jes.push(accrual_je);
3595 if let Some(rev_je) = reversal_je {
3596 close_jes.push(rev_je);
3597 }
3598 }
3599 }
3600
3601 debug!(
3602 "Generated accrual entries for {} companies",
3603 company_codes.len()
3604 );
3605 }
3606
3607 for company_code in &company_codes {
3608 let mut total_revenue = Decimal::ZERO;
3613 let mut total_expenses = Decimal::ZERO;
3614
3615 for entry in entries.iter() {
3616 if entry.header.company_code != *company_code {
3617 continue;
3618 }
3619 for line in &entry.lines {
3620 let category = AccountCategory::from_account(&line.gl_account);
3621 match category {
3622 AccountCategory::Revenue => {
3623 total_revenue += line.credit_amount - line.debit_amount;
3625 }
3626 AccountCategory::Cogs
3627 | AccountCategory::OperatingExpense
3628 | AccountCategory::OtherIncomeExpense
3629 | AccountCategory::Tax => {
3630 total_expenses += line.debit_amount - line.credit_amount;
3632 }
3633 _ => {}
3634 }
3635 }
3636 }
3637
3638 let pre_tax_income = total_revenue - total_expenses;
3639
3640 if pre_tax_income == Decimal::ZERO {
3642 debug!(
3643 "Company {}: no pre-tax income, skipping period close",
3644 company_code
3645 );
3646 continue;
3647 }
3648
3649 if pre_tax_income > Decimal::ZERO {
3651 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3653
3654 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3655 tax_header.document_type = "CL".to_string();
3656 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3657 tax_header.created_by = "CLOSE_ENGINE".to_string();
3658 tax_header.source = TransactionSource::Automated;
3659 tax_header.business_process = Some(BusinessProcess::R2R);
3660
3661 let doc_id = tax_header.document_id;
3662 let mut tax_je = JournalEntry::new(tax_header);
3663
3664 tax_je.add_line(JournalEntryLine::debit(
3666 doc_id,
3667 1,
3668 tax_accounts::TAX_EXPENSE.to_string(),
3669 tax_amount,
3670 ));
3671 tax_je.add_line(JournalEntryLine::credit(
3673 doc_id,
3674 2,
3675 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3676 tax_amount,
3677 ));
3678
3679 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3680 close_jes.push(tax_je);
3681 } else {
3682 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3685 if dta_amount > Decimal::ZERO {
3686 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3687 dta_header.document_type = "CL".to_string();
3688 dta_header.header_text =
3689 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3690 dta_header.created_by = "CLOSE_ENGINE".to_string();
3691 dta_header.source = TransactionSource::Automated;
3692 dta_header.business_process = Some(BusinessProcess::R2R);
3693
3694 let doc_id = dta_header.document_id;
3695 let mut dta_je = JournalEntry::new(dta_header);
3696
3697 dta_je.add_line(JournalEntryLine::debit(
3699 doc_id,
3700 1,
3701 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3702 dta_amount,
3703 ));
3704 dta_je.add_line(JournalEntryLine::credit(
3707 doc_id,
3708 2,
3709 tax_accounts::TAX_EXPENSE.to_string(),
3710 dta_amount,
3711 ));
3712
3713 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3714 close_jes.push(dta_je);
3715 debug!(
3716 "Company {}: loss year — recognised DTA of {}",
3717 company_code, dta_amount
3718 );
3719 }
3720 }
3721
3722 let tax_provision = if pre_tax_income > Decimal::ZERO {
3728 (pre_tax_income * tax_rate).round_dp(2)
3729 } else {
3730 Decimal::ZERO
3731 };
3732 let net_income = pre_tax_income - tax_provision;
3733
3734 if net_income > Decimal::ZERO {
3735 use datasynth_generators::DividendGenerator;
3736 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
3738 let currency_str = self
3739 .config
3740 .companies
3741 .iter()
3742 .find(|c| c.code == *company_code)
3743 .map(|c| c.currency.as_str())
3744 .unwrap_or("USD");
3745 let div_result = div_gen.generate(
3746 company_code,
3747 close_date,
3748 Decimal::new(1, 0), dividend_amount,
3750 currency_str,
3751 );
3752 let div_je_count = div_result.journal_entries.len();
3753 close_jes.extend(div_result.journal_entries);
3754 debug!(
3755 "Company {}: declared dividend of {} ({} JEs)",
3756 company_code, dividend_amount, div_je_count
3757 );
3758 }
3759
3760 if net_income != Decimal::ZERO {
3765 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3766 close_header.document_type = "CL".to_string();
3767 close_header.header_text =
3768 Some(format!("Income statement close - {}", company_code));
3769 close_header.created_by = "CLOSE_ENGINE".to_string();
3770 close_header.source = TransactionSource::Automated;
3771 close_header.business_process = Some(BusinessProcess::R2R);
3772
3773 let doc_id = close_header.document_id;
3774 let mut close_je = JournalEntry::new(close_header);
3775
3776 let abs_net_income = net_income.abs();
3777
3778 if net_income > Decimal::ZERO {
3779 close_je.add_line(JournalEntryLine::debit(
3781 doc_id,
3782 1,
3783 equity_accounts::INCOME_SUMMARY.to_string(),
3784 abs_net_income,
3785 ));
3786 close_je.add_line(JournalEntryLine::credit(
3787 doc_id,
3788 2,
3789 equity_accounts::RETAINED_EARNINGS.to_string(),
3790 abs_net_income,
3791 ));
3792 } else {
3793 close_je.add_line(JournalEntryLine::debit(
3795 doc_id,
3796 1,
3797 equity_accounts::RETAINED_EARNINGS.to_string(),
3798 abs_net_income,
3799 ));
3800 close_je.add_line(JournalEntryLine::credit(
3801 doc_id,
3802 2,
3803 equity_accounts::INCOME_SUMMARY.to_string(),
3804 abs_net_income,
3805 ));
3806 }
3807
3808 debug_assert!(
3809 close_je.is_balanced(),
3810 "Income statement closing JE must be balanced"
3811 );
3812 close_jes.push(close_je);
3813 }
3814 }
3815
3816 let close_count = close_jes.len();
3817 if close_count > 0 {
3818 info!("Generated {} period-close journal entries", close_count);
3819 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3820 entries.extend(close_jes);
3821 stats.period_close_je_count = close_count;
3822
3823 stats.total_entries = entries.len() as u64;
3825 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3826 } else {
3827 debug!("No period-close entries generated (no income statement activity)");
3828 }
3829
3830 Ok(())
3831 }
3832
3833 fn phase_audit_data(
3835 &mut self,
3836 entries: &[JournalEntry],
3837 stats: &mut EnhancedGenerationStatistics,
3838 ) -> SynthResult<AuditSnapshot> {
3839 if self.phase_config.generate_audit {
3840 info!("Phase 8: Generating Audit Data");
3841 let audit_snapshot = self.generate_audit_data(entries)?;
3842 stats.audit_engagement_count = audit_snapshot.engagements.len();
3843 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3844 stats.audit_evidence_count = audit_snapshot.evidence.len();
3845 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3846 stats.audit_finding_count = audit_snapshot.findings.len();
3847 stats.audit_judgment_count = audit_snapshot.judgments.len();
3848 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3849 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3850 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3851 stats.audit_sample_count = audit_snapshot.samples.len();
3852 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3853 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3854 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3855 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3856 stats.audit_related_party_transaction_count =
3857 audit_snapshot.related_party_transactions.len();
3858 info!(
3859 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3860 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3861 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3862 {} RP transactions",
3863 stats.audit_engagement_count,
3864 stats.audit_workpaper_count,
3865 stats.audit_evidence_count,
3866 stats.audit_risk_count,
3867 stats.audit_finding_count,
3868 stats.audit_judgment_count,
3869 stats.audit_confirmation_count,
3870 stats.audit_procedure_step_count,
3871 stats.audit_sample_count,
3872 stats.audit_analytical_result_count,
3873 stats.audit_ia_function_count,
3874 stats.audit_ia_report_count,
3875 stats.audit_related_party_count,
3876 stats.audit_related_party_transaction_count,
3877 );
3878 self.check_resources_with_log("post-audit")?;
3879 Ok(audit_snapshot)
3880 } else {
3881 debug!("Phase 8: Skipped (audit generation disabled)");
3882 Ok(AuditSnapshot::default())
3883 }
3884 }
3885
3886 fn phase_banking_data(
3888 &mut self,
3889 stats: &mut EnhancedGenerationStatistics,
3890 ) -> SynthResult<BankingSnapshot> {
3891 if self.phase_config.generate_banking {
3892 info!("Phase 9: Generating Banking KYC/AML Data");
3893 let banking_snapshot = self.generate_banking_data()?;
3894 stats.banking_customer_count = banking_snapshot.customers.len();
3895 stats.banking_account_count = banking_snapshot.accounts.len();
3896 stats.banking_transaction_count = banking_snapshot.transactions.len();
3897 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3898 info!(
3899 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3900 stats.banking_customer_count, stats.banking_account_count,
3901 stats.banking_transaction_count, stats.banking_suspicious_count
3902 );
3903 self.check_resources_with_log("post-banking")?;
3904 Ok(banking_snapshot)
3905 } else {
3906 debug!("Phase 9: Skipped (banking generation disabled)");
3907 Ok(BankingSnapshot::default())
3908 }
3909 }
3910
3911 fn phase_graph_export(
3913 &mut self,
3914 entries: &[JournalEntry],
3915 coa: &Arc<ChartOfAccounts>,
3916 stats: &mut EnhancedGenerationStatistics,
3917 ) -> SynthResult<GraphExportSnapshot> {
3918 if self.phase_config.generate_graph_export && !entries.is_empty() {
3919 info!("Phase 10: Exporting Accounting Network Graphs");
3920 match self.export_graphs(entries, coa, stats) {
3921 Ok(snapshot) => {
3922 info!(
3923 "Graph export complete: {} graphs ({} nodes, {} edges)",
3924 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3925 );
3926 Ok(snapshot)
3927 }
3928 Err(e) => {
3929 warn!("Phase 10: Graph export failed: {}", e);
3930 Ok(GraphExportSnapshot::default())
3931 }
3932 }
3933 } else {
3934 debug!("Phase 10: Skipped (graph export disabled or no entries)");
3935 Ok(GraphExportSnapshot::default())
3936 }
3937 }
3938
3939 #[allow(clippy::too_many_arguments)]
3941 fn phase_hypergraph_export(
3942 &self,
3943 coa: &Arc<ChartOfAccounts>,
3944 entries: &[JournalEntry],
3945 document_flows: &DocumentFlowSnapshot,
3946 sourcing: &SourcingSnapshot,
3947 hr: &HrSnapshot,
3948 manufacturing: &ManufacturingSnapshot,
3949 banking: &BankingSnapshot,
3950 audit: &AuditSnapshot,
3951 financial_reporting: &FinancialReportingSnapshot,
3952 ocpm: &OcpmSnapshot,
3953 compliance: &ComplianceRegulationsSnapshot,
3954 stats: &mut EnhancedGenerationStatistics,
3955 ) -> SynthResult<()> {
3956 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3957 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3958 match self.export_hypergraph(
3959 coa,
3960 entries,
3961 document_flows,
3962 sourcing,
3963 hr,
3964 manufacturing,
3965 banking,
3966 audit,
3967 financial_reporting,
3968 ocpm,
3969 compliance,
3970 stats,
3971 ) {
3972 Ok(info) => {
3973 info!(
3974 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3975 info.node_count, info.edge_count, info.hyperedge_count
3976 );
3977 }
3978 Err(e) => {
3979 warn!("Phase 10b: Hypergraph export failed: {}", e);
3980 }
3981 }
3982 } else {
3983 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3984 }
3985 Ok(())
3986 }
3987
3988 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3994 if !self.config.llm.enabled {
3995 debug!("Phase 11: Skipped (LLM enrichment disabled)");
3996 return;
3997 }
3998
3999 info!("Phase 11: Starting LLM Enrichment");
4000 let start = std::time::Instant::now();
4001
4002 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4003 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4006 let schema_provider = &self.config.llm.provider;
4007 let api_key_env = match schema_provider.as_str() {
4008 "openai" => Some("OPENAI_API_KEY"),
4009 "anthropic" => Some("ANTHROPIC_API_KEY"),
4010 "custom" => Some("LLM_API_KEY"),
4011 _ => None,
4012 };
4013 if let Some(key_env) = api_key_env {
4014 if std::env::var(key_env).is_ok() {
4015 let llm_config = datasynth_core::llm::LlmConfig {
4016 model: self.config.llm.model.clone(),
4017 api_key_env: key_env.to_string(),
4018 ..datasynth_core::llm::LlmConfig::default()
4019 };
4020 match HttpLlmProvider::new(llm_config) {
4021 Ok(p) => Arc::new(p),
4022 Err(e) => {
4023 warn!(
4024 "Failed to create HttpLlmProvider: {}; falling back to mock",
4025 e
4026 );
4027 Arc::new(MockLlmProvider::new(self.seed))
4028 }
4029 }
4030 } else {
4031 Arc::new(MockLlmProvider::new(self.seed))
4032 }
4033 } else {
4034 Arc::new(MockLlmProvider::new(self.seed))
4035 }
4036 };
4037 let enricher = VendorLlmEnricher::new(provider);
4038
4039 let industry = format!("{:?}", self.config.global.industry);
4040 let max_enrichments = self
4041 .config
4042 .llm
4043 .max_vendor_enrichments
4044 .min(self.master_data.vendors.len());
4045
4046 let mut enriched_count = 0usize;
4047 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4048 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4049 Ok(name) => {
4050 vendor.name = name;
4051 enriched_count += 1;
4052 }
4053 Err(e) => {
4054 warn!(
4055 "LLM vendor enrichment failed for {}: {}",
4056 vendor.vendor_id, e
4057 );
4058 }
4059 }
4060 }
4061
4062 enriched_count
4063 }));
4064
4065 match result {
4066 Ok(enriched_count) => {
4067 stats.llm_vendors_enriched = enriched_count;
4068 let elapsed = start.elapsed();
4069 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4070 info!(
4071 "Phase 11 complete: {} vendors enriched in {}ms",
4072 enriched_count, stats.llm_enrichment_ms
4073 );
4074 }
4075 Err(_) => {
4076 let elapsed = start.elapsed();
4077 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4078 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4079 }
4080 }
4081 }
4082
4083 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4089 if !self.config.diffusion.enabled {
4090 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4091 return;
4092 }
4093
4094 info!("Phase 12: Starting Diffusion Enhancement");
4095 let start = std::time::Instant::now();
4096
4097 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4098 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4101
4102 let diffusion_config = DiffusionConfig {
4103 n_steps: self.config.diffusion.n_steps,
4104 seed: self.seed,
4105 ..Default::default()
4106 };
4107
4108 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4109
4110 let n_samples = self.config.diffusion.sample_size;
4111 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4113
4114 samples.len()
4115 }));
4116
4117 match result {
4118 Ok(sample_count) => {
4119 stats.diffusion_samples_generated = sample_count;
4120 let elapsed = start.elapsed();
4121 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4122 info!(
4123 "Phase 12 complete: {} diffusion samples generated in {}ms",
4124 sample_count, stats.diffusion_enhancement_ms
4125 );
4126 }
4127 Err(_) => {
4128 let elapsed = start.elapsed();
4129 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4130 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4131 }
4132 }
4133 }
4134
4135 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4142 if !self.config.causal.enabled {
4143 debug!("Phase 13: Skipped (causal generation disabled)");
4144 return;
4145 }
4146
4147 info!("Phase 13: Starting Causal Overlay");
4148 let start = std::time::Instant::now();
4149
4150 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4151 let graph = match self.config.causal.template.as_str() {
4153 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4154 _ => CausalGraph::fraud_detection_template(),
4155 };
4156
4157 let scm = StructuralCausalModel::new(graph.clone())
4158 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4159
4160 let n_samples = self.config.causal.sample_size;
4161 let samples = scm
4162 .generate(n_samples, self.seed)
4163 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4164
4165 let validation_passed = if self.config.causal.validate {
4167 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4168 if report.valid {
4169 info!(
4170 "Causal validation passed: all {} checks OK",
4171 report.checks.len()
4172 );
4173 } else {
4174 warn!(
4175 "Causal validation: {} violations detected: {:?}",
4176 report.violations.len(),
4177 report.violations
4178 );
4179 }
4180 Some(report.valid)
4181 } else {
4182 None
4183 };
4184
4185 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4186 }));
4187
4188 match result {
4189 Ok(Ok((sample_count, validation_passed))) => {
4190 stats.causal_samples_generated = sample_count;
4191 stats.causal_validation_passed = validation_passed;
4192 let elapsed = start.elapsed();
4193 stats.causal_generation_ms = elapsed.as_millis() as u64;
4194 info!(
4195 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4196 sample_count, stats.causal_generation_ms, validation_passed,
4197 );
4198 }
4199 Ok(Err(e)) => {
4200 let elapsed = start.elapsed();
4201 stats.causal_generation_ms = elapsed.as_millis() as u64;
4202 warn!("Phase 13: Causal generation failed: {}", e);
4203 }
4204 Err(_) => {
4205 let elapsed = start.elapsed();
4206 stats.causal_generation_ms = elapsed.as_millis() as u64;
4207 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4208 }
4209 }
4210 }
4211
4212 fn phase_sourcing_data(
4214 &mut self,
4215 stats: &mut EnhancedGenerationStatistics,
4216 ) -> SynthResult<SourcingSnapshot> {
4217 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4218 debug!("Phase 14: Skipped (sourcing generation disabled)");
4219 return Ok(SourcingSnapshot::default());
4220 }
4221 let degradation = self.check_resources()?;
4222 if degradation >= DegradationLevel::Reduced {
4223 debug!(
4224 "Phase skipped due to resource pressure (degradation: {:?})",
4225 degradation
4226 );
4227 return Ok(SourcingSnapshot::default());
4228 }
4229
4230 info!("Phase 14: Generating S2C Sourcing Data");
4231 let seed = self.seed;
4232
4233 let vendor_ids: Vec<String> = self
4235 .master_data
4236 .vendors
4237 .iter()
4238 .map(|v| v.vendor_id.clone())
4239 .collect();
4240 if vendor_ids.is_empty() {
4241 debug!("Phase 14: Skipped (no vendors available)");
4242 return Ok(SourcingSnapshot::default());
4243 }
4244
4245 let categories: Vec<(String, String)> = vec![
4246 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4247 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4248 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4249 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4250 ("CAT-LOG".to_string(), "Logistics".to_string()),
4251 ];
4252 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4253 .iter()
4254 .map(|(id, name)| {
4255 (
4256 id.clone(),
4257 name.clone(),
4258 rust_decimal::Decimal::from(100_000),
4259 )
4260 })
4261 .collect();
4262
4263 let company_code = self
4264 .config
4265 .companies
4266 .first()
4267 .map(|c| c.code.as_str())
4268 .unwrap_or("1000");
4269 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4270 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4271 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4272 let fiscal_year = start_date.year() as u16;
4273 let owner_ids: Vec<String> = self
4274 .master_data
4275 .employees
4276 .iter()
4277 .take(5)
4278 .map(|e| e.employee_id.clone())
4279 .collect();
4280 let owner_id = owner_ids
4281 .first()
4282 .map(std::string::String::as_str)
4283 .unwrap_or("BUYER-001");
4284
4285 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4287 let spend_analyses =
4288 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4289
4290 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4292 let sourcing_projects = if owner_ids.is_empty() {
4293 Vec::new()
4294 } else {
4295 project_gen.generate(
4296 company_code,
4297 &categories_with_spend,
4298 &owner_ids,
4299 start_date,
4300 self.config.global.period_months,
4301 )
4302 };
4303 stats.sourcing_project_count = sourcing_projects.len();
4304
4305 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4307 let mut qual_gen = QualificationGenerator::new(seed + 2);
4308 let qualifications = qual_gen.generate(
4309 company_code,
4310 &qual_vendor_ids,
4311 sourcing_projects.first().map(|p| p.project_id.as_str()),
4312 owner_id,
4313 start_date,
4314 );
4315
4316 let mut rfx_gen = RfxGenerator::new(seed + 3);
4318 let rfx_events: Vec<RfxEvent> = sourcing_projects
4319 .iter()
4320 .map(|proj| {
4321 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4322 rfx_gen.generate(
4323 company_code,
4324 &proj.project_id,
4325 &proj.category_id,
4326 &qualified_vids,
4327 owner_id,
4328 start_date,
4329 50000.0,
4330 )
4331 })
4332 .collect();
4333 stats.rfx_event_count = rfx_events.len();
4334
4335 let mut bid_gen = BidGenerator::new(seed + 4);
4337 let mut all_bids = Vec::new();
4338 for rfx in &rfx_events {
4339 let bidder_count = vendor_ids.len().clamp(2, 5);
4340 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4341 let bids = bid_gen.generate(rfx, &responding, start_date);
4342 all_bids.extend(bids);
4343 }
4344 stats.bid_count = all_bids.len();
4345
4346 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4348 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4349 .iter()
4350 .map(|rfx| {
4351 let rfx_bids: Vec<SupplierBid> = all_bids
4352 .iter()
4353 .filter(|b| b.rfx_id == rfx.rfx_id)
4354 .cloned()
4355 .collect();
4356 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4357 })
4358 .collect();
4359
4360 let mut contract_gen = ContractGenerator::new(seed + 6);
4362 let contracts: Vec<ProcurementContract> = bid_evaluations
4363 .iter()
4364 .zip(rfx_events.iter())
4365 .filter_map(|(eval, rfx)| {
4366 eval.ranked_bids.first().and_then(|winner| {
4367 all_bids
4368 .iter()
4369 .find(|b| b.bid_id == winner.bid_id)
4370 .map(|winning_bid| {
4371 contract_gen.generate_from_bid(
4372 winning_bid,
4373 Some(&rfx.sourcing_project_id),
4374 &rfx.category_id,
4375 owner_id,
4376 start_date,
4377 )
4378 })
4379 })
4380 })
4381 .collect();
4382 stats.contract_count = contracts.len();
4383
4384 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4386 let catalog_items = catalog_gen.generate(&contracts);
4387 stats.catalog_item_count = catalog_items.len();
4388
4389 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4391 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4392 .iter()
4393 .fold(
4394 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4395 |mut acc, c| {
4396 acc.entry(c.vendor_id.clone()).or_default().push(c);
4397 acc
4398 },
4399 )
4400 .into_iter()
4401 .collect();
4402 let scorecards = scorecard_gen.generate(
4403 company_code,
4404 &vendor_contracts,
4405 start_date,
4406 end_date,
4407 owner_id,
4408 );
4409 stats.scorecard_count = scorecards.len();
4410
4411 let mut sourcing_projects = sourcing_projects;
4414 for project in &mut sourcing_projects {
4415 project.rfx_ids = rfx_events
4417 .iter()
4418 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4419 .map(|rfx| rfx.rfx_id.clone())
4420 .collect();
4421
4422 project.contract_id = contracts
4424 .iter()
4425 .find(|c| {
4426 c.sourcing_project_id
4427 .as_deref()
4428 .is_some_and(|sp| sp == project.project_id)
4429 })
4430 .map(|c| c.contract_id.clone());
4431
4432 project.spend_analysis_id = spend_analyses
4434 .iter()
4435 .find(|sa| sa.category_id == project.category_id)
4436 .map(|sa| sa.category_id.clone());
4437 }
4438
4439 info!(
4440 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4441 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4442 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4443 );
4444 self.check_resources_with_log("post-sourcing")?;
4445
4446 Ok(SourcingSnapshot {
4447 spend_analyses,
4448 sourcing_projects,
4449 qualifications,
4450 rfx_events,
4451 bids: all_bids,
4452 bid_evaluations,
4453 contracts,
4454 catalog_items,
4455 scorecards,
4456 })
4457 }
4458
4459 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4465 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4466
4467 let parent_code = self
4468 .config
4469 .companies
4470 .first()
4471 .map(|c| c.code.clone())
4472 .unwrap_or_else(|| "PARENT".to_string());
4473
4474 let mut group = GroupStructure::new(parent_code);
4475
4476 for company in self.config.companies.iter().skip(1) {
4477 let sub =
4478 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4479 group.add_subsidiary(sub);
4480 }
4481
4482 group
4483 }
4484
4485 fn phase_intercompany(
4487 &mut self,
4488 journal_entries: &[JournalEntry],
4489 stats: &mut EnhancedGenerationStatistics,
4490 ) -> SynthResult<IntercompanySnapshot> {
4491 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4493 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4494 return Ok(IntercompanySnapshot::default());
4495 }
4496
4497 if self.config.companies.len() < 2 {
4499 debug!(
4500 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4501 self.config.companies.len()
4502 );
4503 return Ok(IntercompanySnapshot::default());
4504 }
4505
4506 info!("Phase 14b: Generating Intercompany Transactions");
4507
4508 let group_structure = self.build_group_structure();
4511 debug!(
4512 "Group structure built: parent={}, subsidiaries={}",
4513 group_structure.parent_entity,
4514 group_structure.subsidiaries.len()
4515 );
4516
4517 let seed = self.seed;
4518 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4519 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4520 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4521
4522 let parent_code = self.config.companies[0].code.clone();
4525 let mut ownership_structure =
4526 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4527
4528 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4529 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4530 format!("REL{:03}", i + 1),
4531 parent_code.clone(),
4532 company.code.clone(),
4533 rust_decimal::Decimal::from(100), start_date,
4535 );
4536 ownership_structure.add_relationship(relationship);
4537 }
4538
4539 let tp_method = match self.config.intercompany.transfer_pricing_method {
4541 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4542 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4543 }
4544 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4545 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4546 }
4547 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4548 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4549 }
4550 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4551 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4552 }
4553 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4554 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4555 }
4556 };
4557
4558 let ic_currency = self
4560 .config
4561 .companies
4562 .first()
4563 .map(|c| c.currency.clone())
4564 .unwrap_or_else(|| "USD".to_string());
4565 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4566 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4567 transfer_pricing_method: tp_method,
4568 markup_percent: rust_decimal::Decimal::from_f64_retain(
4569 self.config.intercompany.markup_percent,
4570 )
4571 .unwrap_or(rust_decimal::Decimal::from(5)),
4572 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4573 default_currency: ic_currency,
4574 ..Default::default()
4575 };
4576
4577 let mut ic_generator = datasynth_generators::ICGenerator::new(
4579 ic_gen_config,
4580 ownership_structure.clone(),
4581 seed + 50,
4582 );
4583
4584 let transactions_per_day = 3;
4587 let matched_pairs = ic_generator.generate_transactions_for_period(
4588 start_date,
4589 end_date,
4590 transactions_per_day,
4591 );
4592
4593 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4595 debug!(
4596 "Generated {} IC seller invoices, {} IC buyer POs",
4597 ic_doc_chains.seller_invoices.len(),
4598 ic_doc_chains.buyer_orders.len()
4599 );
4600
4601 let mut seller_entries = Vec::new();
4603 let mut buyer_entries = Vec::new();
4604 let fiscal_year = start_date.year();
4605
4606 for pair in &matched_pairs {
4607 let fiscal_period = pair.posting_date.month();
4608 let (seller_je, buyer_je) =
4609 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4610 seller_entries.push(seller_je);
4611 buyer_entries.push(buyer_je);
4612 }
4613
4614 let matching_config = datasynth_generators::ICMatchingConfig {
4616 base_currency: self
4617 .config
4618 .companies
4619 .first()
4620 .map(|c| c.currency.clone())
4621 .unwrap_or_else(|| "USD".to_string()),
4622 ..Default::default()
4623 };
4624 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4625 matching_engine.load_matched_pairs(&matched_pairs);
4626 let matching_result = matching_engine.run_matching(end_date);
4627
4628 let mut elimination_entries = Vec::new();
4630 if self.config.intercompany.generate_eliminations {
4631 let elim_config = datasynth_generators::EliminationConfig {
4632 consolidation_entity: "GROUP".to_string(),
4633 base_currency: self
4634 .config
4635 .companies
4636 .first()
4637 .map(|c| c.currency.clone())
4638 .unwrap_or_else(|| "USD".to_string()),
4639 ..Default::default()
4640 };
4641
4642 let mut elim_generator =
4643 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4644
4645 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4646 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4647 matching_result
4648 .matched_balances
4649 .iter()
4650 .chain(matching_result.unmatched_balances.iter())
4651 .cloned()
4652 .collect();
4653
4654 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4666 std::collections::HashMap::new();
4667 let mut equity_amounts: std::collections::HashMap<
4668 String,
4669 std::collections::HashMap<String, rust_decimal::Decimal>,
4670 > = std::collections::HashMap::new();
4671 {
4672 use rust_decimal::Decimal;
4673 let hundred = Decimal::from(100u32);
4674 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4678 for sub in &group_structure.subsidiaries {
4679 let net_assets = {
4680 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4681 if na > Decimal::ZERO {
4682 na
4683 } else {
4684 Decimal::from(1_000_000u64)
4685 }
4686 };
4687 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4689 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4690
4691 let mut eq_map = std::collections::HashMap::new();
4694 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4695 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4696 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4697 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4698 }
4699 }
4700
4701 let journal = elim_generator.generate_eliminations(
4702 &fiscal_period,
4703 end_date,
4704 &all_balances,
4705 &matched_pairs,
4706 &investment_amounts,
4707 &equity_amounts,
4708 );
4709
4710 elimination_entries = journal.entries.clone();
4711 }
4712
4713 let matched_pair_count = matched_pairs.len();
4714 let elimination_entry_count = elimination_entries.len();
4715 let match_rate = matching_result.match_rate;
4716
4717 stats.ic_matched_pair_count = matched_pair_count;
4718 stats.ic_elimination_count = elimination_entry_count;
4719 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4720
4721 info!(
4722 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4723 matched_pair_count,
4724 stats.ic_transaction_count,
4725 seller_entries.len(),
4726 buyer_entries.len(),
4727 elimination_entry_count,
4728 match_rate * 100.0
4729 );
4730 self.check_resources_with_log("post-intercompany")?;
4731
4732 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4736 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4737 use rust_decimal::Decimal;
4738
4739 let eight_pct = Decimal::new(8, 2); group_structure
4742 .subsidiaries
4743 .iter()
4744 .filter(|sub| {
4745 sub.nci_percentage > Decimal::ZERO
4746 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4747 })
4748 .map(|sub| {
4749 let net_assets_from_jes =
4753 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4754
4755 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4756 net_assets_from_jes.round_dp(2)
4757 } else {
4758 Decimal::from(1_000_000u64)
4760 };
4761
4762 let net_income = (net_assets * eight_pct).round_dp(2);
4764
4765 NciMeasurement::compute(
4766 sub.entity_code.clone(),
4767 sub.nci_percentage,
4768 net_assets,
4769 net_income,
4770 )
4771 })
4772 .collect()
4773 };
4774
4775 if !nci_measurements.is_empty() {
4776 info!(
4777 "NCI measurements: {} subsidiaries with non-controlling interests",
4778 nci_measurements.len()
4779 );
4780 }
4781
4782 Ok(IntercompanySnapshot {
4783 group_structure: Some(group_structure),
4784 matched_pairs,
4785 seller_journal_entries: seller_entries,
4786 buyer_journal_entries: buyer_entries,
4787 elimination_entries,
4788 nci_measurements,
4789 ic_document_chains: Some(ic_doc_chains),
4790 matched_pair_count,
4791 elimination_entry_count,
4792 match_rate,
4793 })
4794 }
4795
4796 fn phase_financial_reporting(
4798 &mut self,
4799 document_flows: &DocumentFlowSnapshot,
4800 journal_entries: &[JournalEntry],
4801 coa: &Arc<ChartOfAccounts>,
4802 _hr: &HrSnapshot,
4803 _audit: &AuditSnapshot,
4804 stats: &mut EnhancedGenerationStatistics,
4805 ) -> SynthResult<FinancialReportingSnapshot> {
4806 let fs_enabled = self.phase_config.generate_financial_statements
4807 || self.config.financial_reporting.enabled;
4808 let br_enabled = self.phase_config.generate_bank_reconciliation;
4809
4810 if !fs_enabled && !br_enabled {
4811 debug!("Phase 15: Skipped (financial reporting disabled)");
4812 return Ok(FinancialReportingSnapshot::default());
4813 }
4814
4815 info!("Phase 15: Generating Financial Reporting Data");
4816
4817 let seed = self.seed;
4818 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4819 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4820
4821 let mut financial_statements = Vec::new();
4822 let mut bank_reconciliations = Vec::new();
4823 let mut trial_balances = Vec::new();
4824 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4825 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4826 Vec::new();
4827 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4829 std::collections::HashMap::new();
4830 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4832 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4834
4835 if fs_enabled {
4843 let has_journal_entries = !journal_entries.is_empty();
4844
4845 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4848 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4850
4851 let elimination_entries: Vec<&JournalEntry> = journal_entries
4853 .iter()
4854 .filter(|je| je.header.is_elimination)
4855 .collect();
4856
4857 for period in 0..self.config.global.period_months {
4859 let period_start = start_date + chrono::Months::new(period);
4860 let period_end =
4861 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4862 let fiscal_year = period_end.year() as u16;
4863 let fiscal_period = period_end.month() as u8;
4864 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4865
4866 let mut entity_tb_map: std::collections::HashMap<
4869 String,
4870 std::collections::HashMap<String, rust_decimal::Decimal>,
4871 > = std::collections::HashMap::new();
4872
4873 for (company_idx, company) in self.config.companies.iter().enumerate() {
4875 let company_code = company.code.as_str();
4876 let currency = company.currency.as_str();
4877 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4880 let mut company_fs_gen =
4881 FinancialStatementGenerator::new(seed + company_seed_offset);
4882
4883 if has_journal_entries {
4884 let tb_entries = Self::build_cumulative_trial_balance(
4885 journal_entries,
4886 coa,
4887 company_code,
4888 start_date,
4889 period_end,
4890 fiscal_year,
4891 fiscal_period,
4892 );
4893
4894 let entity_cat_map =
4896 entity_tb_map.entry(company_code.to_string()).or_default();
4897 for tb_entry in &tb_entries {
4898 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4899 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4900 }
4901
4902 let stmts = company_fs_gen.generate(
4903 company_code,
4904 currency,
4905 &tb_entries,
4906 period_start,
4907 period_end,
4908 fiscal_year,
4909 fiscal_period,
4910 None,
4911 "SYS-AUTOCLOSE",
4912 );
4913
4914 let mut entity_stmts = Vec::new();
4915 for stmt in stmts {
4916 if stmt.statement_type == StatementType::CashFlowStatement {
4917 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4918 let cf_items = Self::build_cash_flow_from_trial_balances(
4919 &tb_entries,
4920 None,
4921 net_income,
4922 );
4923 entity_stmts.push(FinancialStatement {
4924 cash_flow_items: cf_items,
4925 ..stmt
4926 });
4927 } else {
4928 entity_stmts.push(stmt);
4929 }
4930 }
4931
4932 financial_statements.extend(entity_stmts.clone());
4934
4935 standalone_statements
4937 .entry(company_code.to_string())
4938 .or_default()
4939 .extend(entity_stmts);
4940
4941 if company_idx == 0 {
4944 trial_balances.push(PeriodTrialBalance {
4945 fiscal_year,
4946 fiscal_period,
4947 period_start,
4948 period_end,
4949 entries: tb_entries,
4950 });
4951 }
4952 } else {
4953 let tb_entries = Self::build_trial_balance_from_entries(
4955 journal_entries,
4956 coa,
4957 company_code,
4958 fiscal_year,
4959 fiscal_period,
4960 );
4961
4962 let stmts = company_fs_gen.generate(
4963 company_code,
4964 currency,
4965 &tb_entries,
4966 period_start,
4967 period_end,
4968 fiscal_year,
4969 fiscal_period,
4970 None,
4971 "SYS-AUTOCLOSE",
4972 );
4973 financial_statements.extend(stmts.clone());
4974 standalone_statements
4975 .entry(company_code.to_string())
4976 .or_default()
4977 .extend(stmts);
4978
4979 if company_idx == 0 && !tb_entries.is_empty() {
4980 trial_balances.push(PeriodTrialBalance {
4981 fiscal_year,
4982 fiscal_period,
4983 period_start,
4984 period_end,
4985 entries: tb_entries,
4986 });
4987 }
4988 }
4989 }
4990
4991 let group_currency = self
4994 .config
4995 .companies
4996 .first()
4997 .map(|c| c.currency.as_str())
4998 .unwrap_or("USD");
4999
5000 let period_eliminations: Vec<JournalEntry> = elimination_entries
5002 .iter()
5003 .filter(|je| {
5004 je.header.fiscal_year == fiscal_year
5005 && je.header.fiscal_period == fiscal_period
5006 })
5007 .map(|je| (*je).clone())
5008 .collect();
5009
5010 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5011 &entity_tb_map,
5012 &period_eliminations,
5013 &period_label,
5014 );
5015
5016 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5019 .line_items
5020 .iter()
5021 .map(|li| {
5022 let net = li.post_elimination_total;
5023 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5024 (net, rust_decimal::Decimal::ZERO)
5025 } else {
5026 (rust_decimal::Decimal::ZERO, -net)
5027 };
5028 datasynth_generators::TrialBalanceEntry {
5029 account_code: li.account_category.clone(),
5030 account_name: li.account_category.clone(),
5031 category: li.account_category.clone(),
5032 debit_balance: debit,
5033 credit_balance: credit,
5034 }
5035 })
5036 .collect();
5037
5038 let mut cons_stmts = cons_gen.generate(
5039 "GROUP",
5040 group_currency,
5041 &cons_tb,
5042 period_start,
5043 period_end,
5044 fiscal_year,
5045 fiscal_period,
5046 None,
5047 "SYS-AUTOCLOSE",
5048 );
5049
5050 let bs_categories: &[&str] = &[
5054 "CASH",
5055 "RECEIVABLES",
5056 "INVENTORY",
5057 "FIXEDASSETS",
5058 "PAYABLES",
5059 "ACCRUEDLIABILITIES",
5060 "LONGTERMDEBT",
5061 "EQUITY",
5062 ];
5063 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5064 cons_line_items.into_iter().partition(|li| {
5065 let upper = li.label.to_uppercase();
5066 bs_categories.iter().any(|c| upper == *c)
5067 });
5068
5069 for stmt in &mut cons_stmts {
5070 stmt.is_consolidated = true;
5071 match stmt.statement_type {
5072 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5073 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5074 _ => {} }
5076 }
5077
5078 consolidated_statements.extend(cons_stmts);
5079 consolidation_schedules.push(schedule);
5080 }
5081
5082 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5088 info!(
5089 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5090 stats.financial_statement_count,
5091 consolidated_statements.len(),
5092 has_journal_entries
5093 );
5094
5095 let entity_seeds: Vec<SegmentSeed> = self
5100 .config
5101 .companies
5102 .iter()
5103 .map(|c| SegmentSeed {
5104 code: c.code.clone(),
5105 name: c.name.clone(),
5106 currency: c.currency.clone(),
5107 })
5108 .collect();
5109
5110 let mut seg_gen = SegmentGenerator::new(seed + 30);
5111
5112 for period in 0..self.config.global.period_months {
5117 let period_end =
5118 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5119 let fiscal_year = period_end.year() as u16;
5120 let fiscal_period = period_end.month() as u8;
5121 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5122
5123 use datasynth_core::models::StatementType;
5124
5125 let cons_is = consolidated_statements.iter().find(|s| {
5127 s.fiscal_year == fiscal_year
5128 && s.fiscal_period == fiscal_period
5129 && s.statement_type == StatementType::IncomeStatement
5130 });
5131 let cons_bs = consolidated_statements.iter().find(|s| {
5132 s.fiscal_year == fiscal_year
5133 && s.fiscal_period == fiscal_period
5134 && s.statement_type == StatementType::BalanceSheet
5135 });
5136
5137 let is_stmt = cons_is.or_else(|| {
5139 financial_statements.iter().find(|s| {
5140 s.fiscal_year == fiscal_year
5141 && s.fiscal_period == fiscal_period
5142 && s.statement_type == StatementType::IncomeStatement
5143 })
5144 });
5145 let bs_stmt = cons_bs.or_else(|| {
5146 financial_statements.iter().find(|s| {
5147 s.fiscal_year == fiscal_year
5148 && s.fiscal_period == fiscal_period
5149 && s.statement_type == StatementType::BalanceSheet
5150 })
5151 });
5152
5153 let consolidated_revenue = is_stmt
5154 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5155 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5157
5158 let consolidated_profit = is_stmt
5159 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5160 .map(|li| li.amount)
5161 .unwrap_or(rust_decimal::Decimal::ZERO);
5162
5163 let consolidated_assets = bs_stmt
5164 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5165 .map(|li| li.amount)
5166 .unwrap_or(rust_decimal::Decimal::ZERO);
5167
5168 if consolidated_revenue == rust_decimal::Decimal::ZERO
5170 && consolidated_assets == rust_decimal::Decimal::ZERO
5171 {
5172 continue;
5173 }
5174
5175 let group_code = self
5176 .config
5177 .companies
5178 .first()
5179 .map(|c| c.code.as_str())
5180 .unwrap_or("GROUP");
5181
5182 let total_depr: rust_decimal::Decimal = journal_entries
5185 .iter()
5186 .filter(|je| je.header.document_type == "CL")
5187 .flat_map(|je| je.lines.iter())
5188 .filter(|l| l.gl_account.starts_with("6000"))
5189 .map(|l| l.debit_amount)
5190 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5191 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5192 Some(total_depr)
5193 } else {
5194 None
5195 };
5196
5197 let (segs, recon) = seg_gen.generate(
5198 group_code,
5199 &period_label,
5200 consolidated_revenue,
5201 consolidated_profit,
5202 consolidated_assets,
5203 &entity_seeds,
5204 depr_param,
5205 );
5206 segment_reports.extend(segs);
5207 segment_reconciliations.push(recon);
5208 }
5209
5210 info!(
5211 "Segment reports generated: {} segments, {} reconciliations",
5212 segment_reports.len(),
5213 segment_reconciliations.len()
5214 );
5215 }
5216
5217 if br_enabled && !document_flows.payments.is_empty() {
5219 let employee_ids: Vec<String> = self
5220 .master_data
5221 .employees
5222 .iter()
5223 .map(|e| e.employee_id.clone())
5224 .collect();
5225 let mut br_gen =
5226 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5227
5228 for company in &self.config.companies {
5230 let company_payments: Vec<PaymentReference> = document_flows
5231 .payments
5232 .iter()
5233 .filter(|p| p.header.company_code == company.code)
5234 .map(|p| PaymentReference {
5235 id: p.header.document_id.clone(),
5236 amount: if p.is_vendor { p.amount } else { -p.amount },
5237 date: p.header.document_date,
5238 reference: p
5239 .check_number
5240 .clone()
5241 .or_else(|| p.wire_reference.clone())
5242 .unwrap_or_else(|| p.header.document_id.clone()),
5243 })
5244 .collect();
5245
5246 if company_payments.is_empty() {
5247 continue;
5248 }
5249
5250 let bank_account_id = format!("{}-MAIN", company.code);
5251
5252 for period in 0..self.config.global.period_months {
5254 let period_start = start_date + chrono::Months::new(period);
5255 let period_end =
5256 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5257
5258 let period_payments: Vec<PaymentReference> = company_payments
5259 .iter()
5260 .filter(|p| p.date >= period_start && p.date <= period_end)
5261 .cloned()
5262 .collect();
5263
5264 let recon = br_gen.generate(
5265 &company.code,
5266 &bank_account_id,
5267 period_start,
5268 period_end,
5269 &company.currency,
5270 &period_payments,
5271 );
5272 bank_reconciliations.push(recon);
5273 }
5274 }
5275 info!(
5276 "Bank reconciliations generated: {} reconciliations",
5277 bank_reconciliations.len()
5278 );
5279 }
5280
5281 stats.bank_reconciliation_count = bank_reconciliations.len();
5282 self.check_resources_with_log("post-financial-reporting")?;
5283
5284 if !trial_balances.is_empty() {
5285 info!(
5286 "Period-close trial balances captured: {} periods",
5287 trial_balances.len()
5288 );
5289 }
5290
5291 let notes_to_financial_statements = Vec::new();
5295
5296 Ok(FinancialReportingSnapshot {
5297 financial_statements,
5298 standalone_statements,
5299 consolidated_statements,
5300 consolidation_schedules,
5301 bank_reconciliations,
5302 trial_balances,
5303 segment_reports,
5304 segment_reconciliations,
5305 notes_to_financial_statements,
5306 })
5307 }
5308
5309 fn generate_notes_to_financial_statements(
5316 &self,
5317 financial_reporting: &mut FinancialReportingSnapshot,
5318 accounting_standards: &AccountingStandardsSnapshot,
5319 tax: &TaxSnapshot,
5320 hr: &HrSnapshot,
5321 audit: &AuditSnapshot,
5322 treasury: &TreasurySnapshot,
5323 ) {
5324 use datasynth_config::schema::AccountingFrameworkConfig;
5325 use datasynth_core::models::StatementType;
5326 use datasynth_generators::period_close::notes_generator::{
5327 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5328 };
5329
5330 let seed = self.seed;
5331 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5332 {
5333 Ok(d) => d,
5334 Err(_) => return,
5335 };
5336
5337 let mut notes_gen = NotesGenerator::new(seed + 4235);
5338
5339 for company in &self.config.companies {
5340 let last_period_end = start_date
5341 + chrono::Months::new(self.config.global.period_months)
5342 - chrono::Days::new(1);
5343 let fiscal_year = last_period_end.year() as u16;
5344
5345 let entity_is = financial_reporting
5347 .standalone_statements
5348 .get(&company.code)
5349 .and_then(|stmts| {
5350 stmts.iter().find(|s| {
5351 s.fiscal_year == fiscal_year
5352 && s.statement_type == StatementType::IncomeStatement
5353 })
5354 });
5355 let entity_bs = financial_reporting
5356 .standalone_statements
5357 .get(&company.code)
5358 .and_then(|stmts| {
5359 stmts.iter().find(|s| {
5360 s.fiscal_year == fiscal_year
5361 && s.statement_type == StatementType::BalanceSheet
5362 })
5363 });
5364
5365 let revenue_amount = entity_is
5367 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5368 .map(|li| li.amount);
5369 let ppe_gross = entity_bs
5370 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5371 .map(|li| li.amount);
5372
5373 let framework = match self
5374 .config
5375 .accounting_standards
5376 .framework
5377 .unwrap_or_default()
5378 {
5379 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5380 "IFRS".to_string()
5381 }
5382 _ => "US GAAP".to_string(),
5383 };
5384
5385 let (entity_dta, entity_dtl) = {
5388 let mut dta = rust_decimal::Decimal::ZERO;
5389 let mut dtl = rust_decimal::Decimal::ZERO;
5390 for rf in &tax.deferred_tax.rollforwards {
5391 if rf.entity_code == company.code {
5392 dta += rf.closing_dta;
5393 dtl += rf.closing_dtl;
5394 }
5395 }
5396 (
5397 if dta > rust_decimal::Decimal::ZERO {
5398 Some(dta)
5399 } else {
5400 None
5401 },
5402 if dtl > rust_decimal::Decimal::ZERO {
5403 Some(dtl)
5404 } else {
5405 None
5406 },
5407 )
5408 };
5409
5410 let entity_provisions: Vec<_> = accounting_standards
5413 .provisions
5414 .iter()
5415 .filter(|p| p.entity_code == company.code)
5416 .collect();
5417 let provision_count = entity_provisions.len();
5418 let total_provisions = if provision_count > 0 {
5419 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5420 } else {
5421 None
5422 };
5423
5424 let entity_pension_plan_count = hr
5426 .pension_plans
5427 .iter()
5428 .filter(|p| p.entity_code == company.code)
5429 .count();
5430 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5431 let sum: rust_decimal::Decimal = hr
5432 .pension_disclosures
5433 .iter()
5434 .filter(|d| {
5435 hr.pension_plans
5436 .iter()
5437 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5438 })
5439 .map(|d| d.net_pension_liability)
5440 .sum();
5441 let plan_assets_sum: rust_decimal::Decimal = hr
5442 .pension_plan_assets
5443 .iter()
5444 .filter(|a| {
5445 hr.pension_plans
5446 .iter()
5447 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5448 })
5449 .map(|a| a.fair_value_closing)
5450 .sum();
5451 if entity_pension_plan_count > 0 {
5452 Some(sum + plan_assets_sum)
5453 } else {
5454 None
5455 }
5456 };
5457 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5458 let sum: rust_decimal::Decimal = hr
5459 .pension_plan_assets
5460 .iter()
5461 .filter(|a| {
5462 hr.pension_plans
5463 .iter()
5464 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5465 })
5466 .map(|a| a.fair_value_closing)
5467 .sum();
5468 if entity_pension_plan_count > 0 {
5469 Some(sum)
5470 } else {
5471 None
5472 }
5473 };
5474
5475 let rp_count = audit.related_party_transactions.len();
5478 let se_count = audit.subsequent_events.len();
5479 let adjusting_count = audit
5480 .subsequent_events
5481 .iter()
5482 .filter(|e| {
5483 matches!(
5484 e.classification,
5485 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5486 )
5487 })
5488 .count();
5489
5490 let ctx = NotesGeneratorContext {
5491 entity_code: company.code.clone(),
5492 framework,
5493 period: format!("FY{}", fiscal_year),
5494 period_end: last_period_end,
5495 currency: company.currency.clone(),
5496 revenue_amount,
5497 total_ppe_gross: ppe_gross,
5498 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5499 deferred_tax_asset: entity_dta,
5501 deferred_tax_liability: entity_dtl,
5502 provision_count,
5504 total_provisions,
5505 pension_plan_count: entity_pension_plan_count,
5507 total_dbo: entity_total_dbo,
5508 total_plan_assets: entity_total_plan_assets,
5509 related_party_transaction_count: rp_count,
5511 subsequent_event_count: se_count,
5512 adjusting_event_count: adjusting_count,
5513 ..NotesGeneratorContext::default()
5514 };
5515
5516 let entity_notes = notes_gen.generate(&ctx);
5517 let standard_note_count = entity_notes.len() as u32;
5518 info!(
5519 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5520 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5521 );
5522 financial_reporting
5523 .notes_to_financial_statements
5524 .extend(entity_notes);
5525
5526 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5528 .debt_instruments
5529 .iter()
5530 .filter(|d| d.entity_id == company.code)
5531 .map(|d| {
5532 (
5533 format!("{:?}", d.instrument_type),
5534 d.principal,
5535 d.maturity_date.to_string(),
5536 )
5537 })
5538 .collect();
5539
5540 let hedge_count = treasury.hedge_relationships.len();
5541 let effective_hedges = treasury
5542 .hedge_relationships
5543 .iter()
5544 .filter(|h| h.is_effective)
5545 .count();
5546 let total_notional: rust_decimal::Decimal = treasury
5547 .hedging_instruments
5548 .iter()
5549 .map(|h| h.notional_amount)
5550 .sum();
5551 let total_fair_value: rust_decimal::Decimal = treasury
5552 .hedging_instruments
5553 .iter()
5554 .map(|h| h.fair_value)
5555 .sum();
5556
5557 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5559 .provisions
5560 .iter()
5561 .filter(|p| p.entity_code == company.code)
5562 .map(|p| p.id.as_str())
5563 .collect();
5564 let provision_movements: Vec<(
5565 String,
5566 rust_decimal::Decimal,
5567 rust_decimal::Decimal,
5568 rust_decimal::Decimal,
5569 )> = accounting_standards
5570 .provision_movements
5571 .iter()
5572 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5573 .map(|m| {
5574 let prov_type = accounting_standards
5575 .provisions
5576 .iter()
5577 .find(|p| p.id == m.provision_id)
5578 .map(|p| format!("{:?}", p.provision_type))
5579 .unwrap_or_else(|| "Unknown".to_string());
5580 (prov_type, m.opening, m.additions, m.closing)
5581 })
5582 .collect();
5583
5584 let enhanced_ctx = EnhancedNotesContext {
5585 entity_code: company.code.clone(),
5586 period: format!("FY{}", fiscal_year),
5587 currency: company.currency.clone(),
5588 finished_goods_value: rust_decimal::Decimal::ZERO,
5590 wip_value: rust_decimal::Decimal::ZERO,
5591 raw_materials_value: rust_decimal::Decimal::ZERO,
5592 debt_instruments,
5593 hedge_count,
5594 effective_hedges,
5595 total_notional,
5596 total_fair_value,
5597 provision_movements,
5598 };
5599
5600 let enhanced_notes =
5601 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5602 if !enhanced_notes.is_empty() {
5603 info!(
5604 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5605 company.code,
5606 enhanced_notes.len(),
5607 enhanced_ctx.debt_instruments.len(),
5608 hedge_count,
5609 enhanced_ctx.provision_movements.len(),
5610 );
5611 financial_reporting
5612 .notes_to_financial_statements
5613 .extend(enhanced_notes);
5614 }
5615 }
5616 }
5617
5618 fn build_trial_balance_from_entries(
5624 journal_entries: &[JournalEntry],
5625 coa: &ChartOfAccounts,
5626 company_code: &str,
5627 fiscal_year: u16,
5628 fiscal_period: u8,
5629 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5630 use rust_decimal::Decimal;
5631
5632 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5634 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5635
5636 for je in journal_entries {
5637 if je.header.company_code != company_code
5639 || je.header.fiscal_year != fiscal_year
5640 || je.header.fiscal_period != fiscal_period
5641 {
5642 continue;
5643 }
5644
5645 for line in &je.lines {
5646 let acct = &line.gl_account;
5647 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5648 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5649 }
5650 }
5651
5652 let mut all_accounts: Vec<&String> = account_debits
5654 .keys()
5655 .chain(account_credits.keys())
5656 .collect::<std::collections::HashSet<_>>()
5657 .into_iter()
5658 .collect();
5659 all_accounts.sort();
5660
5661 let mut entries = Vec::new();
5662
5663 for acct_number in all_accounts {
5664 let debit = account_debits
5665 .get(acct_number)
5666 .copied()
5667 .unwrap_or(Decimal::ZERO);
5668 let credit = account_credits
5669 .get(acct_number)
5670 .copied()
5671 .unwrap_or(Decimal::ZERO);
5672
5673 if debit.is_zero() && credit.is_zero() {
5674 continue;
5675 }
5676
5677 let account_name = coa
5679 .get_account(acct_number)
5680 .map(|gl| gl.short_description.clone())
5681 .unwrap_or_else(|| format!("Account {acct_number}"));
5682
5683 let category = Self::category_from_account_code(acct_number);
5688
5689 entries.push(datasynth_generators::TrialBalanceEntry {
5690 account_code: acct_number.clone(),
5691 account_name,
5692 category,
5693 debit_balance: debit,
5694 credit_balance: credit,
5695 });
5696 }
5697
5698 entries
5699 }
5700
5701 fn build_cumulative_trial_balance(
5708 journal_entries: &[JournalEntry],
5709 coa: &ChartOfAccounts,
5710 company_code: &str,
5711 start_date: NaiveDate,
5712 period_end: NaiveDate,
5713 fiscal_year: u16,
5714 fiscal_period: u8,
5715 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5716 use rust_decimal::Decimal;
5717
5718 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5720 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5721
5722 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5724 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5725
5726 for je in journal_entries {
5727 if je.header.company_code != company_code {
5728 continue;
5729 }
5730
5731 for line in &je.lines {
5732 let acct = &line.gl_account;
5733 let category = Self::category_from_account_code(acct);
5734 let is_bs_account = matches!(
5735 category.as_str(),
5736 "Cash"
5737 | "Receivables"
5738 | "Inventory"
5739 | "FixedAssets"
5740 | "Payables"
5741 | "AccruedLiabilities"
5742 | "LongTermDebt"
5743 | "Equity"
5744 );
5745
5746 if is_bs_account {
5747 if je.header.document_date <= period_end
5749 && je.header.document_date >= start_date
5750 {
5751 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5752 line.debit_amount;
5753 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5754 line.credit_amount;
5755 }
5756 } else {
5757 if je.header.fiscal_year == fiscal_year
5759 && je.header.fiscal_period == fiscal_period
5760 {
5761 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5762 line.debit_amount;
5763 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5764 line.credit_amount;
5765 }
5766 }
5767 }
5768 }
5769
5770 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5772 all_accounts.extend(bs_debits.keys().cloned());
5773 all_accounts.extend(bs_credits.keys().cloned());
5774 all_accounts.extend(is_debits.keys().cloned());
5775 all_accounts.extend(is_credits.keys().cloned());
5776
5777 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5778 sorted_accounts.sort();
5779
5780 let mut entries = Vec::new();
5781
5782 for acct_number in &sorted_accounts {
5783 let category = Self::category_from_account_code(acct_number);
5784 let is_bs_account = matches!(
5785 category.as_str(),
5786 "Cash"
5787 | "Receivables"
5788 | "Inventory"
5789 | "FixedAssets"
5790 | "Payables"
5791 | "AccruedLiabilities"
5792 | "LongTermDebt"
5793 | "Equity"
5794 );
5795
5796 let (debit, credit) = if is_bs_account {
5797 (
5798 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5799 bs_credits
5800 .get(acct_number)
5801 .copied()
5802 .unwrap_or(Decimal::ZERO),
5803 )
5804 } else {
5805 (
5806 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5807 is_credits
5808 .get(acct_number)
5809 .copied()
5810 .unwrap_or(Decimal::ZERO),
5811 )
5812 };
5813
5814 if debit.is_zero() && credit.is_zero() {
5815 continue;
5816 }
5817
5818 let account_name = coa
5819 .get_account(acct_number)
5820 .map(|gl| gl.short_description.clone())
5821 .unwrap_or_else(|| format!("Account {acct_number}"));
5822
5823 entries.push(datasynth_generators::TrialBalanceEntry {
5824 account_code: acct_number.clone(),
5825 account_name,
5826 category,
5827 debit_balance: debit,
5828 credit_balance: credit,
5829 });
5830 }
5831
5832 entries
5833 }
5834
5835 fn build_cash_flow_from_trial_balances(
5840 current_tb: &[datasynth_generators::TrialBalanceEntry],
5841 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5842 net_income: rust_decimal::Decimal,
5843 ) -> Vec<CashFlowItem> {
5844 use rust_decimal::Decimal;
5845
5846 let aggregate =
5848 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5849 let mut map: HashMap<String, Decimal> = HashMap::new();
5850 for entry in tb {
5851 let net = entry.debit_balance - entry.credit_balance;
5852 *map.entry(entry.category.clone()).or_default() += net;
5853 }
5854 map
5855 };
5856
5857 let current = aggregate(current_tb);
5858 let prior = prior_tb.map(aggregate);
5859
5860 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5862 *map.get(key).unwrap_or(&Decimal::ZERO)
5863 };
5864
5865 let change = |key: &str| -> Decimal {
5867 let curr = get(¤t, key);
5868 match &prior {
5869 Some(p) => curr - get(p, key),
5870 None => curr,
5871 }
5872 };
5873
5874 let fixed_asset_change = change("FixedAssets");
5877 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5878 -fixed_asset_change
5879 } else {
5880 Decimal::ZERO
5881 };
5882
5883 let ar_change = change("Receivables");
5885 let inventory_change = change("Inventory");
5886 let ap_change = change("Payables");
5888 let accrued_change = change("AccruedLiabilities");
5889
5890 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5891 + (-ap_change)
5892 + (-accrued_change);
5893
5894 let capex = if fixed_asset_change > Decimal::ZERO {
5896 -fixed_asset_change
5897 } else {
5898 Decimal::ZERO
5899 };
5900 let investing_cf = capex;
5901
5902 let debt_change = -change("LongTermDebt");
5904 let equity_change = -change("Equity");
5905 let financing_cf = debt_change + equity_change;
5906
5907 let net_change = operating_cf + investing_cf + financing_cf;
5908
5909 vec![
5910 CashFlowItem {
5911 item_code: "CF-NI".to_string(),
5912 label: "Net Income".to_string(),
5913 category: CashFlowCategory::Operating,
5914 amount: net_income,
5915 amount_prior: None,
5916 sort_order: 1,
5917 is_total: false,
5918 },
5919 CashFlowItem {
5920 item_code: "CF-DEP".to_string(),
5921 label: "Depreciation & Amortization".to_string(),
5922 category: CashFlowCategory::Operating,
5923 amount: depreciation_addback,
5924 amount_prior: None,
5925 sort_order: 2,
5926 is_total: false,
5927 },
5928 CashFlowItem {
5929 item_code: "CF-AR".to_string(),
5930 label: "Change in Accounts Receivable".to_string(),
5931 category: CashFlowCategory::Operating,
5932 amount: -ar_change,
5933 amount_prior: None,
5934 sort_order: 3,
5935 is_total: false,
5936 },
5937 CashFlowItem {
5938 item_code: "CF-AP".to_string(),
5939 label: "Change in Accounts Payable".to_string(),
5940 category: CashFlowCategory::Operating,
5941 amount: -ap_change,
5942 amount_prior: None,
5943 sort_order: 4,
5944 is_total: false,
5945 },
5946 CashFlowItem {
5947 item_code: "CF-INV".to_string(),
5948 label: "Change in Inventory".to_string(),
5949 category: CashFlowCategory::Operating,
5950 amount: -inventory_change,
5951 amount_prior: None,
5952 sort_order: 5,
5953 is_total: false,
5954 },
5955 CashFlowItem {
5956 item_code: "CF-OP".to_string(),
5957 label: "Net Cash from Operating Activities".to_string(),
5958 category: CashFlowCategory::Operating,
5959 amount: operating_cf,
5960 amount_prior: None,
5961 sort_order: 6,
5962 is_total: true,
5963 },
5964 CashFlowItem {
5965 item_code: "CF-CAPEX".to_string(),
5966 label: "Capital Expenditures".to_string(),
5967 category: CashFlowCategory::Investing,
5968 amount: capex,
5969 amount_prior: None,
5970 sort_order: 7,
5971 is_total: false,
5972 },
5973 CashFlowItem {
5974 item_code: "CF-INV-T".to_string(),
5975 label: "Net Cash from Investing Activities".to_string(),
5976 category: CashFlowCategory::Investing,
5977 amount: investing_cf,
5978 amount_prior: None,
5979 sort_order: 8,
5980 is_total: true,
5981 },
5982 CashFlowItem {
5983 item_code: "CF-DEBT".to_string(),
5984 label: "Net Borrowings / (Repayments)".to_string(),
5985 category: CashFlowCategory::Financing,
5986 amount: debt_change,
5987 amount_prior: None,
5988 sort_order: 9,
5989 is_total: false,
5990 },
5991 CashFlowItem {
5992 item_code: "CF-EQ".to_string(),
5993 label: "Equity Changes".to_string(),
5994 category: CashFlowCategory::Financing,
5995 amount: equity_change,
5996 amount_prior: None,
5997 sort_order: 10,
5998 is_total: false,
5999 },
6000 CashFlowItem {
6001 item_code: "CF-FIN-T".to_string(),
6002 label: "Net Cash from Financing Activities".to_string(),
6003 category: CashFlowCategory::Financing,
6004 amount: financing_cf,
6005 amount_prior: None,
6006 sort_order: 11,
6007 is_total: true,
6008 },
6009 CashFlowItem {
6010 item_code: "CF-NET".to_string(),
6011 label: "Net Change in Cash".to_string(),
6012 category: CashFlowCategory::Operating,
6013 amount: net_change,
6014 amount_prior: None,
6015 sort_order: 12,
6016 is_total: true,
6017 },
6018 ]
6019 }
6020
6021 fn calculate_net_income_from_tb(
6025 tb: &[datasynth_generators::TrialBalanceEntry],
6026 ) -> rust_decimal::Decimal {
6027 use rust_decimal::Decimal;
6028
6029 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6030 for entry in tb {
6031 let net = entry.debit_balance - entry.credit_balance;
6032 *aggregated.entry(entry.category.clone()).or_default() += net;
6033 }
6034
6035 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6036 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6037 let opex = *aggregated
6038 .get("OperatingExpenses")
6039 .unwrap_or(&Decimal::ZERO);
6040 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6041 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6042
6043 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6046 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6048 operating_income - tax
6049 }
6050
/// Maps an account code to a trial balance category using its first two
/// characters.
///
/// | Leading chars | Category             |
/// |---------------|----------------------|
/// | `10`          | `Cash`               |
/// | `11`          | `Receivables`        |
/// | `12`-`14`     | `Inventory`          |
/// | `15`-`19`     | `FixedAssets`        |
/// | `20`          | `Payables`           |
/// | `21`-`24`     | `AccruedLiabilities` |
/// | `25`-`29`     | `LongTermDebt`       |
/// | `30`-`39`     | `Equity`             |
/// | `40`-`44`     | `Revenue`            |
/// | `50`-`52`     | `CostOfSales`        |
/// | `60`-`69`     | `OperatingExpenses`  |
/// | `70`-`74`     | `OtherIncome`        |
/// | `80`-`89`     | `OtherExpenses`      |
///
/// Anything else, including codes shorter than two characters and the gaps
/// `45`-`49`, `53`-`59`, `75`-`79`, falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes are treated as operating expenses.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
6079
6080 fn phase_hr_data(
6082 &mut self,
6083 stats: &mut EnhancedGenerationStatistics,
6084 ) -> SynthResult<HrSnapshot> {
6085 if !self.phase_config.generate_hr {
6086 debug!("Phase 16: Skipped (HR generation disabled)");
6087 return Ok(HrSnapshot::default());
6088 }
6089
6090 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6091
6092 let seed = self.seed;
6093 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6094 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6095 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6096 let company_code = self
6097 .config
6098 .companies
6099 .first()
6100 .map(|c| c.code.as_str())
6101 .unwrap_or("1000");
6102 let currency = self
6103 .config
6104 .companies
6105 .first()
6106 .map(|c| c.currency.as_str())
6107 .unwrap_or("USD");
6108
6109 let employee_ids: Vec<String> = self
6110 .master_data
6111 .employees
6112 .iter()
6113 .map(|e| e.employee_id.clone())
6114 .collect();
6115
6116 if employee_ids.is_empty() {
6117 debug!("Phase 16: Skipped (no employees available)");
6118 return Ok(HrSnapshot::default());
6119 }
6120
6121 let cost_center_ids: Vec<String> = self
6124 .master_data
6125 .employees
6126 .iter()
6127 .filter_map(|e| e.cost_center.clone())
6128 .collect::<std::collections::HashSet<_>>()
6129 .into_iter()
6130 .collect();
6131
6132 let mut snapshot = HrSnapshot::default();
6133
6134 if self.config.hr.payroll.enabled {
6136 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6137 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6138
6139 let payroll_pack = self.primary_pack();
6141
6142 payroll_gen.set_country_pack(payroll_pack.clone());
6145
6146 let employees_with_salary: Vec<(
6147 String,
6148 rust_decimal::Decimal,
6149 Option<String>,
6150 Option<String>,
6151 )> = self
6152 .master_data
6153 .employees
6154 .iter()
6155 .map(|e| {
6156 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6159 e.base_salary
6160 } else {
6161 rust_decimal::Decimal::from(60_000)
6162 };
6163 (
6164 e.employee_id.clone(),
6165 annual, e.cost_center.clone(),
6167 e.department_id.clone(),
6168 )
6169 })
6170 .collect();
6171
6172 let change_history = &self.master_data.employee_change_history;
6175 let has_changes = !change_history.is_empty();
6176 if has_changes {
6177 debug!(
6178 "Payroll will incorporate {} employee change events",
6179 change_history.len()
6180 );
6181 }
6182
6183 for month in 0..self.config.global.period_months {
6184 let period_start = start_date + chrono::Months::new(month);
6185 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6186 let (run, items) = if has_changes {
6187 payroll_gen.generate_with_changes(
6188 company_code,
6189 &employees_with_salary,
6190 period_start,
6191 period_end,
6192 currency,
6193 change_history,
6194 )
6195 } else {
6196 payroll_gen.generate(
6197 company_code,
6198 &employees_with_salary,
6199 period_start,
6200 period_end,
6201 currency,
6202 )
6203 };
6204 snapshot.payroll_runs.push(run);
6205 snapshot.payroll_run_count += 1;
6206 snapshot.payroll_line_item_count += items.len();
6207 snapshot.payroll_line_items.extend(items);
6208 }
6209 }
6210
6211 if self.config.hr.time_attendance.enabled {
6213 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6214 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6215 let entries = time_gen.generate(
6216 &employee_ids,
6217 start_date,
6218 end_date,
6219 &self.config.hr.time_attendance,
6220 );
6221 snapshot.time_entry_count = entries.len();
6222 snapshot.time_entries = entries;
6223 }
6224
6225 if self.config.hr.expenses.enabled {
6227 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6228 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6229 expense_gen.set_country_pack(self.primary_pack().clone());
6230 let company_currency = self
6231 .config
6232 .companies
6233 .first()
6234 .map(|c| c.currency.as_str())
6235 .unwrap_or("USD");
6236 let reports = expense_gen.generate_with_currency(
6237 &employee_ids,
6238 start_date,
6239 end_date,
6240 &self.config.hr.expenses,
6241 company_currency,
6242 );
6243 snapshot.expense_report_count = reports.len();
6244 snapshot.expense_reports = reports;
6245 }
6246
6247 if self.config.hr.payroll.enabled {
6249 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6250 let employee_pairs: Vec<(String, String)> = self
6251 .master_data
6252 .employees
6253 .iter()
6254 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6255 .collect();
6256 let enrollments =
6257 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6258 snapshot.benefit_enrollment_count = enrollments.len();
6259 snapshot.benefit_enrollments = enrollments;
6260 }
6261
6262 if self.phase_config.generate_hr {
6264 let entity_name = self
6265 .config
6266 .companies
6267 .first()
6268 .map(|c| c.name.as_str())
6269 .unwrap_or("Entity");
6270 let period_months = self.config.global.period_months;
6271 let period_label = {
6272 let y = start_date.year();
6273 let m = start_date.month();
6274 if period_months >= 12 {
6275 format!("FY{y}")
6276 } else {
6277 format!("{y}-{m:02}")
6278 }
6279 };
6280 let reporting_date =
6281 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6282
6283 let avg_salary: Option<rust_decimal::Decimal> = {
6288 let employee_count = employee_ids.len();
6289 if self.config.hr.payroll.enabled
6290 && employee_count > 0
6291 && !snapshot.payroll_runs.is_empty()
6292 {
6293 let total_gross: rust_decimal::Decimal = snapshot
6295 .payroll_runs
6296 .iter()
6297 .filter(|r| r.company_code == company_code)
6298 .map(|r| r.total_gross)
6299 .sum();
6300 if total_gross > rust_decimal::Decimal::ZERO {
6301 let annual_total = if period_months > 0 && period_months < 12 {
6303 total_gross * rust_decimal::Decimal::from(12u32)
6304 / rust_decimal::Decimal::from(period_months)
6305 } else {
6306 total_gross
6307 };
6308 Some(
6309 (annual_total / rust_decimal::Decimal::from(employee_count))
6310 .round_dp(2),
6311 )
6312 } else {
6313 None
6314 }
6315 } else {
6316 None
6317 }
6318 };
6319
6320 let mut pension_gen =
6321 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6322 let pension_snap = pension_gen.generate(
6323 company_code,
6324 entity_name,
6325 &period_label,
6326 reporting_date,
6327 employee_ids.len(),
6328 currency,
6329 avg_salary,
6330 period_months,
6331 );
6332 snapshot.pension_plan_count = pension_snap.plans.len();
6333 snapshot.pension_plans = pension_snap.plans;
6334 snapshot.pension_obligations = pension_snap.obligations;
6335 snapshot.pension_plan_assets = pension_snap.plan_assets;
6336 snapshot.pension_disclosures = pension_snap.disclosures;
6337 snapshot.pension_journal_entries = pension_snap.journal_entries;
6342 }
6343
6344 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6346 let period_months = self.config.global.period_months;
6347 let period_label = {
6348 let y = start_date.year();
6349 let m = start_date.month();
6350 if period_months >= 12 {
6351 format!("FY{y}")
6352 } else {
6353 format!("{y}-{m:02}")
6354 }
6355 };
6356 let reporting_date =
6357 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6358
6359 let mut stock_comp_gen =
6360 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6361 let stock_snap = stock_comp_gen.generate(
6362 company_code,
6363 &employee_ids,
6364 start_date,
6365 &period_label,
6366 reporting_date,
6367 currency,
6368 );
6369 snapshot.stock_grant_count = stock_snap.grants.len();
6370 snapshot.stock_grants = stock_snap.grants;
6371 snapshot.stock_comp_expenses = stock_snap.expenses;
6372 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6373 }
6374
6375 stats.payroll_run_count = snapshot.payroll_run_count;
6376 stats.time_entry_count = snapshot.time_entry_count;
6377 stats.expense_report_count = snapshot.expense_report_count;
6378 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6379 stats.pension_plan_count = snapshot.pension_plan_count;
6380 stats.stock_grant_count = snapshot.stock_grant_count;
6381
6382 info!(
6383 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6384 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6385 snapshot.time_entry_count, snapshot.expense_report_count,
6386 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6387 snapshot.stock_grant_count
6388 );
6389 self.check_resources_with_log("post-hr")?;
6390
6391 Ok(snapshot)
6392 }
6393
6394 fn phase_accounting_standards(
6396 &mut self,
6397 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6398 journal_entries: &[JournalEntry],
6399 stats: &mut EnhancedGenerationStatistics,
6400 ) -> SynthResult<AccountingStandardsSnapshot> {
6401 if !self.phase_config.generate_accounting_standards {
6402 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6403 return Ok(AccountingStandardsSnapshot::default());
6404 }
6405 info!("Phase 17: Generating Accounting Standards Data");
6406
6407 let seed = self.seed;
6408 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6409 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6410 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6411 let company_code = self
6412 .config
6413 .companies
6414 .first()
6415 .map(|c| c.code.as_str())
6416 .unwrap_or("1000");
6417 let currency = self
6418 .config
6419 .companies
6420 .first()
6421 .map(|c| c.currency.as_str())
6422 .unwrap_or("USD");
6423
6424 let framework = match self.config.accounting_standards.framework {
6429 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6430 datasynth_standards::framework::AccountingFramework::UsGaap
6431 }
6432 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6433 datasynth_standards::framework::AccountingFramework::Ifrs
6434 }
6435 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6436 datasynth_standards::framework::AccountingFramework::DualReporting
6437 }
6438 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6439 datasynth_standards::framework::AccountingFramework::FrenchGaap
6440 }
6441 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6442 datasynth_standards::framework::AccountingFramework::GermanGaap
6443 }
6444 None => {
6445 let pack = self.primary_pack();
6447 let pack_fw = pack.accounting.framework.as_str();
6448 match pack_fw {
6449 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6450 "dual_reporting" => {
6451 datasynth_standards::framework::AccountingFramework::DualReporting
6452 }
6453 "french_gaap" => {
6454 datasynth_standards::framework::AccountingFramework::FrenchGaap
6455 }
6456 "german_gaap" | "hgb" => {
6457 datasynth_standards::framework::AccountingFramework::GermanGaap
6458 }
6459 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6461 }
6462 }
6463 };
6464
6465 let mut snapshot = AccountingStandardsSnapshot::default();
6466
6467 if self.config.accounting_standards.revenue_recognition.enabled {
6469 let customer_ids: Vec<String> = self
6470 .master_data
6471 .customers
6472 .iter()
6473 .map(|c| c.customer_id.clone())
6474 .collect();
6475
6476 if !customer_ids.is_empty() {
6477 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6478 let contracts = rev_gen.generate(
6479 company_code,
6480 &customer_ids,
6481 start_date,
6482 end_date,
6483 currency,
6484 &self.config.accounting_standards.revenue_recognition,
6485 framework,
6486 );
6487 snapshot.revenue_contract_count = contracts.len();
6488 snapshot.contracts = contracts;
6489 }
6490 }
6491
6492 if self.config.accounting_standards.impairment.enabled {
6494 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6495 .master_data
6496 .assets
6497 .iter()
6498 .map(|a| {
6499 (
6500 a.asset_id.clone(),
6501 a.description.clone(),
6502 a.acquisition_cost,
6503 )
6504 })
6505 .collect();
6506
6507 if !asset_data.is_empty() {
6508 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6509 let tests = imp_gen.generate(
6510 company_code,
6511 &asset_data,
6512 end_date,
6513 &self.config.accounting_standards.impairment,
6514 framework,
6515 );
6516 snapshot.impairment_test_count = tests.len();
6517 snapshot.impairment_tests = tests;
6518 }
6519 }
6520
6521 if self
6523 .config
6524 .accounting_standards
6525 .business_combinations
6526 .enabled
6527 {
6528 let bc_config = &self.config.accounting_standards.business_combinations;
6529 let framework_str = match framework {
6530 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6531 _ => "US_GAAP",
6532 };
6533 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6534 let bc_snap = bc_gen.generate(
6535 company_code,
6536 currency,
6537 start_date,
6538 end_date,
6539 bc_config.acquisition_count,
6540 framework_str,
6541 );
6542 snapshot.business_combination_count = bc_snap.combinations.len();
6543 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6544 snapshot.business_combinations = bc_snap.combinations;
6545 }
6546
6547 if self
6549 .config
6550 .accounting_standards
6551 .expected_credit_loss
6552 .enabled
6553 {
6554 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6555 let framework_str = match framework {
6556 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6557 _ => "ASC_326",
6558 };
6559
6560 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6563
6564 let mut ecl_gen = EclGenerator::new(seed + 43);
6565
6566 let bucket_exposures: Vec<(
6568 datasynth_core::models::subledger::ar::AgingBucket,
6569 rust_decimal::Decimal,
6570 )> = if ar_aging_reports.is_empty() {
6571 use datasynth_core::models::subledger::ar::AgingBucket;
6573 vec![
6574 (
6575 AgingBucket::Current,
6576 rust_decimal::Decimal::from(500_000_u32),
6577 ),
6578 (
6579 AgingBucket::Days1To30,
6580 rust_decimal::Decimal::from(120_000_u32),
6581 ),
6582 (
6583 AgingBucket::Days31To60,
6584 rust_decimal::Decimal::from(45_000_u32),
6585 ),
6586 (
6587 AgingBucket::Days61To90,
6588 rust_decimal::Decimal::from(15_000_u32),
6589 ),
6590 (
6591 AgingBucket::Over90Days,
6592 rust_decimal::Decimal::from(8_000_u32),
6593 ),
6594 ]
6595 } else {
6596 use datasynth_core::models::subledger::ar::AgingBucket;
6597 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6599 std::collections::HashMap::new();
6600 for report in ar_aging_reports {
6601 for (bucket, amount) in &report.bucket_totals {
6602 *totals.entry(*bucket).or_default() += amount;
6603 }
6604 }
6605 AgingBucket::all()
6606 .into_iter()
6607 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6608 .collect()
6609 };
6610
6611 let ecl_snap = ecl_gen.generate(
6612 company_code,
6613 end_date,
6614 &bucket_exposures,
6615 ecl_config,
6616 &period_label,
6617 framework_str,
6618 );
6619
6620 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6621 snapshot.ecl_models = ecl_snap.ecl_models;
6622 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6623 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6624 }
6625
6626 {
6628 let framework_str = match framework {
6629 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6630 _ => "US_GAAP",
6631 };
6632
6633 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6638 .max(rust_decimal::Decimal::from(100_000_u32));
6639
6640 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6641
6642 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6643 let prov_snap = prov_gen.generate(
6644 company_code,
6645 currency,
6646 revenue_proxy,
6647 end_date,
6648 &period_label,
6649 framework_str,
6650 None, );
6652
6653 snapshot.provision_count = prov_snap.provisions.len();
6654 snapshot.provisions = prov_snap.provisions;
6655 snapshot.provision_movements = prov_snap.movements;
6656 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6657 snapshot.provision_journal_entries = prov_snap.journal_entries;
6658 }
6659
6660 {
6664 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6665
6666 let presentation_currency = self
6667 .config
6668 .global
6669 .presentation_currency
6670 .clone()
6671 .unwrap_or_else(|| self.config.global.group_currency.clone());
6672
6673 let mut rate_table = FxRateTable::new(&presentation_currency);
6676
6677 let base_rates = base_rates_usd();
6681 for (ccy, rate) in &base_rates {
6682 rate_table.add_rate(FxRate::new(
6683 ccy,
6684 "USD",
6685 RateType::Closing,
6686 end_date,
6687 *rate,
6688 "SYNTHETIC",
6689 ));
6690 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6693 rate_table.add_rate(FxRate::new(
6694 ccy,
6695 "USD",
6696 RateType::Average,
6697 end_date,
6698 avg,
6699 "SYNTHETIC",
6700 ));
6701 }
6702
6703 let mut translation_results = Vec::new();
6704 for company in &self.config.companies {
6705 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6708 .max(rust_decimal::Decimal::from(100_000_u32));
6709
6710 let func_ccy = company
6711 .functional_currency
6712 .clone()
6713 .unwrap_or_else(|| company.currency.clone());
6714
6715 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6716 &company.code,
6717 &func_ccy,
6718 &presentation_currency,
6719 &ias21_period_label,
6720 end_date,
6721 company_revenue,
6722 &rate_table,
6723 );
6724 translation_results.push(result);
6725 }
6726
6727 snapshot.currency_translation_count = translation_results.len();
6728 snapshot.currency_translation_results = translation_results;
6729 }
6730
6731 stats.revenue_contract_count = snapshot.revenue_contract_count;
6732 stats.impairment_test_count = snapshot.impairment_test_count;
6733 stats.business_combination_count = snapshot.business_combination_count;
6734 stats.ecl_model_count = snapshot.ecl_model_count;
6735 stats.provision_count = snapshot.provision_count;
6736
6737 info!(
6738 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6739 snapshot.revenue_contract_count,
6740 snapshot.impairment_test_count,
6741 snapshot.business_combination_count,
6742 snapshot.ecl_model_count,
6743 snapshot.provision_count,
6744 snapshot.currency_translation_count
6745 );
6746 self.check_resources_with_log("post-accounting-standards")?;
6747
6748 Ok(snapshot)
6749 }
6750
6751 fn phase_manufacturing(
6753 &mut self,
6754 stats: &mut EnhancedGenerationStatistics,
6755 ) -> SynthResult<ManufacturingSnapshot> {
6756 if !self.phase_config.generate_manufacturing {
6757 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6758 return Ok(ManufacturingSnapshot::default());
6759 }
6760 info!("Phase 18: Generating Manufacturing Data");
6761
6762 let seed = self.seed;
6763 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6764 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6765 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6766 let company_code = self
6767 .config
6768 .companies
6769 .first()
6770 .map(|c| c.code.as_str())
6771 .unwrap_or("1000");
6772
6773 let material_data: Vec<(String, String)> = self
6774 .master_data
6775 .materials
6776 .iter()
6777 .map(|m| (m.material_id.clone(), m.description.clone()))
6778 .collect();
6779
6780 if material_data.is_empty() {
6781 debug!("Phase 18: Skipped (no materials available)");
6782 return Ok(ManufacturingSnapshot::default());
6783 }
6784
6785 let mut snapshot = ManufacturingSnapshot::default();
6786
6787 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6789 let production_orders = prod_gen.generate(
6790 company_code,
6791 &material_data,
6792 start_date,
6793 end_date,
6794 &self.config.manufacturing.production_orders,
6795 &self.config.manufacturing.costing,
6796 &self.config.manufacturing.routing,
6797 );
6798 snapshot.production_order_count = production_orders.len();
6799
6800 let inspection_data: Vec<(String, String, String)> = production_orders
6802 .iter()
6803 .map(|po| {
6804 (
6805 po.order_id.clone(),
6806 po.material_id.clone(),
6807 po.material_description.clone(),
6808 )
6809 })
6810 .collect();
6811
6812 snapshot.production_orders = production_orders;
6813
6814 if !inspection_data.is_empty() {
6815 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6816 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6817 snapshot.quality_inspection_count = inspections.len();
6818 snapshot.quality_inspections = inspections;
6819 }
6820
6821 let storage_locations: Vec<(String, String)> = material_data
6823 .iter()
6824 .enumerate()
6825 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6826 .collect();
6827
6828 let employee_ids: Vec<String> = self
6829 .master_data
6830 .employees
6831 .iter()
6832 .map(|e| e.employee_id.clone())
6833 .collect();
6834 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6835 .with_employee_pool(employee_ids);
6836 let mut cycle_count_total = 0usize;
6837 for month in 0..self.config.global.period_months {
6838 let count_date = start_date + chrono::Months::new(month);
6839 let items_per_count = storage_locations.len().clamp(10, 50);
6840 let cc = cc_gen.generate(
6841 company_code,
6842 &storage_locations,
6843 count_date,
6844 items_per_count,
6845 );
6846 snapshot.cycle_counts.push(cc);
6847 cycle_count_total += 1;
6848 }
6849 snapshot.cycle_count_count = cycle_count_total;
6850
6851 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6853 let bom_components = bom_gen.generate(company_code, &material_data);
6854 snapshot.bom_component_count = bom_components.len();
6855 snapshot.bom_components = bom_components;
6856
6857 let currency = self
6859 .config
6860 .companies
6861 .first()
6862 .map(|c| c.currency.as_str())
6863 .unwrap_or("USD");
6864 let production_order_ids: Vec<String> = snapshot
6865 .production_orders
6866 .iter()
6867 .map(|po| po.order_id.clone())
6868 .collect();
6869 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6870 let inventory_movements = inv_mov_gen.generate_with_production_orders(
6871 company_code,
6872 &material_data,
6873 start_date,
6874 end_date,
6875 2,
6876 currency,
6877 &production_order_ids,
6878 );
6879 snapshot.inventory_movement_count = inventory_movements.len();
6880 snapshot.inventory_movements = inventory_movements;
6881
6882 stats.production_order_count = snapshot.production_order_count;
6883 stats.quality_inspection_count = snapshot.quality_inspection_count;
6884 stats.cycle_count_count = snapshot.cycle_count_count;
6885 stats.bom_component_count = snapshot.bom_component_count;
6886 stats.inventory_movement_count = snapshot.inventory_movement_count;
6887
6888 info!(
6889 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6890 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6891 snapshot.bom_component_count, snapshot.inventory_movement_count
6892 );
6893 self.check_resources_with_log("post-manufacturing")?;
6894
6895 Ok(snapshot)
6896 }
6897
6898 fn phase_sales_kpi_budgets(
6900 &mut self,
6901 coa: &Arc<ChartOfAccounts>,
6902 financial_reporting: &FinancialReportingSnapshot,
6903 stats: &mut EnhancedGenerationStatistics,
6904 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6905 if !self.phase_config.generate_sales_kpi_budgets {
6906 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6907 return Ok(SalesKpiBudgetsSnapshot::default());
6908 }
6909 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6910
6911 let seed = self.seed;
6912 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6913 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6914 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6915 let company_code = self
6916 .config
6917 .companies
6918 .first()
6919 .map(|c| c.code.as_str())
6920 .unwrap_or("1000");
6921
6922 let mut snapshot = SalesKpiBudgetsSnapshot::default();
6923
6924 if self.config.sales_quotes.enabled {
6926 let customer_data: Vec<(String, String)> = self
6927 .master_data
6928 .customers
6929 .iter()
6930 .map(|c| (c.customer_id.clone(), c.name.clone()))
6931 .collect();
6932 let material_data: Vec<(String, String)> = self
6933 .master_data
6934 .materials
6935 .iter()
6936 .map(|m| (m.material_id.clone(), m.description.clone()))
6937 .collect();
6938
6939 if !customer_data.is_empty() && !material_data.is_empty() {
6940 let employee_ids: Vec<String> = self
6941 .master_data
6942 .employees
6943 .iter()
6944 .map(|e| e.employee_id.clone())
6945 .collect();
6946 let customer_ids: Vec<String> = self
6947 .master_data
6948 .customers
6949 .iter()
6950 .map(|c| c.customer_id.clone())
6951 .collect();
6952 let company_currency = self
6953 .config
6954 .companies
6955 .first()
6956 .map(|c| c.currency.as_str())
6957 .unwrap_or("USD");
6958
6959 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6960 .with_pools(employee_ids, customer_ids);
6961 let quotes = quote_gen.generate_with_currency(
6962 company_code,
6963 &customer_data,
6964 &material_data,
6965 start_date,
6966 end_date,
6967 &self.config.sales_quotes,
6968 company_currency,
6969 );
6970 snapshot.sales_quote_count = quotes.len();
6971 snapshot.sales_quotes = quotes;
6972 }
6973 }
6974
6975 if self.config.financial_reporting.management_kpis.enabled {
6977 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6978 let mut kpis = kpi_gen.generate(
6979 company_code,
6980 start_date,
6981 end_date,
6982 &self.config.financial_reporting.management_kpis,
6983 );
6984
6985 {
6987 use rust_decimal::Decimal;
6988
6989 if let Some(income_stmt) =
6990 financial_reporting.financial_statements.iter().find(|fs| {
6991 fs.statement_type == StatementType::IncomeStatement
6992 && fs.company_code == company_code
6993 })
6994 {
6995 let total_revenue: Decimal = income_stmt
6997 .line_items
6998 .iter()
6999 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7000 .map(|li| li.amount)
7001 .sum();
7002 let total_cogs: Decimal = income_stmt
7003 .line_items
7004 .iter()
7005 .filter(|li| {
7006 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7007 && !li.is_total
7008 })
7009 .map(|li| li.amount.abs())
7010 .sum();
7011 let total_opex: Decimal = income_stmt
7012 .line_items
7013 .iter()
7014 .filter(|li| {
7015 li.section.contains("Expense")
7016 && !li.is_total
7017 && !li.section.contains("Cost")
7018 })
7019 .map(|li| li.amount.abs())
7020 .sum();
7021
7022 if total_revenue > Decimal::ZERO {
7023 let hundred = Decimal::from(100);
7024 let gross_margin_pct =
7025 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7026 let operating_income = total_revenue - total_cogs - total_opex;
7027 let op_margin_pct =
7028 (operating_income * hundred / total_revenue).round_dp(2);
7029
7030 for kpi in &mut kpis {
7032 if kpi.name == "Gross Margin" {
7033 kpi.value = gross_margin_pct;
7034 } else if kpi.name == "Operating Margin" {
7035 kpi.value = op_margin_pct;
7036 }
7037 }
7038 }
7039 }
7040
7041 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7043 fs.statement_type == StatementType::BalanceSheet
7044 && fs.company_code == company_code
7045 }) {
7046 let current_assets: Decimal = bs
7047 .line_items
7048 .iter()
7049 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7050 .map(|li| li.amount)
7051 .sum();
7052 let current_liabilities: Decimal = bs
7053 .line_items
7054 .iter()
7055 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7056 .map(|li| li.amount.abs())
7057 .sum();
7058
7059 if current_liabilities > Decimal::ZERO {
7060 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7061 for kpi in &mut kpis {
7062 if kpi.name == "Current Ratio" {
7063 kpi.value = current_ratio;
7064 }
7065 }
7066 }
7067 }
7068 }
7069
7070 snapshot.kpi_count = kpis.len();
7071 snapshot.kpis = kpis;
7072 }
7073
7074 if self.config.financial_reporting.budgets.enabled {
7076 let account_data: Vec<(String, String)> = coa
7077 .accounts
7078 .iter()
7079 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7080 .collect();
7081
7082 if !account_data.is_empty() {
7083 let fiscal_year = start_date.year() as u32;
7084 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7085 let budget = budget_gen.generate(
7086 company_code,
7087 fiscal_year,
7088 &account_data,
7089 &self.config.financial_reporting.budgets,
7090 );
7091 snapshot.budget_line_count = budget.line_items.len();
7092 snapshot.budgets.push(budget);
7093 }
7094 }
7095
7096 stats.sales_quote_count = snapshot.sales_quote_count;
7097 stats.kpi_count = snapshot.kpi_count;
7098 stats.budget_line_count = snapshot.budget_line_count;
7099
7100 info!(
7101 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7102 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7103 );
7104 self.check_resources_with_log("post-sales-kpi-budgets")?;
7105
7106 Ok(snapshot)
7107 }
7108
7109 fn compute_pre_tax_income(
7116 company_code: &str,
7117 journal_entries: &[JournalEntry],
7118 ) -> rust_decimal::Decimal {
7119 use datasynth_core::accounts::AccountCategory;
7120 use rust_decimal::Decimal;
7121
7122 let mut total_revenue = Decimal::ZERO;
7123 let mut total_expenses = Decimal::ZERO;
7124
7125 for je in journal_entries {
7126 if je.header.company_code != company_code {
7127 continue;
7128 }
7129 for line in &je.lines {
7130 let cat = AccountCategory::from_account(&line.gl_account);
7131 match cat {
7132 AccountCategory::Revenue => {
7133 total_revenue += line.credit_amount - line.debit_amount;
7134 }
7135 AccountCategory::Cogs
7136 | AccountCategory::OperatingExpense
7137 | AccountCategory::OtherIncomeExpense => {
7138 total_expenses += line.debit_amount - line.credit_amount;
7139 }
7140 _ => {}
7141 }
7142 }
7143 }
7144
7145 let pti = (total_revenue - total_expenses).round_dp(2);
7146 if pti == rust_decimal::Decimal::ZERO {
7147 rust_decimal::Decimal::from(1_000_000u32)
7150 } else {
7151 pti
7152 }
7153 }
7154
7155 fn phase_tax_generation(
7157 &mut self,
7158 document_flows: &DocumentFlowSnapshot,
7159 journal_entries: &[JournalEntry],
7160 stats: &mut EnhancedGenerationStatistics,
7161 ) -> SynthResult<TaxSnapshot> {
7162 if !self.phase_config.generate_tax {
7163 debug!("Phase 20: Skipped (tax generation disabled)");
7164 return Ok(TaxSnapshot::default());
7165 }
7166 info!("Phase 20: Generating Tax Data");
7167
7168 let seed = self.seed;
7169 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7170 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7171 let fiscal_year = start_date.year();
7172 let company_code = self
7173 .config
7174 .companies
7175 .first()
7176 .map(|c| c.code.as_str())
7177 .unwrap_or("1000");
7178
7179 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7180 seed + 370,
7181 self.config.tax.clone(),
7182 );
7183
7184 let pack = self.primary_pack().clone();
7185 let (jurisdictions, codes) =
7186 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7187
7188 let mut provisions = Vec::new();
7190 if self.config.tax.provisions.enabled {
7191 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7192 for company in &self.config.companies {
7193 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7194 let statutory_rate = rust_decimal::Decimal::new(
7195 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7196 2,
7197 );
7198 let provision = provision_gen.generate(
7199 &company.code,
7200 start_date,
7201 pre_tax_income,
7202 statutory_rate,
7203 );
7204 provisions.push(provision);
7205 }
7206 }
7207
7208 let mut tax_lines = Vec::new();
7210 if !codes.is_empty() {
7211 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7212 datasynth_generators::TaxLineGeneratorConfig::default(),
7213 codes.clone(),
7214 seed + 372,
7215 );
7216
7217 let buyer_country = self
7220 .config
7221 .companies
7222 .first()
7223 .map(|c| c.country.as_str())
7224 .unwrap_or("US");
7225 for vi in &document_flows.vendor_invoices {
7226 let lines = tax_line_gen.generate_for_document(
7227 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7228 &vi.header.document_id,
7229 buyer_country, buyer_country,
7231 vi.payable_amount,
7232 vi.header.document_date,
7233 None,
7234 );
7235 tax_lines.extend(lines);
7236 }
7237
7238 for ci in &document_flows.customer_invoices {
7240 let lines = tax_line_gen.generate_for_document(
7241 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7242 &ci.header.document_id,
7243 buyer_country, buyer_country,
7245 ci.total_gross_amount,
7246 ci.header.document_date,
7247 None,
7248 );
7249 tax_lines.extend(lines);
7250 }
7251 }
7252
7253 let deferred_tax = {
7255 let companies: Vec<(&str, &str)> = self
7256 .config
7257 .companies
7258 .iter()
7259 .map(|c| (c.code.as_str(), c.country.as_str()))
7260 .collect();
7261 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7262 deferred_gen.generate(&companies, start_date, journal_entries)
7263 };
7264
7265 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7268 std::collections::HashMap::new();
7269 for vi in &document_flows.vendor_invoices {
7270 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7271 }
7272 for ci in &document_flows.customer_invoices {
7273 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7274 }
7275
7276 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7278 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7279 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7280 &tax_lines,
7281 company_code,
7282 &doc_dates,
7283 end_date,
7284 );
7285 debug!("Generated {} tax posting JEs", jes.len());
7286 jes
7287 } else {
7288 Vec::new()
7289 };
7290
7291 let snapshot = TaxSnapshot {
7292 jurisdiction_count: jurisdictions.len(),
7293 code_count: codes.len(),
7294 jurisdictions,
7295 codes,
7296 tax_provisions: provisions,
7297 tax_lines,
7298 tax_returns: Vec::new(),
7299 withholding_records: Vec::new(),
7300 tax_anomaly_labels: Vec::new(),
7301 deferred_tax,
7302 tax_posting_journal_entries,
7303 };
7304
7305 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7306 stats.tax_code_count = snapshot.code_count;
7307 stats.tax_provision_count = snapshot.tax_provisions.len();
7308 stats.tax_line_count = snapshot.tax_lines.len();
7309
7310 info!(
7311 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7312 snapshot.jurisdiction_count,
7313 snapshot.code_count,
7314 snapshot.tax_provisions.len(),
7315 snapshot.deferred_tax.temporary_differences.len(),
7316 snapshot.deferred_tax.journal_entries.len(),
7317 snapshot.tax_posting_journal_entries.len(),
7318 );
7319 self.check_resources_with_log("post-tax")?;
7320
7321 Ok(snapshot)
7322 }
7323
7324 fn phase_esg_generation(
7326 &mut self,
7327 document_flows: &DocumentFlowSnapshot,
7328 manufacturing: &ManufacturingSnapshot,
7329 stats: &mut EnhancedGenerationStatistics,
7330 ) -> SynthResult<EsgSnapshot> {
7331 if !self.phase_config.generate_esg {
7332 debug!("Phase 21: Skipped (ESG generation disabled)");
7333 return Ok(EsgSnapshot::default());
7334 }
7335 let degradation = self.check_resources()?;
7336 if degradation >= DegradationLevel::Reduced {
7337 debug!(
7338 "Phase skipped due to resource pressure (degradation: {:?})",
7339 degradation
7340 );
7341 return Ok(EsgSnapshot::default());
7342 }
7343 info!("Phase 21: Generating ESG Data");
7344
7345 let seed = self.seed;
7346 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7347 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7348 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7349 let entity_id = self
7350 .config
7351 .companies
7352 .first()
7353 .map(|c| c.code.as_str())
7354 .unwrap_or("1000");
7355
7356 let esg_cfg = &self.config.esg;
7357 let mut snapshot = EsgSnapshot::default();
7358
7359 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7361 esg_cfg.environmental.energy.clone(),
7362 seed + 80,
7363 );
7364 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7365
7366 let facility_count = esg_cfg.environmental.energy.facility_count;
7368 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7369 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7370
7371 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7373 seed + 82,
7374 esg_cfg.environmental.waste.diversion_target,
7375 facility_count,
7376 );
7377 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7378
7379 let mut emission_gen =
7381 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7382
7383 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7385 .iter()
7386 .map(|e| datasynth_generators::EnergyInput {
7387 facility_id: e.facility_id.clone(),
7388 energy_type: match e.energy_source {
7389 EnergySourceType::NaturalGas => {
7390 datasynth_generators::EnergyInputType::NaturalGas
7391 }
7392 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7393 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7394 _ => datasynth_generators::EnergyInputType::Electricity,
7395 },
7396 consumption_kwh: e.consumption_kwh,
7397 period: e.period,
7398 })
7399 .collect();
7400
7401 if !manufacturing.production_orders.is_empty() {
7403 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7404 &manufacturing.production_orders,
7405 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7408 if !mfg_energy.is_empty() {
7409 info!(
7410 "ESG: {} energy inputs derived from {} production orders",
7411 mfg_energy.len(),
7412 manufacturing.production_orders.len(),
7413 );
7414 energy_inputs.extend(mfg_energy);
7415 }
7416 }
7417
7418 let mut emissions = Vec::new();
7419 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7420 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7421
7422 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7424 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7425 for payment in &document_flows.payments {
7426 if payment.is_vendor {
7427 *totals
7428 .entry(payment.business_partner_id.clone())
7429 .or_default() += payment.amount;
7430 }
7431 }
7432 totals
7433 };
7434 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7435 .master_data
7436 .vendors
7437 .iter()
7438 .map(|v| {
7439 let spend = vendor_payment_totals
7440 .get(&v.vendor_id)
7441 .copied()
7442 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7443 datasynth_generators::VendorSpendInput {
7444 vendor_id: v.vendor_id.clone(),
7445 category: format!("{:?}", v.vendor_type).to_lowercase(),
7446 spend,
7447 country: v.country.clone(),
7448 }
7449 })
7450 .collect();
7451 if !vendor_spend.is_empty() {
7452 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7453 entity_id,
7454 &vendor_spend,
7455 start_date,
7456 end_date,
7457 ));
7458 }
7459
7460 let headcount = self.master_data.employees.len() as u32;
7462 if headcount > 0 {
7463 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7464 emissions.extend(emission_gen.generate_scope3_business_travel(
7465 entity_id,
7466 travel_spend,
7467 start_date,
7468 ));
7469 emissions
7470 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7471 }
7472
7473 snapshot.emission_count = emissions.len();
7474 snapshot.emissions = emissions;
7475 snapshot.energy = energy_records;
7476
7477 let mut workforce_gen =
7479 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7480 let total_headcount = headcount.max(100);
7481 snapshot.diversity =
7482 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7483 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7484
7485 if !self.master_data.employees.is_empty() {
7487 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7488 entity_id,
7489 &self.master_data.employees,
7490 end_date,
7491 );
7492 if !hr_diversity.is_empty() {
7493 info!(
7494 "ESG: {} diversity metrics derived from {} actual employees",
7495 hr_diversity.len(),
7496 self.master_data.employees.len(),
7497 );
7498 snapshot.diversity.extend(hr_diversity);
7499 }
7500 }
7501
7502 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7503 entity_id,
7504 facility_count,
7505 start_date,
7506 end_date,
7507 );
7508
7509 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7512 entity_id,
7513 &snapshot.safety_incidents,
7514 total_hours,
7515 start_date,
7516 );
7517 snapshot.safety_metrics = vec![safety_metric];
7518
7519 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7521 seed + 85,
7522 esg_cfg.governance.board_size,
7523 esg_cfg.governance.independence_target,
7524 );
7525 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7526
7527 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7529 esg_cfg.supply_chain_esg.clone(),
7530 seed + 86,
7531 );
7532 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7533 .master_data
7534 .vendors
7535 .iter()
7536 .map(|v| datasynth_generators::VendorInput {
7537 vendor_id: v.vendor_id.clone(),
7538 country: v.country.clone(),
7539 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7540 quality_score: None,
7541 })
7542 .collect();
7543 snapshot.supplier_assessments =
7544 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7545
7546 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7548 seed + 87,
7549 esg_cfg.reporting.clone(),
7550 esg_cfg.climate_scenarios.clone(),
7551 );
7552 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7553 snapshot.disclosures = disclosure_gen.generate_disclosures(
7554 entity_id,
7555 &snapshot.materiality,
7556 start_date,
7557 end_date,
7558 );
7559 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7560 snapshot.disclosure_count = snapshot.disclosures.len();
7561
7562 if esg_cfg.anomaly_rate > 0.0 {
7564 let mut anomaly_injector =
7565 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7566 let mut labels = Vec::new();
7567 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7568 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7569 labels.extend(
7570 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7571 );
7572 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7573 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7574 snapshot.anomaly_labels = labels;
7575 }
7576
7577 stats.esg_emission_count = snapshot.emission_count;
7578 stats.esg_disclosure_count = snapshot.disclosure_count;
7579
7580 info!(
7581 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7582 snapshot.emission_count,
7583 snapshot.disclosure_count,
7584 snapshot.supplier_assessments.len()
7585 );
7586 self.check_resources_with_log("post-esg")?;
7587
7588 Ok(snapshot)
7589 }
7590
7591 fn phase_treasury_data(
7593 &mut self,
7594 document_flows: &DocumentFlowSnapshot,
7595 subledger: &SubledgerSnapshot,
7596 intercompany: &IntercompanySnapshot,
7597 stats: &mut EnhancedGenerationStatistics,
7598 ) -> SynthResult<TreasurySnapshot> {
7599 if !self.phase_config.generate_treasury {
7600 debug!("Phase 22: Skipped (treasury generation disabled)");
7601 return Ok(TreasurySnapshot::default());
7602 }
7603 let degradation = self.check_resources()?;
7604 if degradation >= DegradationLevel::Reduced {
7605 debug!(
7606 "Phase skipped due to resource pressure (degradation: {:?})",
7607 degradation
7608 );
7609 return Ok(TreasurySnapshot::default());
7610 }
7611 info!("Phase 22: Generating Treasury Data");
7612
7613 let seed = self.seed;
7614 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7615 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7616 let currency = self
7617 .config
7618 .companies
7619 .first()
7620 .map(|c| c.currency.as_str())
7621 .unwrap_or("USD");
7622 let entity_id = self
7623 .config
7624 .companies
7625 .first()
7626 .map(|c| c.code.as_str())
7627 .unwrap_or("1000");
7628
7629 let mut snapshot = TreasurySnapshot::default();
7630
7631 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7633 self.config.treasury.debt.clone(),
7634 seed + 90,
7635 );
7636 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7637
7638 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7640 self.config.treasury.hedging.clone(),
7641 seed + 91,
7642 );
7643 for debt in &snapshot.debt_instruments {
7644 if debt.rate_type == InterestRateType::Variable {
7645 let swap = hedge_gen.generate_ir_swap(
7646 currency,
7647 debt.principal,
7648 debt.origination_date,
7649 debt.maturity_date,
7650 );
7651 snapshot.hedging_instruments.push(swap);
7652 }
7653 }
7654
7655 {
7658 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7659 for payment in &document_flows.payments {
7660 if payment.currency != currency {
7661 let entry = fx_map
7662 .entry(payment.currency.clone())
7663 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7664 entry.0 += payment.amount;
7665 if payment.header.document_date > entry.1 {
7667 entry.1 = payment.header.document_date;
7668 }
7669 }
7670 }
7671 if !fx_map.is_empty() {
7672 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7673 .into_iter()
7674 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7675 datasynth_generators::treasury::FxExposure {
7676 currency_pair: format!("{foreign_ccy}/{currency}"),
7677 foreign_currency: foreign_ccy,
7678 net_amount,
7679 settlement_date,
7680 description: "AP payment FX exposure".to_string(),
7681 }
7682 })
7683 .collect();
7684 let (fx_instruments, fx_relationships) =
7685 hedge_gen.generate(start_date, &fx_exposures);
7686 snapshot.hedging_instruments.extend(fx_instruments);
7687 snapshot.hedge_relationships.extend(fx_relationships);
7688 }
7689 }
7690
7691 if self.config.treasury.anomaly_rate > 0.0 {
7693 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7694 seed + 92,
7695 self.config.treasury.anomaly_rate,
7696 );
7697 let mut labels = Vec::new();
7698 labels.extend(
7699 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7700 );
7701 snapshot.treasury_anomaly_labels = labels;
7702 }
7703
7704 if self.config.treasury.cash_positioning.enabled {
7706 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7707
7708 for payment in &document_flows.payments {
7710 cash_flows.push(datasynth_generators::treasury::CashFlow {
7711 date: payment.header.document_date,
7712 account_id: format!("{entity_id}-MAIN"),
7713 amount: payment.amount,
7714 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7715 });
7716 }
7717
7718 for chain in &document_flows.o2c_chains {
7720 if let Some(ref receipt) = chain.customer_receipt {
7721 cash_flows.push(datasynth_generators::treasury::CashFlow {
7722 date: receipt.header.document_date,
7723 account_id: format!("{entity_id}-MAIN"),
7724 amount: receipt.amount,
7725 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7726 });
7727 }
7728 for receipt in &chain.remainder_receipts {
7730 cash_flows.push(datasynth_generators::treasury::CashFlow {
7731 date: receipt.header.document_date,
7732 account_id: format!("{entity_id}-MAIN"),
7733 amount: receipt.amount,
7734 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7735 });
7736 }
7737 }
7738
7739 if !cash_flows.is_empty() {
7740 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7741 self.config.treasury.cash_positioning.clone(),
7742 seed + 93,
7743 );
7744 let account_id = format!("{entity_id}-MAIN");
7745 snapshot.cash_positions = cash_gen.generate(
7746 entity_id,
7747 &account_id,
7748 currency,
7749 &cash_flows,
7750 start_date,
7751 start_date + chrono::Months::new(self.config.global.period_months),
7752 rust_decimal::Decimal::new(1_000_000, 0), );
7754 }
7755 }
7756
7757 if self.config.treasury.cash_forecasting.enabled {
7759 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7760
7761 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7763 .ar_invoices
7764 .iter()
7765 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7766 .map(|inv| {
7767 let days_past_due = if inv.due_date < end_date {
7768 (end_date - inv.due_date).num_days().max(0) as u32
7769 } else {
7770 0
7771 };
7772 datasynth_generators::treasury::ArAgingItem {
7773 expected_date: inv.due_date,
7774 amount: inv.amount_remaining,
7775 days_past_due,
7776 document_id: inv.invoice_number.clone(),
7777 }
7778 })
7779 .collect();
7780
7781 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7783 .ap_invoices
7784 .iter()
7785 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7786 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7787 payment_date: inv.due_date,
7788 amount: inv.amount_remaining,
7789 document_id: inv.invoice_number.clone(),
7790 })
7791 .collect();
7792
7793 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7794 self.config.treasury.cash_forecasting.clone(),
7795 seed + 94,
7796 );
7797 let forecast = forecast_gen.generate(
7798 entity_id,
7799 currency,
7800 end_date,
7801 &ar_items,
7802 &ap_items,
7803 &[], );
7805 snapshot.cash_forecasts.push(forecast);
7806 }
7807
7808 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7810 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7811 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7812 self.config.treasury.cash_pooling.clone(),
7813 seed + 95,
7814 );
7815
7816 let account_ids: Vec<String> = snapshot
7818 .cash_positions
7819 .iter()
7820 .map(|cp| cp.bank_account_id.clone())
7821 .collect::<std::collections::HashSet<_>>()
7822 .into_iter()
7823 .collect();
7824
7825 if let Some(pool) =
7826 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7827 {
7828 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7830 for cp in &snapshot.cash_positions {
7831 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7832 }
7833
7834 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7835 latest_balances
7836 .into_iter()
7837 .filter(|(id, _)| pool.participant_accounts.contains(id))
7838 .map(
7839 |(id, balance)| datasynth_generators::treasury::AccountBalance {
7840 account_id: id,
7841 balance,
7842 },
7843 )
7844 .collect();
7845
7846 let sweeps =
7847 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7848 snapshot.cash_pool_sweeps = sweeps;
7849 snapshot.cash_pools.push(pool);
7850 }
7851 }
7852
7853 if self.config.treasury.bank_guarantees.enabled {
7855 let vendor_names: Vec<String> = self
7856 .master_data
7857 .vendors
7858 .iter()
7859 .map(|v| v.name.clone())
7860 .collect();
7861 if !vendor_names.is_empty() {
7862 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7863 self.config.treasury.bank_guarantees.clone(),
7864 seed + 96,
7865 );
7866 snapshot.bank_guarantees =
7867 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7868 }
7869 }
7870
7871 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7873 let entity_ids: Vec<String> = self
7874 .config
7875 .companies
7876 .iter()
7877 .map(|c| c.code.clone())
7878 .collect();
7879 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7880 .matched_pairs
7881 .iter()
7882 .map(|mp| {
7883 (
7884 mp.seller_company.clone(),
7885 mp.buyer_company.clone(),
7886 mp.amount,
7887 )
7888 })
7889 .collect();
7890 if entity_ids.len() >= 2 {
7891 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7892 self.config.treasury.netting.clone(),
7893 seed + 97,
7894 );
7895 snapshot.netting_runs = netting_gen.generate(
7896 &entity_ids,
7897 currency,
7898 start_date,
7899 self.config.global.period_months,
7900 &ic_amounts,
7901 );
7902 }
7903 }
7904
7905 {
7907 use datasynth_generators::treasury::TreasuryAccounting;
7908
7909 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7910 let mut treasury_jes = Vec::new();
7911
7912 if !snapshot.debt_instruments.is_empty() {
7914 let debt_jes =
7915 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7916 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7917 treasury_jes.extend(debt_jes);
7918 }
7919
7920 if !snapshot.hedging_instruments.is_empty() {
7922 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
7923 &snapshot.hedging_instruments,
7924 &snapshot.hedge_relationships,
7925 end_date,
7926 entity_id,
7927 );
7928 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
7929 treasury_jes.extend(hedge_jes);
7930 }
7931
7932 if !snapshot.cash_pool_sweeps.is_empty() {
7934 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
7935 &snapshot.cash_pool_sweeps,
7936 entity_id,
7937 );
7938 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
7939 treasury_jes.extend(sweep_jes);
7940 }
7941
7942 if !treasury_jes.is_empty() {
7943 debug!("Total treasury journal entries: {}", treasury_jes.len());
7944 }
7945 snapshot.journal_entries = treasury_jes;
7946 }
7947
7948 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7949 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7950 stats.cash_position_count = snapshot.cash_positions.len();
7951 stats.cash_forecast_count = snapshot.cash_forecasts.len();
7952 stats.cash_pool_count = snapshot.cash_pools.len();
7953
7954 info!(
7955 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
7956 snapshot.debt_instruments.len(),
7957 snapshot.hedging_instruments.len(),
7958 snapshot.cash_positions.len(),
7959 snapshot.cash_forecasts.len(),
7960 snapshot.cash_pools.len(),
7961 snapshot.bank_guarantees.len(),
7962 snapshot.netting_runs.len(),
7963 snapshot.journal_entries.len(),
7964 );
7965 self.check_resources_with_log("post-treasury")?;
7966
7967 Ok(snapshot)
7968 }
7969
7970 fn phase_project_accounting(
7972 &mut self,
7973 document_flows: &DocumentFlowSnapshot,
7974 hr: &HrSnapshot,
7975 stats: &mut EnhancedGenerationStatistics,
7976 ) -> SynthResult<ProjectAccountingSnapshot> {
7977 if !self.phase_config.generate_project_accounting {
7978 debug!("Phase 23: Skipped (project accounting disabled)");
7979 return Ok(ProjectAccountingSnapshot::default());
7980 }
7981 let degradation = self.check_resources()?;
7982 if degradation >= DegradationLevel::Reduced {
7983 debug!(
7984 "Phase skipped due to resource pressure (degradation: {:?})",
7985 degradation
7986 );
7987 return Ok(ProjectAccountingSnapshot::default());
7988 }
7989 info!("Phase 23: Generating Project Accounting Data");
7990
7991 let seed = self.seed;
7992 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7993 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7994 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7995 let company_code = self
7996 .config
7997 .companies
7998 .first()
7999 .map(|c| c.code.as_str())
8000 .unwrap_or("1000");
8001
8002 let mut snapshot = ProjectAccountingSnapshot::default();
8003
8004 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8006 self.config.project_accounting.clone(),
8007 seed + 95,
8008 );
8009 let pool = project_gen.generate(company_code, start_date, end_date);
8010 snapshot.projects = pool.projects.clone();
8011
8012 {
8014 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8015 Vec::new();
8016
8017 for te in &hr.time_entries {
8019 let total_hours = te.hours_regular + te.hours_overtime;
8020 if total_hours > 0.0 {
8021 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8022 id: te.entry_id.clone(),
8023 entity_id: company_code.to_string(),
8024 date: te.date,
8025 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8026 .unwrap_or(rust_decimal::Decimal::ZERO),
8027 source_type: CostSourceType::TimeEntry,
8028 hours: Some(
8029 rust_decimal::Decimal::from_f64_retain(total_hours)
8030 .unwrap_or(rust_decimal::Decimal::ZERO),
8031 ),
8032 });
8033 }
8034 }
8035
8036 for er in &hr.expense_reports {
8038 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8039 id: er.report_id.clone(),
8040 entity_id: company_code.to_string(),
8041 date: er.submission_date,
8042 amount: er.total_amount,
8043 source_type: CostSourceType::ExpenseReport,
8044 hours: None,
8045 });
8046 }
8047
8048 for po in &document_flows.purchase_orders {
8050 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8051 id: po.header.document_id.clone(),
8052 entity_id: company_code.to_string(),
8053 date: po.header.document_date,
8054 amount: po.total_net_amount,
8055 source_type: CostSourceType::PurchaseOrder,
8056 hours: None,
8057 });
8058 }
8059
8060 for vi in &document_flows.vendor_invoices {
8062 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8063 id: vi.header.document_id.clone(),
8064 entity_id: company_code.to_string(),
8065 date: vi.header.document_date,
8066 amount: vi.payable_amount,
8067 source_type: CostSourceType::VendorInvoice,
8068 hours: None,
8069 });
8070 }
8071
8072 if !source_docs.is_empty() && !pool.projects.is_empty() {
8073 let mut cost_gen =
8074 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8075 self.config.project_accounting.cost_allocation.clone(),
8076 seed + 99,
8077 );
8078 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8079 }
8080 }
8081
8082 if self.config.project_accounting.change_orders.enabled {
8084 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8085 self.config.project_accounting.change_orders.clone(),
8086 seed + 96,
8087 );
8088 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8089 }
8090
8091 if self.config.project_accounting.milestones.enabled {
8093 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8094 self.config.project_accounting.milestones.clone(),
8095 seed + 97,
8096 );
8097 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8098 }
8099
8100 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8102 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8103 self.config.project_accounting.earned_value.clone(),
8104 seed + 98,
8105 );
8106 snapshot.earned_value_metrics =
8107 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8108 }
8109
8110 if self.config.project_accounting.revenue_recognition.enabled
8112 && !snapshot.projects.is_empty()
8113 && !snapshot.cost_lines.is_empty()
8114 {
8115 use datasynth_generators::project_accounting::RevenueGenerator;
8116 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8117 let avg_contract_value =
8118 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8119 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8120
8121 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8124 snapshot
8125 .projects
8126 .iter()
8127 .filter(|p| {
8128 matches!(
8129 p.project_type,
8130 datasynth_core::models::ProjectType::Customer
8131 )
8132 })
8133 .map(|p| {
8134 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8135 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8136 } else {
8138 avg_contract_value
8139 };
8140 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8142 })
8143 .collect();
8144
8145 if !contract_values.is_empty() {
8146 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8147 snapshot.revenue_records = rev_gen.generate(
8148 &snapshot.projects,
8149 &snapshot.cost_lines,
8150 &contract_values,
8151 start_date,
8152 end_date,
8153 );
8154 debug!(
8155 "Generated {} revenue recognition records for {} customer projects",
8156 snapshot.revenue_records.len(),
8157 contract_values.len()
8158 );
8159 }
8160 }
8161
8162 stats.project_count = snapshot.projects.len();
8163 stats.project_change_order_count = snapshot.change_orders.len();
8164 stats.project_cost_line_count = snapshot.cost_lines.len();
8165
8166 info!(
8167 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8168 snapshot.projects.len(),
8169 snapshot.change_orders.len(),
8170 snapshot.milestones.len(),
8171 snapshot.earned_value_metrics.len()
8172 );
8173 self.check_resources_with_log("post-project-accounting")?;
8174
8175 Ok(snapshot)
8176 }
8177
8178 fn phase_evolution_events(
8180 &mut self,
8181 stats: &mut EnhancedGenerationStatistics,
8182 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8183 if !self.phase_config.generate_evolution_events {
8184 debug!("Phase 24: Skipped (evolution events disabled)");
8185 return Ok((Vec::new(), Vec::new()));
8186 }
8187 info!("Phase 24: Generating Process Evolution + Organizational Events");
8188
8189 let seed = self.seed;
8190 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8191 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8192 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8193
8194 let mut proc_gen =
8196 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8197 seed + 100,
8198 );
8199 let process_events = proc_gen.generate_events(start_date, end_date);
8200
8201 let company_codes: Vec<String> = self
8203 .config
8204 .companies
8205 .iter()
8206 .map(|c| c.code.clone())
8207 .collect();
8208 let mut org_gen =
8209 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8210 seed + 101,
8211 );
8212 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8213
8214 stats.process_evolution_event_count = process_events.len();
8215 stats.organizational_event_count = org_events.len();
8216
8217 info!(
8218 "Evolution events generated: {} process evolution, {} organizational",
8219 process_events.len(),
8220 org_events.len()
8221 );
8222 self.check_resources_with_log("post-evolution-events")?;
8223
8224 Ok((process_events, org_events))
8225 }
8226
8227 fn phase_disruption_events(
8230 &self,
8231 stats: &mut EnhancedGenerationStatistics,
8232 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8233 if !self.config.organizational_events.enabled {
8234 debug!("Phase 24b: Skipped (organizational events disabled)");
8235 return Ok(Vec::new());
8236 }
8237 info!("Phase 24b: Generating Disruption Events");
8238
8239 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8240 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8241 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8242
8243 let company_codes: Vec<String> = self
8244 .config
8245 .companies
8246 .iter()
8247 .map(|c| c.code.clone())
8248 .collect();
8249
8250 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8251 let events = gen.generate(start_date, end_date, &company_codes);
8252
8253 stats.disruption_event_count = events.len();
8254 info!("Disruption events generated: {} events", events.len());
8255 self.check_resources_with_log("post-disruption-events")?;
8256
8257 Ok(events)
8258 }
8259
8260 fn phase_counterfactuals(
8267 &self,
8268 journal_entries: &[JournalEntry],
8269 stats: &mut EnhancedGenerationStatistics,
8270 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8271 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8272 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8273 return Ok(Vec::new());
8274 }
8275 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8276
8277 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8278
8279 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8280
8281 let specs = [
8283 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8284 CounterfactualSpec::ShiftDate { days: -14 },
8285 CounterfactualSpec::SelfApprove,
8286 CounterfactualSpec::SplitTransaction { split_count: 3 },
8287 ];
8288
8289 let pairs: Vec<_> = journal_entries
8290 .iter()
8291 .enumerate()
8292 .map(|(i, je)| {
8293 let spec = &specs[i % specs.len()];
8294 gen.generate(je, spec)
8295 })
8296 .collect();
8297
8298 stats.counterfactual_pair_count = pairs.len();
8299 info!(
8300 "Counterfactual pairs generated: {} pairs from {} journal entries",
8301 pairs.len(),
8302 journal_entries.len()
8303 );
8304 self.check_resources_with_log("post-counterfactuals")?;
8305
8306 Ok(pairs)
8307 }
8308
8309 fn phase_red_flags(
8316 &self,
8317 anomaly_labels: &AnomalyLabels,
8318 document_flows: &DocumentFlowSnapshot,
8319 stats: &mut EnhancedGenerationStatistics,
8320 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8321 if !self.config.fraud.enabled {
8322 debug!("Phase 26: Skipped (fraud generation disabled)");
8323 return Ok(Vec::new());
8324 }
8325 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8326
8327 use datasynth_generators::fraud::RedFlagGenerator;
8328
8329 let generator = RedFlagGenerator::new();
8330 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8331
8332 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8334 .labels
8335 .iter()
8336 .filter(|label| label.anomaly_type.is_intentional())
8337 .map(|label| label.document_id.as_str())
8338 .collect();
8339
8340 let mut flags = Vec::new();
8341
8342 for chain in &document_flows.p2p_chains {
8344 let doc_id = &chain.purchase_order.header.document_id;
8345 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8346 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8347 }
8348
8349 for chain in &document_flows.o2c_chains {
8351 let doc_id = &chain.sales_order.header.document_id;
8352 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8353 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8354 }
8355
8356 stats.red_flag_count = flags.len();
8357 info!(
8358 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8359 flags.len(),
8360 document_flows.p2p_chains.len(),
8361 document_flows.o2c_chains.len(),
8362 fraud_doc_ids.len()
8363 );
8364 self.check_resources_with_log("post-red-flags")?;
8365
8366 Ok(flags)
8367 }
8368
8369 fn phase_collusion_rings(
8375 &mut self,
8376 stats: &mut EnhancedGenerationStatistics,
8377 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8378 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8379 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8380 return Ok(Vec::new());
8381 }
8382 info!("Phase 26b: Generating Collusion Rings");
8383
8384 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8385 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8386 let months = self.config.global.period_months;
8387
8388 let employee_ids: Vec<String> = self
8389 .master_data
8390 .employees
8391 .iter()
8392 .map(|e| e.employee_id.clone())
8393 .collect();
8394 let vendor_ids: Vec<String> = self
8395 .master_data
8396 .vendors
8397 .iter()
8398 .map(|v| v.vendor_id.clone())
8399 .collect();
8400
8401 let mut generator =
8402 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8403 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8404
8405 stats.collusion_ring_count = rings.len();
8406 info!(
8407 "Collusion rings generated: {} rings, total members: {}",
8408 rings.len(),
8409 rings
8410 .iter()
8411 .map(datasynth_generators::fraud::CollusionRing::size)
8412 .sum::<usize>()
8413 );
8414 self.check_resources_with_log("post-collusion-rings")?;
8415
8416 Ok(rings)
8417 }
8418
8419 fn phase_temporal_attributes(
8424 &mut self,
8425 stats: &mut EnhancedGenerationStatistics,
8426 ) -> SynthResult<
8427 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8428 > {
8429 if !self.config.temporal_attributes.enabled {
8430 debug!("Phase 27: Skipped (temporal attributes disabled)");
8431 return Ok(Vec::new());
8432 }
8433 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8434
8435 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8436 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8437
8438 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8442 || self.config.temporal_attributes.enabled;
8443 let temporal_config = {
8444 let ta = &self.config.temporal_attributes;
8445 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8446 .enabled(ta.enabled)
8447 .closed_probability(ta.valid_time.closed_probability)
8448 .avg_validity_days(ta.valid_time.avg_validity_days)
8449 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8450 .with_version_chains(if generate_version_chains {
8451 ta.avg_versions_per_entity
8452 } else {
8453 1.0
8454 })
8455 .build()
8456 };
8457 let temporal_config = if self
8459 .config
8460 .temporal_attributes
8461 .transaction_time
8462 .allow_backdating
8463 {
8464 let mut c = temporal_config;
8465 c.transaction_time.allow_backdating = true;
8466 c.transaction_time.backdating_probability = self
8467 .config
8468 .temporal_attributes
8469 .transaction_time
8470 .backdating_probability;
8471 c.transaction_time.max_backdate_days = self
8472 .config
8473 .temporal_attributes
8474 .transaction_time
8475 .max_backdate_days;
8476 c
8477 } else {
8478 temporal_config
8479 };
8480 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8481 temporal_config,
8482 self.seed + 130,
8483 start_date,
8484 );
8485
8486 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8487 self.seed + 130,
8488 datasynth_core::GeneratorType::Vendor,
8489 );
8490
8491 let chains: Vec<_> = self
8492 .master_data
8493 .vendors
8494 .iter()
8495 .map(|vendor| {
8496 let id = uuid_factory.next();
8497 gen.generate_version_chain(vendor.clone(), id)
8498 })
8499 .collect();
8500
8501 stats.temporal_version_chain_count = chains.len();
8502 info!("Temporal version chains generated: {} chains", chains.len());
8503 self.check_resources_with_log("post-temporal-attributes")?;
8504
8505 Ok(chains)
8506 }
8507
8508 fn phase_entity_relationships(
8518 &self,
8519 journal_entries: &[JournalEntry],
8520 document_flows: &DocumentFlowSnapshot,
8521 stats: &mut EnhancedGenerationStatistics,
8522 ) -> SynthResult<(
8523 Option<datasynth_core::models::EntityGraph>,
8524 Vec<datasynth_core::models::CrossProcessLink>,
8525 )> {
8526 use datasynth_generators::relationships::{
8527 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8528 TransactionSummary,
8529 };
8530
8531 let rs_enabled = self.config.relationship_strength.enabled;
8532 let cpl_enabled = self.config.cross_process_links.enabled
8533 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8534
8535 if !rs_enabled && !cpl_enabled {
8536 debug!(
8537 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8538 );
8539 return Ok((None, Vec::new()));
8540 }
8541
8542 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8543
8544 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8545 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8546
8547 let company_code = self
8548 .config
8549 .companies
8550 .first()
8551 .map(|c| c.code.as_str())
8552 .unwrap_or("1000");
8553
8554 let gen_config = EntityGraphConfig {
8556 enabled: rs_enabled,
8557 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8558 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8559 enable_return_flows: false,
8560 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8561 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8562 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8564 1.0
8565 } else {
8566 0.30
8567 },
8568 ..Default::default()
8569 },
8570 strength_config: datasynth_generators::relationships::StrengthConfig {
8571 transaction_volume_weight: self
8572 .config
8573 .relationship_strength
8574 .calculation
8575 .transaction_volume_weight,
8576 transaction_count_weight: self
8577 .config
8578 .relationship_strength
8579 .calculation
8580 .transaction_count_weight,
8581 duration_weight: self
8582 .config
8583 .relationship_strength
8584 .calculation
8585 .relationship_duration_weight,
8586 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8587 mutual_connections_weight: self
8588 .config
8589 .relationship_strength
8590 .calculation
8591 .mutual_connections_weight,
8592 recency_half_life_days: self
8593 .config
8594 .relationship_strength
8595 .calculation
8596 .recency_half_life_days,
8597 },
8598 ..Default::default()
8599 };
8600
8601 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8602
8603 let entity_graph = if rs_enabled {
8605 let vendor_summaries: Vec<EntitySummary> = self
8607 .master_data
8608 .vendors
8609 .iter()
8610 .map(|v| {
8611 EntitySummary::new(
8612 &v.vendor_id,
8613 &v.name,
8614 datasynth_core::models::GraphEntityType::Vendor,
8615 start_date,
8616 )
8617 })
8618 .collect();
8619
8620 let customer_summaries: Vec<EntitySummary> = self
8621 .master_data
8622 .customers
8623 .iter()
8624 .map(|c| {
8625 EntitySummary::new(
8626 &c.customer_id,
8627 &c.name,
8628 datasynth_core::models::GraphEntityType::Customer,
8629 start_date,
8630 )
8631 })
8632 .collect();
8633
8634 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8639 std::collections::HashMap::new();
8640
8641 for je in journal_entries {
8642 let cc = je.header.company_code.clone();
8643 let posting_date = je.header.posting_date;
8644 for line in &je.lines {
8645 if let Some(ref tp) = line.trading_partner {
8646 let amount = if line.debit_amount > line.credit_amount {
8647 line.debit_amount
8648 } else {
8649 line.credit_amount
8650 };
8651 let entry = txn_summaries
8652 .entry((cc.clone(), tp.clone()))
8653 .or_insert_with(|| TransactionSummary {
8654 total_volume: rust_decimal::Decimal::ZERO,
8655 transaction_count: 0,
8656 first_transaction_date: posting_date,
8657 last_transaction_date: posting_date,
8658 related_entities: std::collections::HashSet::new(),
8659 });
8660 entry.total_volume += amount;
8661 entry.transaction_count += 1;
8662 if posting_date < entry.first_transaction_date {
8663 entry.first_transaction_date = posting_date;
8664 }
8665 if posting_date > entry.last_transaction_date {
8666 entry.last_transaction_date = posting_date;
8667 }
8668 entry.related_entities.insert(cc.clone());
8669 }
8670 }
8671 }
8672
8673 for chain in &document_flows.p2p_chains {
8676 let cc = chain.purchase_order.header.company_code.clone();
8677 let vendor_id = chain.purchase_order.vendor_id.clone();
8678 let po_date = chain.purchase_order.header.document_date;
8679 let amount = chain.purchase_order.total_net_amount;
8680
8681 let entry = txn_summaries
8682 .entry((cc.clone(), vendor_id))
8683 .or_insert_with(|| TransactionSummary {
8684 total_volume: rust_decimal::Decimal::ZERO,
8685 transaction_count: 0,
8686 first_transaction_date: po_date,
8687 last_transaction_date: po_date,
8688 related_entities: std::collections::HashSet::new(),
8689 });
8690 entry.total_volume += amount;
8691 entry.transaction_count += 1;
8692 if po_date < entry.first_transaction_date {
8693 entry.first_transaction_date = po_date;
8694 }
8695 if po_date > entry.last_transaction_date {
8696 entry.last_transaction_date = po_date;
8697 }
8698 entry.related_entities.insert(cc);
8699 }
8700
8701 for chain in &document_flows.o2c_chains {
8703 let cc = chain.sales_order.header.company_code.clone();
8704 let customer_id = chain.sales_order.customer_id.clone();
8705 let so_date = chain.sales_order.header.document_date;
8706 let amount = chain.sales_order.total_net_amount;
8707
8708 let entry = txn_summaries
8709 .entry((cc.clone(), customer_id))
8710 .or_insert_with(|| TransactionSummary {
8711 total_volume: rust_decimal::Decimal::ZERO,
8712 transaction_count: 0,
8713 first_transaction_date: so_date,
8714 last_transaction_date: so_date,
8715 related_entities: std::collections::HashSet::new(),
8716 });
8717 entry.total_volume += amount;
8718 entry.transaction_count += 1;
8719 if so_date < entry.first_transaction_date {
8720 entry.first_transaction_date = so_date;
8721 }
8722 if so_date > entry.last_transaction_date {
8723 entry.last_transaction_date = so_date;
8724 }
8725 entry.related_entities.insert(cc);
8726 }
8727
8728 let as_of_date = journal_entries
8729 .last()
8730 .map(|je| je.header.posting_date)
8731 .unwrap_or(start_date);
8732
8733 let graph = gen.generate_entity_graph(
8734 company_code,
8735 as_of_date,
8736 &vendor_summaries,
8737 &customer_summaries,
8738 &txn_summaries,
8739 );
8740
8741 info!(
8742 "Entity relationship graph: {} nodes, {} edges",
8743 graph.nodes.len(),
8744 graph.edges.len()
8745 );
8746 stats.entity_relationship_node_count = graph.nodes.len();
8747 stats.entity_relationship_edge_count = graph.edges.len();
8748 Some(graph)
8749 } else {
8750 None
8751 };
8752
8753 let cross_process_links = if cpl_enabled {
8755 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8757 .p2p_chains
8758 .iter()
8759 .flat_map(|chain| {
8760 let vendor_id = chain.purchase_order.vendor_id.clone();
8761 let cc = chain.purchase_order.header.company_code.clone();
8762 chain.goods_receipts.iter().flat_map(move |gr| {
8763 gr.items.iter().filter_map({
8764 let doc_id = gr.header.document_id.clone();
8765 let v_id = vendor_id.clone();
8766 let company = cc.clone();
8767 let receipt_date = gr.header.document_date;
8768 move |item| {
8769 item.base
8770 .material_id
8771 .as_ref()
8772 .map(|mat_id| GoodsReceiptRef {
8773 document_id: doc_id.clone(),
8774 material_id: mat_id.clone(),
8775 quantity: item.base.quantity,
8776 receipt_date,
8777 vendor_id: v_id.clone(),
8778 company_code: company.clone(),
8779 })
8780 }
8781 })
8782 })
8783 })
8784 .collect();
8785
8786 let del_refs: Vec<DeliveryRef> = document_flows
8788 .o2c_chains
8789 .iter()
8790 .flat_map(|chain| {
8791 let customer_id = chain.sales_order.customer_id.clone();
8792 let cc = chain.sales_order.header.company_code.clone();
8793 chain.deliveries.iter().flat_map(move |del| {
8794 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8795 del.items.iter().filter_map({
8796 let doc_id = del.header.document_id.clone();
8797 let c_id = customer_id.clone();
8798 let company = cc.clone();
8799 move |item| {
8800 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8801 document_id: doc_id.clone(),
8802 material_id: mat_id.clone(),
8803 quantity: item.base.quantity,
8804 delivery_date,
8805 customer_id: c_id.clone(),
8806 company_code: company.clone(),
8807 })
8808 }
8809 })
8810 })
8811 })
8812 .collect();
8813
8814 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8815 info!("Cross-process links generated: {} links", links.len());
8816 stats.cross_process_link_count = links.len();
8817 links
8818 } else {
8819 Vec::new()
8820 };
8821
8822 self.check_resources_with_log("post-entity-relationships")?;
8823 Ok((entity_graph, cross_process_links))
8824 }
8825
8826 fn phase_industry_data(
8828 &self,
8829 stats: &mut EnhancedGenerationStatistics,
8830 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8831 if !self.config.industry_specific.enabled {
8832 return None;
8833 }
8834 info!("Phase 29: Generating industry-specific data");
8835 let output = datasynth_generators::industry::factory::generate_industry_output(
8836 self.config.global.industry,
8837 );
8838 stats.industry_gl_account_count = output.gl_accounts.len();
8839 info!(
8840 "Industry data generated: {} GL accounts for {:?}",
8841 output.gl_accounts.len(),
8842 self.config.global.industry
8843 );
8844 Some(output)
8845 }
8846
8847 fn phase_opening_balances(
8849 &mut self,
8850 coa: &Arc<ChartOfAccounts>,
8851 stats: &mut EnhancedGenerationStatistics,
8852 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8853 if !self.config.balance.generate_opening_balances {
8854 debug!("Phase 3b: Skipped (opening balance generation disabled)");
8855 return Ok(Vec::new());
8856 }
8857 info!("Phase 3b: Generating Opening Balances");
8858
8859 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8860 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8861 let fiscal_year = start_date.year();
8862
8863 let industry = match self.config.global.industry {
8864 IndustrySector::Manufacturing => IndustryType::Manufacturing,
8865 IndustrySector::Retail => IndustryType::Retail,
8866 IndustrySector::FinancialServices => IndustryType::Financial,
8867 IndustrySector::Healthcare => IndustryType::Healthcare,
8868 IndustrySector::Technology => IndustryType::Technology,
8869 _ => IndustryType::Manufacturing,
8870 };
8871
8872 let config = datasynth_generators::OpeningBalanceConfig {
8873 industry,
8874 ..Default::default()
8875 };
8876 let mut gen =
8877 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8878
8879 let mut results = Vec::new();
8880 for company in &self.config.companies {
8881 let spec = OpeningBalanceSpec::new(
8882 company.code.clone(),
8883 start_date,
8884 fiscal_year,
8885 company.currency.clone(),
8886 rust_decimal::Decimal::new(10_000_000, 0),
8887 industry,
8888 );
8889 let ob = gen.generate(&spec, coa, start_date, &company.code);
8890 results.push(ob);
8891 }
8892
8893 stats.opening_balance_count = results.len();
8894 info!("Opening balances generated: {} companies", results.len());
8895 self.check_resources_with_log("post-opening-balances")?;
8896
8897 Ok(results)
8898 }
8899
8900 fn phase_subledger_reconciliation(
8902 &mut self,
8903 subledger: &SubledgerSnapshot,
8904 entries: &[JournalEntry],
8905 stats: &mut EnhancedGenerationStatistics,
8906 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8907 if !self.config.balance.reconcile_subledgers {
8908 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8909 return Ok(Vec::new());
8910 }
8911 info!("Phase 9b: Reconciling GL to subledger balances");
8912
8913 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8914 .map(|d| d + chrono::Months::new(self.config.global.period_months))
8915 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8916
8917 let tracker_config = BalanceTrackerConfig {
8919 validate_on_each_entry: false,
8920 track_history: false,
8921 fail_on_validation_error: false,
8922 ..Default::default()
8923 };
8924 let recon_currency = self
8925 .config
8926 .companies
8927 .first()
8928 .map(|c| c.currency.clone())
8929 .unwrap_or_else(|| "USD".to_string());
8930 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8931 let validation_errors = tracker.apply_entries(entries);
8932 if !validation_errors.is_empty() {
8933 warn!(
8934 error_count = validation_errors.len(),
8935 "Balance tracker encountered validation errors during subledger reconciliation"
8936 );
8937 for err in &validation_errors {
8938 debug!("Balance validation error: {:?}", err);
8939 }
8940 }
8941
8942 let mut engine = datasynth_generators::ReconciliationEngine::new(
8943 datasynth_generators::ReconciliationConfig::default(),
8944 );
8945
8946 let mut results = Vec::new();
8947 let company_code = self
8948 .config
8949 .companies
8950 .first()
8951 .map(|c| c.code.as_str())
8952 .unwrap_or("1000");
8953
8954 if !subledger.ar_invoices.is_empty() {
8956 let gl_balance = tracker
8957 .get_account_balance(
8958 company_code,
8959 datasynth_core::accounts::control_accounts::AR_CONTROL,
8960 )
8961 .map(|b| b.closing_balance)
8962 .unwrap_or_default();
8963 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8964 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8965 }
8966
8967 if !subledger.ap_invoices.is_empty() {
8969 let gl_balance = tracker
8970 .get_account_balance(
8971 company_code,
8972 datasynth_core::accounts::control_accounts::AP_CONTROL,
8973 )
8974 .map(|b| b.closing_balance)
8975 .unwrap_or_default();
8976 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8977 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8978 }
8979
8980 if !subledger.fa_records.is_empty() {
8982 let gl_asset_balance = tracker
8983 .get_account_balance(
8984 company_code,
8985 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8986 )
8987 .map(|b| b.closing_balance)
8988 .unwrap_or_default();
8989 let gl_accum_depr_balance = tracker
8990 .get_account_balance(
8991 company_code,
8992 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8993 )
8994 .map(|b| b.closing_balance)
8995 .unwrap_or_default();
8996 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8997 subledger.fa_records.iter().collect();
8998 let (asset_recon, depr_recon) = engine.reconcile_fa(
8999 company_code,
9000 end_date,
9001 gl_asset_balance,
9002 gl_accum_depr_balance,
9003 &fa_refs,
9004 );
9005 results.push(asset_recon);
9006 results.push(depr_recon);
9007 }
9008
9009 if !subledger.inventory_positions.is_empty() {
9011 let gl_balance = tracker
9012 .get_account_balance(
9013 company_code,
9014 datasynth_core::accounts::control_accounts::INVENTORY,
9015 )
9016 .map(|b| b.closing_balance)
9017 .unwrap_or_default();
9018 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9019 subledger.inventory_positions.iter().collect();
9020 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9021 }
9022
9023 stats.subledger_reconciliation_count = results.len();
9024 let passed = results.iter().filter(|r| r.is_balanced()).count();
9025 let failed = results.len() - passed;
9026 info!(
9027 "Subledger reconciliation: {} checks, {} passed, {} failed",
9028 results.len(),
9029 passed,
9030 failed
9031 );
9032 self.check_resources_with_log("post-subledger-reconciliation")?;
9033
9034 Ok(results)
9035 }
9036
9037 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9039 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9040
9041 let coa_framework = self.resolve_coa_framework();
9042
9043 let mut gen = ChartOfAccountsGenerator::new(
9044 self.config.chart_of_accounts.complexity,
9045 self.config.global.industry,
9046 self.seed,
9047 )
9048 .with_coa_framework(coa_framework);
9049
9050 let coa = Arc::new(gen.generate());
9051 self.coa = Some(Arc::clone(&coa));
9052
9053 if let Some(pb) = pb {
9054 pb.finish_with_message("Chart of Accounts complete");
9055 }
9056
9057 Ok(coa)
9058 }
9059
9060 fn generate_master_data(&mut self) -> SynthResult<()> {
9062 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9063 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9064 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9065
9066 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9068
9069 let pack = self.primary_pack().clone();
9071
9072 let vendors_per_company = self.phase_config.vendors_per_company;
9074 let customers_per_company = self.phase_config.customers_per_company;
9075 let materials_per_company = self.phase_config.materials_per_company;
9076 let assets_per_company = self.phase_config.assets_per_company;
9077 let coa_framework = self.resolve_coa_framework();
9078
9079 let per_company_results: Vec<_> = self
9082 .config
9083 .companies
9084 .par_iter()
9085 .enumerate()
9086 .map(|(i, company)| {
9087 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9088 let pack = pack.clone();
9089
9090 let mut vendor_gen = VendorGenerator::new(company_seed);
9092 vendor_gen.set_country_pack(pack.clone());
9093 vendor_gen.set_coa_framework(coa_framework);
9094 vendor_gen.set_counter_offset(i * vendors_per_company);
9095 if self.config.vendor_network.enabled {
9097 let vn = &self.config.vendor_network;
9098 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9099 enabled: true,
9100 depth: vn.depth,
9101 tier1_count: datasynth_generators::TierCountConfig::new(
9102 vn.tier1.min,
9103 vn.tier1.max,
9104 ),
9105 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9106 vn.tier2_per_parent.min,
9107 vn.tier2_per_parent.max,
9108 ),
9109 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9110 vn.tier3_per_parent.min,
9111 vn.tier3_per_parent.max,
9112 ),
9113 cluster_distribution: datasynth_generators::ClusterDistribution {
9114 reliable_strategic: vn.clusters.reliable_strategic,
9115 standard_operational: vn.clusters.standard_operational,
9116 transactional: vn.clusters.transactional,
9117 problematic: vn.clusters.problematic,
9118 },
9119 concentration_limits: datasynth_generators::ConcentrationLimits {
9120 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9121 max_top5: vn.dependencies.top_5_concentration,
9122 },
9123 ..datasynth_generators::VendorNetworkConfig::default()
9124 });
9125 }
9126 let vendor_pool =
9127 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9128
9129 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9131 customer_gen.set_country_pack(pack.clone());
9132 customer_gen.set_coa_framework(coa_framework);
9133 customer_gen.set_counter_offset(i * customers_per_company);
9134 if self.config.customer_segmentation.enabled {
9136 let cs = &self.config.customer_segmentation;
9137 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9138 enabled: true,
9139 segment_distribution: datasynth_generators::SegmentDistribution {
9140 enterprise: cs.value_segments.enterprise.customer_share,
9141 mid_market: cs.value_segments.mid_market.customer_share,
9142 smb: cs.value_segments.smb.customer_share,
9143 consumer: cs.value_segments.consumer.customer_share,
9144 },
9145 referral_config: datasynth_generators::ReferralConfig {
9146 enabled: cs.networks.referrals.enabled,
9147 referral_rate: cs.networks.referrals.referral_rate,
9148 ..Default::default()
9149 },
9150 hierarchy_config: datasynth_generators::HierarchyConfig {
9151 enabled: cs.networks.corporate_hierarchies.enabled,
9152 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9153 ..Default::default()
9154 },
9155 ..Default::default()
9156 };
9157 customer_gen.set_segmentation_config(seg_cfg);
9158 }
9159 let customer_pool = customer_gen.generate_customer_pool(
9160 customers_per_company,
9161 &company.code,
9162 start_date,
9163 );
9164
9165 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9167 material_gen.set_country_pack(pack.clone());
9168 material_gen.set_counter_offset(i * materials_per_company);
9169 let material_pool = material_gen.generate_material_pool(
9170 materials_per_company,
9171 &company.code,
9172 start_date,
9173 );
9174
9175 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9177 let asset_pool = asset_gen.generate_asset_pool(
9178 assets_per_company,
9179 &company.code,
9180 (start_date, end_date),
9181 );
9182
9183 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9185 employee_gen.set_country_pack(pack);
9186 let employee_pool =
9187 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9188
9189 let employee_change_history =
9191 employee_gen.generate_all_change_history(&employee_pool, end_date);
9192
9193 let employee_ids: Vec<String> = employee_pool
9195 .employees
9196 .iter()
9197 .map(|e| e.employee_id.clone())
9198 .collect();
9199 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9200 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9201
9202 (
9203 vendor_pool.vendors,
9204 customer_pool.customers,
9205 material_pool.materials,
9206 asset_pool.assets,
9207 employee_pool.employees,
9208 employee_change_history,
9209 cost_centers,
9210 )
9211 })
9212 .collect();
9213
9214 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9216 per_company_results
9217 {
9218 self.master_data.vendors.extend(vendors);
9219 self.master_data.customers.extend(customers);
9220 self.master_data.materials.extend(materials);
9221 self.master_data.assets.extend(assets);
9222 self.master_data.employees.extend(employees);
9223 self.master_data.cost_centers.extend(cost_centers);
9224 self.master_data
9225 .employee_change_history
9226 .extend(change_history);
9227 }
9228
9229 if let Some(pb) = &pb {
9230 pb.inc(total);
9231 }
9232 if let Some(pb) = pb {
9233 pb.finish_with_message("Master data generation complete");
9234 }
9235
9236 Ok(())
9237 }
9238
9239 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9241 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9242 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9243
9244 let months = (self.config.global.period_months as usize).max(1);
9247 let p2p_count = self
9248 .phase_config
9249 .p2p_chains
9250 .min(self.master_data.vendors.len() * 2 * months);
9251 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9252
9253 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9255 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9256 p2p_gen.set_country_pack(self.primary_pack().clone());
9257
9258 for i in 0..p2p_count {
9259 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9260 let materials: Vec<&Material> = self
9261 .master_data
9262 .materials
9263 .iter()
9264 .skip(i % self.master_data.materials.len().max(1))
9265 .take(2.min(self.master_data.materials.len()))
9266 .collect();
9267
9268 if materials.is_empty() {
9269 continue;
9270 }
9271
9272 let company = &self.config.companies[i % self.config.companies.len()];
9273 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9274 let fiscal_period = po_date.month() as u8;
9275 let created_by = if self.master_data.employees.is_empty() {
9276 "SYSTEM"
9277 } else {
9278 self.master_data.employees[i % self.master_data.employees.len()]
9279 .user_id
9280 .as_str()
9281 };
9282
9283 let chain = p2p_gen.generate_chain(
9284 &company.code,
9285 vendor,
9286 &materials,
9287 po_date,
9288 start_date.year() as u16,
9289 fiscal_period,
9290 created_by,
9291 );
9292
9293 flows.purchase_orders.push(chain.purchase_order.clone());
9295 flows.goods_receipts.extend(chain.goods_receipts.clone());
9296 if let Some(vi) = &chain.vendor_invoice {
9297 flows.vendor_invoices.push(vi.clone());
9298 }
9299 if let Some(payment) = &chain.payment {
9300 flows.payments.push(payment.clone());
9301 }
9302 for remainder in &chain.remainder_payments {
9303 flows.payments.push(remainder.clone());
9304 }
9305 flows.p2p_chains.push(chain);
9306
9307 if let Some(pb) = &pb {
9308 pb.inc(1);
9309 }
9310 }
9311
9312 if let Some(pb) = pb {
9313 pb.finish_with_message("P2P document flows complete");
9314 }
9315
9316 let o2c_count = self
9319 .phase_config
9320 .o2c_chains
9321 .min(self.master_data.customers.len() * 2 * months);
9322 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9323
9324 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9326 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9327 o2c_gen.set_country_pack(self.primary_pack().clone());
9328
9329 for i in 0..o2c_count {
9330 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9331 let materials: Vec<&Material> = self
9332 .master_data
9333 .materials
9334 .iter()
9335 .skip(i % self.master_data.materials.len().max(1))
9336 .take(2.min(self.master_data.materials.len()))
9337 .collect();
9338
9339 if materials.is_empty() {
9340 continue;
9341 }
9342
9343 let company = &self.config.companies[i % self.config.companies.len()];
9344 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9345 let fiscal_period = so_date.month() as u8;
9346 let created_by = if self.master_data.employees.is_empty() {
9347 "SYSTEM"
9348 } else {
9349 self.master_data.employees[i % self.master_data.employees.len()]
9350 .user_id
9351 .as_str()
9352 };
9353
9354 let chain = o2c_gen.generate_chain(
9355 &company.code,
9356 customer,
9357 &materials,
9358 so_date,
9359 start_date.year() as u16,
9360 fiscal_period,
9361 created_by,
9362 );
9363
9364 flows.sales_orders.push(chain.sales_order.clone());
9366 flows.deliveries.extend(chain.deliveries.clone());
9367 if let Some(ci) = &chain.customer_invoice {
9368 flows.customer_invoices.push(ci.clone());
9369 }
9370 if let Some(receipt) = &chain.customer_receipt {
9371 flows.payments.push(receipt.clone());
9372 }
9373 for receipt in &chain.remainder_receipts {
9375 flows.payments.push(receipt.clone());
9376 }
9377 flows.o2c_chains.push(chain);
9378
9379 if let Some(pb) = &pb {
9380 pb.inc(1);
9381 }
9382 }
9383
9384 if let Some(pb) = pb {
9385 pb.finish_with_message("O2C document flows complete");
9386 }
9387
9388 {
9392 let mut refs = Vec::new();
9393 for doc in &flows.purchase_orders {
9394 refs.extend(doc.header.document_references.iter().cloned());
9395 }
9396 for doc in &flows.goods_receipts {
9397 refs.extend(doc.header.document_references.iter().cloned());
9398 }
9399 for doc in &flows.vendor_invoices {
9400 refs.extend(doc.header.document_references.iter().cloned());
9401 }
9402 for doc in &flows.sales_orders {
9403 refs.extend(doc.header.document_references.iter().cloned());
9404 }
9405 for doc in &flows.deliveries {
9406 refs.extend(doc.header.document_references.iter().cloned());
9407 }
9408 for doc in &flows.customer_invoices {
9409 refs.extend(doc.header.document_references.iter().cloned());
9410 }
9411 for doc in &flows.payments {
9412 refs.extend(doc.header.document_references.iter().cloned());
9413 }
9414 debug!(
9415 "Collected {} document cross-references from document headers",
9416 refs.len()
9417 );
9418 flows.document_references = refs;
9419 }
9420
9421 Ok(())
9422 }
9423
9424 fn generate_journal_entries(
9426 &mut self,
9427 coa: &Arc<ChartOfAccounts>,
9428 ) -> SynthResult<Vec<JournalEntry>> {
9429 use datasynth_core::traits::ParallelGenerator;
9430
9431 let total = self.calculate_total_transactions();
9432 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9433
9434 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9435 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9436 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9437
9438 let company_codes: Vec<String> = self
9439 .config
9440 .companies
9441 .iter()
9442 .map(|c| c.code.clone())
9443 .collect();
9444
9445 let generator = JournalEntryGenerator::new_with_params(
9446 self.config.transactions.clone(),
9447 Arc::clone(coa),
9448 company_codes,
9449 start_date,
9450 end_date,
9451 self.seed,
9452 );
9453
9454 let je_pack = self.primary_pack();
9458
9459 let mut generator = generator
9460 .with_master_data(
9461 &self.master_data.vendors,
9462 &self.master_data.customers,
9463 &self.master_data.materials,
9464 )
9465 .with_country_pack_names(je_pack)
9466 .with_country_pack_temporal(
9467 self.config.temporal_patterns.clone(),
9468 self.seed + 200,
9469 je_pack,
9470 )
9471 .with_persona_errors(true)
9472 .with_fraud_config(self.config.fraud.clone());
9473
9474 if self.config.temporal.enabled {
9476 let drift_config = self.config.temporal.to_core_config();
9477 generator = generator.with_drift_config(drift_config, self.seed + 100);
9478 }
9479
9480 self.check_memory_limit()?;
9482
9483 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9485
9486 let entries = if total >= 10_000 && num_threads > 1 {
9490 let sub_generators = generator.split(num_threads);
9493 let entries_per_thread = total as usize / num_threads;
9494 let remainder = total as usize % num_threads;
9495
9496 let batches: Vec<Vec<JournalEntry>> = sub_generators
9497 .into_par_iter()
9498 .enumerate()
9499 .map(|(i, mut gen)| {
9500 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9501 gen.generate_batch(count)
9502 })
9503 .collect();
9504
9505 let entries = JournalEntryGenerator::merge_results(batches);
9507
9508 if let Some(pb) = &pb {
9509 pb.inc(total);
9510 }
9511 entries
9512 } else {
9513 let mut entries = Vec::with_capacity(total as usize);
9515 for _ in 0..total {
9516 let entry = generator.generate();
9517 entries.push(entry);
9518 if let Some(pb) = &pb {
9519 pb.inc(1);
9520 }
9521 }
9522 entries
9523 };
9524
9525 if let Some(pb) = pb {
9526 pb.finish_with_message("Journal entries complete");
9527 }
9528
9529 Ok(entries)
9530 }
9531
9532 fn generate_jes_from_document_flows(
9537 &mut self,
9538 flows: &DocumentFlowSnapshot,
9539 ) -> SynthResult<Vec<JournalEntry>> {
9540 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9541 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9542
9543 let je_config = match self.resolve_coa_framework() {
9544 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9545 CoAFramework::GermanSkr04 => {
9546 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9547 DocumentFlowJeConfig::from(&fa)
9548 }
9549 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9550 };
9551
9552 let populate_fec = je_config.populate_fec_fields;
9553 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9554
9555 if populate_fec {
9559 let mut aux_lookup = std::collections::HashMap::new();
9560 for vendor in &self.master_data.vendors {
9561 if let Some(ref aux) = vendor.auxiliary_gl_account {
9562 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9563 }
9564 }
9565 for customer in &self.master_data.customers {
9566 if let Some(ref aux) = customer.auxiliary_gl_account {
9567 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9568 }
9569 }
9570 if !aux_lookup.is_empty() {
9571 generator.set_auxiliary_account_lookup(aux_lookup);
9572 }
9573 }
9574
9575 let mut entries = Vec::new();
9576
9577 for chain in &flows.p2p_chains {
9579 let chain_entries = generator.generate_from_p2p_chain(chain);
9580 entries.extend(chain_entries);
9581 if let Some(pb) = &pb {
9582 pb.inc(1);
9583 }
9584 }
9585
9586 for chain in &flows.o2c_chains {
9588 let chain_entries = generator.generate_from_o2c_chain(chain);
9589 entries.extend(chain_entries);
9590 if let Some(pb) = &pb {
9591 pb.inc(1);
9592 }
9593 }
9594
9595 if let Some(pb) = pb {
9596 pb.finish_with_message(format!(
9597 "Generated {} JEs from document flows",
9598 entries.len()
9599 ));
9600 }
9601
9602 Ok(entries)
9603 }
9604
9605 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9611 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9612
9613 let mut jes = Vec::with_capacity(payroll_runs.len());
9614
9615 for run in payroll_runs {
9616 let mut je = JournalEntry::new_simple(
9617 format!("JE-PAYROLL-{}", run.payroll_id),
9618 run.company_code.clone(),
9619 run.run_date,
9620 format!("Payroll {}", run.payroll_id),
9621 );
9622
9623 je.add_line(JournalEntryLine {
9625 line_number: 1,
9626 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9627 debit_amount: run.total_gross,
9628 reference: Some(run.payroll_id.clone()),
9629 text: Some(format!(
9630 "Payroll {} ({} employees)",
9631 run.payroll_id, run.employee_count
9632 )),
9633 ..Default::default()
9634 });
9635
9636 je.add_line(JournalEntryLine {
9638 line_number: 2,
9639 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9640 credit_amount: run.total_gross,
9641 reference: Some(run.payroll_id.clone()),
9642 ..Default::default()
9643 });
9644
9645 jes.push(je);
9646 }
9647
9648 jes
9649 }
9650
9651 fn link_document_flows_to_subledgers(
9656 &mut self,
9657 flows: &DocumentFlowSnapshot,
9658 ) -> SynthResult<SubledgerSnapshot> {
9659 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9660 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9661
9662 let vendor_names: std::collections::HashMap<String, String> = self
9664 .master_data
9665 .vendors
9666 .iter()
9667 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9668 .collect();
9669 let customer_names: std::collections::HashMap<String, String> = self
9670 .master_data
9671 .customers
9672 .iter()
9673 .map(|c| (c.customer_id.clone(), c.name.clone()))
9674 .collect();
9675
9676 let mut linker = DocumentFlowLinker::new()
9677 .with_vendor_names(vendor_names)
9678 .with_customer_names(customer_names);
9679
9680 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9682 if let Some(pb) = &pb {
9683 pb.inc(flows.vendor_invoices.len() as u64);
9684 }
9685
9686 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9688 if let Some(pb) = &pb {
9689 pb.inc(flows.customer_invoices.len() as u64);
9690 }
9691
9692 if let Some(pb) = pb {
9693 pb.finish_with_message(format!(
9694 "Linked {} AP and {} AR invoices",
9695 ap_invoices.len(),
9696 ar_invoices.len()
9697 ));
9698 }
9699
9700 Ok(SubledgerSnapshot {
9701 ap_invoices,
9702 ar_invoices,
9703 fa_records: Vec::new(),
9704 inventory_positions: Vec::new(),
9705 inventory_movements: Vec::new(),
9706 ar_aging_reports: Vec::new(),
9708 ap_aging_reports: Vec::new(),
9709 depreciation_runs: Vec::new(),
9711 inventory_valuations: Vec::new(),
9712 dunning_runs: Vec::new(),
9714 dunning_letters: Vec::new(),
9715 })
9716 }
9717
9718 #[allow(clippy::too_many_arguments)]
9723 fn generate_ocpm_events(
9724 &mut self,
9725 flows: &DocumentFlowSnapshot,
9726 sourcing: &SourcingSnapshot,
9727 hr: &HrSnapshot,
9728 manufacturing: &ManufacturingSnapshot,
9729 banking: &BankingSnapshot,
9730 audit: &AuditSnapshot,
9731 financial_reporting: &FinancialReportingSnapshot,
9732 ) -> SynthResult<OcpmSnapshot> {
9733 let total_chains = flows.p2p_chains.len()
9734 + flows.o2c_chains.len()
9735 + sourcing.sourcing_projects.len()
9736 + hr.payroll_runs.len()
9737 + manufacturing.production_orders.len()
9738 + banking.customers.len()
9739 + audit.engagements.len()
9740 + financial_reporting.bank_reconciliations.len();
9741 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9742
9743 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9745 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9746
9747 let ocpm_config = OcpmGeneratorConfig {
9749 generate_p2p: true,
9750 generate_o2c: true,
9751 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9752 generate_h2r: !hr.payroll_runs.is_empty(),
9753 generate_mfg: !manufacturing.production_orders.is_empty(),
9754 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9755 generate_bank: !banking.customers.is_empty(),
9756 generate_audit: !audit.engagements.is_empty(),
9757 happy_path_rate: 0.75,
9758 exception_path_rate: 0.20,
9759 error_path_rate: 0.05,
9760 add_duration_variability: true,
9761 duration_std_dev_factor: 0.3,
9762 };
9763 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9764 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9765
9766 let available_users: Vec<String> = self
9768 .master_data
9769 .employees
9770 .iter()
9771 .take(20)
9772 .map(|e| e.user_id.clone())
9773 .collect();
9774
9775 let fallback_date =
9777 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9778 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9779 .unwrap_or(fallback_date);
9780 let base_midnight = base_date
9781 .and_hms_opt(0, 0, 0)
9782 .expect("midnight is always valid");
9783 let base_datetime =
9784 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9785
9786 let add_result = |event_log: &mut OcpmEventLog,
9788 result: datasynth_ocpm::CaseGenerationResult| {
9789 for event in result.events {
9790 event_log.add_event(event);
9791 }
9792 for object in result.objects {
9793 event_log.add_object(object);
9794 }
9795 for relationship in result.relationships {
9796 event_log.add_relationship(relationship);
9797 }
9798 for corr in result.correlation_events {
9799 event_log.add_correlation_event(corr);
9800 }
9801 event_log.add_case(result.case_trace);
9802 };
9803
9804 for chain in &flows.p2p_chains {
9806 let po = &chain.purchase_order;
9807 let documents = P2pDocuments::new(
9808 &po.header.document_id,
9809 &po.vendor_id,
9810 &po.header.company_code,
9811 po.total_net_amount,
9812 &po.header.currency,
9813 &ocpm_uuid_factory,
9814 )
9815 .with_goods_receipt(
9816 chain
9817 .goods_receipts
9818 .first()
9819 .map(|gr| gr.header.document_id.as_str())
9820 .unwrap_or(""),
9821 &ocpm_uuid_factory,
9822 )
9823 .with_invoice(
9824 chain
9825 .vendor_invoice
9826 .as_ref()
9827 .map(|vi| vi.header.document_id.as_str())
9828 .unwrap_or(""),
9829 &ocpm_uuid_factory,
9830 )
9831 .with_payment(
9832 chain
9833 .payment
9834 .as_ref()
9835 .map(|p| p.header.document_id.as_str())
9836 .unwrap_or(""),
9837 &ocpm_uuid_factory,
9838 );
9839
9840 let start_time =
9841 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9842 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9843 add_result(&mut event_log, result);
9844
9845 if let Some(pb) = &pb {
9846 pb.inc(1);
9847 }
9848 }
9849
9850 for chain in &flows.o2c_chains {
9852 let so = &chain.sales_order;
9853 let documents = O2cDocuments::new(
9854 &so.header.document_id,
9855 &so.customer_id,
9856 &so.header.company_code,
9857 so.total_net_amount,
9858 &so.header.currency,
9859 &ocpm_uuid_factory,
9860 )
9861 .with_delivery(
9862 chain
9863 .deliveries
9864 .first()
9865 .map(|d| d.header.document_id.as_str())
9866 .unwrap_or(""),
9867 &ocpm_uuid_factory,
9868 )
9869 .with_invoice(
9870 chain
9871 .customer_invoice
9872 .as_ref()
9873 .map(|ci| ci.header.document_id.as_str())
9874 .unwrap_or(""),
9875 &ocpm_uuid_factory,
9876 )
9877 .with_receipt(
9878 chain
9879 .customer_receipt
9880 .as_ref()
9881 .map(|r| r.header.document_id.as_str())
9882 .unwrap_or(""),
9883 &ocpm_uuid_factory,
9884 );
9885
9886 let start_time =
9887 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9888 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9889 add_result(&mut event_log, result);
9890
9891 if let Some(pb) = &pb {
9892 pb.inc(1);
9893 }
9894 }
9895
9896 for project in &sourcing.sourcing_projects {
9898 let vendor_id = sourcing
9900 .contracts
9901 .iter()
9902 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9903 .map(|c| c.vendor_id.clone())
9904 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9905 .or_else(|| {
9906 self.master_data
9907 .vendors
9908 .first()
9909 .map(|v| v.vendor_id.clone())
9910 })
9911 .unwrap_or_else(|| "V000".to_string());
9912 let mut docs = S2cDocuments::new(
9913 &project.project_id,
9914 &vendor_id,
9915 &project.company_code,
9916 project.estimated_annual_spend,
9917 &ocpm_uuid_factory,
9918 );
9919 if let Some(rfx) = sourcing
9921 .rfx_events
9922 .iter()
9923 .find(|r| r.sourcing_project_id == project.project_id)
9924 {
9925 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9926 if let Some(bid) = sourcing.bids.iter().find(|b| {
9928 b.rfx_id == rfx.rfx_id
9929 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9930 }) {
9931 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9932 }
9933 }
9934 if let Some(contract) = sourcing
9936 .contracts
9937 .iter()
9938 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9939 {
9940 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9941 }
9942 let start_time = base_datetime - chrono::Duration::days(90);
9943 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9944 add_result(&mut event_log, result);
9945
9946 if let Some(pb) = &pb {
9947 pb.inc(1);
9948 }
9949 }
9950
9951 for run in &hr.payroll_runs {
9953 let employee_id = hr
9955 .payroll_line_items
9956 .iter()
9957 .find(|li| li.payroll_id == run.payroll_id)
9958 .map(|li| li.employee_id.as_str())
9959 .unwrap_or("EMP000");
9960 let docs = H2rDocuments::new(
9961 &run.payroll_id,
9962 employee_id,
9963 &run.company_code,
9964 run.total_gross,
9965 &ocpm_uuid_factory,
9966 )
9967 .with_time_entries(
9968 hr.time_entries
9969 .iter()
9970 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9971 .take(5)
9972 .map(|t| t.entry_id.as_str())
9973 .collect(),
9974 );
9975 let start_time = base_datetime - chrono::Duration::days(30);
9976 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9977 add_result(&mut event_log, result);
9978
9979 if let Some(pb) = &pb {
9980 pb.inc(1);
9981 }
9982 }
9983
9984 for order in &manufacturing.production_orders {
9986 let mut docs = MfgDocuments::new(
9987 &order.order_id,
9988 &order.material_id,
9989 &order.company_code,
9990 order.planned_quantity,
9991 &ocpm_uuid_factory,
9992 )
9993 .with_operations(
9994 order
9995 .operations
9996 .iter()
9997 .map(|o| format!("OP-{:04}", o.operation_number))
9998 .collect::<Vec<_>>()
9999 .iter()
10000 .map(std::string::String::as_str)
10001 .collect(),
10002 );
10003 if let Some(insp) = manufacturing
10005 .quality_inspections
10006 .iter()
10007 .find(|i| i.reference_id == order.order_id)
10008 {
10009 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10010 }
10011 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10013 cc.items
10014 .iter()
10015 .any(|item| item.material_id == order.material_id)
10016 }) {
10017 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10018 }
10019 let start_time = base_datetime - chrono::Duration::days(60);
10020 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10021 add_result(&mut event_log, result);
10022
10023 if let Some(pb) = &pb {
10024 pb.inc(1);
10025 }
10026 }
10027
10028 for customer in &banking.customers {
10030 let customer_id_str = customer.customer_id.to_string();
10031 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10032 if let Some(account) = banking
10034 .accounts
10035 .iter()
10036 .find(|a| a.primary_owner_id == customer.customer_id)
10037 {
10038 let account_id_str = account.account_id.to_string();
10039 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10040 let txn_strs: Vec<String> = banking
10042 .transactions
10043 .iter()
10044 .filter(|t| t.account_id == account.account_id)
10045 .take(10)
10046 .map(|t| t.transaction_id.to_string())
10047 .collect();
10048 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10049 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10050 .transactions
10051 .iter()
10052 .filter(|t| t.account_id == account.account_id)
10053 .take(10)
10054 .map(|t| t.amount)
10055 .collect();
10056 if !txn_ids.is_empty() {
10057 docs = docs.with_transactions(txn_ids, txn_amounts);
10058 }
10059 }
10060 let start_time = base_datetime - chrono::Duration::days(180);
10061 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10062 add_result(&mut event_log, result);
10063
10064 if let Some(pb) = &pb {
10065 pb.inc(1);
10066 }
10067 }
10068
10069 for engagement in &audit.engagements {
10071 let engagement_id_str = engagement.engagement_id.to_string();
10072 let docs = AuditDocuments::new(
10073 &engagement_id_str,
10074 &engagement.client_entity_id,
10075 &ocpm_uuid_factory,
10076 )
10077 .with_workpapers(
10078 audit
10079 .workpapers
10080 .iter()
10081 .filter(|w| w.engagement_id == engagement.engagement_id)
10082 .take(10)
10083 .map(|w| w.workpaper_id.to_string())
10084 .collect::<Vec<_>>()
10085 .iter()
10086 .map(std::string::String::as_str)
10087 .collect(),
10088 )
10089 .with_evidence(
10090 audit
10091 .evidence
10092 .iter()
10093 .filter(|e| e.engagement_id == engagement.engagement_id)
10094 .take(10)
10095 .map(|e| e.evidence_id.to_string())
10096 .collect::<Vec<_>>()
10097 .iter()
10098 .map(std::string::String::as_str)
10099 .collect(),
10100 )
10101 .with_risks(
10102 audit
10103 .risk_assessments
10104 .iter()
10105 .filter(|r| r.engagement_id == engagement.engagement_id)
10106 .take(5)
10107 .map(|r| r.risk_id.to_string())
10108 .collect::<Vec<_>>()
10109 .iter()
10110 .map(std::string::String::as_str)
10111 .collect(),
10112 )
10113 .with_findings(
10114 audit
10115 .findings
10116 .iter()
10117 .filter(|f| f.engagement_id == engagement.engagement_id)
10118 .take(5)
10119 .map(|f| f.finding_id.to_string())
10120 .collect::<Vec<_>>()
10121 .iter()
10122 .map(std::string::String::as_str)
10123 .collect(),
10124 )
10125 .with_judgments(
10126 audit
10127 .judgments
10128 .iter()
10129 .filter(|j| j.engagement_id == engagement.engagement_id)
10130 .take(5)
10131 .map(|j| j.judgment_id.to_string())
10132 .collect::<Vec<_>>()
10133 .iter()
10134 .map(std::string::String::as_str)
10135 .collect(),
10136 );
10137 let start_time = base_datetime - chrono::Duration::days(120);
10138 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10139 add_result(&mut event_log, result);
10140
10141 if let Some(pb) = &pb {
10142 pb.inc(1);
10143 }
10144 }
10145
10146 for recon in &financial_reporting.bank_reconciliations {
10148 let docs = BankReconDocuments::new(
10149 &recon.reconciliation_id,
10150 &recon.bank_account_id,
10151 &recon.company_code,
10152 recon.bank_ending_balance,
10153 &ocpm_uuid_factory,
10154 )
10155 .with_statement_lines(
10156 recon
10157 .statement_lines
10158 .iter()
10159 .take(20)
10160 .map(|l| l.line_id.as_str())
10161 .collect(),
10162 )
10163 .with_reconciling_items(
10164 recon
10165 .reconciling_items
10166 .iter()
10167 .take(10)
10168 .map(|i| i.item_id.as_str())
10169 .collect(),
10170 );
10171 let start_time = base_datetime - chrono::Duration::days(30);
10172 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10173 add_result(&mut event_log, result);
10174
10175 if let Some(pb) = &pb {
10176 pb.inc(1);
10177 }
10178 }
10179
10180 event_log.compute_variants();
10182
10183 let summary = event_log.summary();
10184
10185 if let Some(pb) = pb {
10186 pb.finish_with_message(format!(
10187 "Generated {} OCPM events, {} objects",
10188 summary.event_count, summary.object_count
10189 ));
10190 }
10191
10192 Ok(OcpmSnapshot {
10193 event_count: summary.event_count,
10194 object_count: summary.object_count,
10195 case_count: summary.case_count,
10196 event_log: Some(event_log),
10197 })
10198 }
10199
10200 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10202 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10203
10204 let total_rate = if self.config.anomaly_injection.enabled {
10207 self.config.anomaly_injection.rates.total_rate
10208 } else if self.config.fraud.enabled {
10209 self.config.fraud.fraud_rate
10210 } else {
10211 0.02
10212 };
10213
10214 let fraud_rate = if self.config.anomaly_injection.enabled {
10215 self.config.anomaly_injection.rates.fraud_rate
10216 } else {
10217 AnomalyRateConfig::default().fraud_rate
10218 };
10219
10220 let error_rate = if self.config.anomaly_injection.enabled {
10221 self.config.anomaly_injection.rates.error_rate
10222 } else {
10223 AnomalyRateConfig::default().error_rate
10224 };
10225
10226 let process_issue_rate = if self.config.anomaly_injection.enabled {
10227 self.config.anomaly_injection.rates.process_rate
10228 } else {
10229 AnomalyRateConfig::default().process_issue_rate
10230 };
10231
10232 let anomaly_config = AnomalyInjectorConfig {
10233 rates: AnomalyRateConfig {
10234 total_rate,
10235 fraud_rate,
10236 error_rate,
10237 process_issue_rate,
10238 ..Default::default()
10239 },
10240 seed: self.seed + 5000,
10241 ..Default::default()
10242 };
10243
10244 let mut injector = AnomalyInjector::new(anomaly_config);
10245 let result = injector.process_entries(entries);
10246
10247 if let Some(pb) = &pb {
10248 pb.inc(entries.len() as u64);
10249 pb.finish_with_message("Anomaly injection complete");
10250 }
10251
10252 let mut by_type = HashMap::new();
10253 for label in &result.labels {
10254 *by_type
10255 .entry(format!("{:?}", label.anomaly_type))
10256 .or_insert(0) += 1;
10257 }
10258
10259 Ok(AnomalyLabels {
10260 labels: result.labels,
10261 summary: Some(result.summary),
10262 by_type,
10263 })
10264 }
10265
10266 fn validate_journal_entries(
10275 &mut self,
10276 entries: &[JournalEntry],
10277 ) -> SynthResult<BalanceValidationResult> {
10278 let clean_entries: Vec<&JournalEntry> = entries
10280 .iter()
10281 .filter(|e| {
10282 e.header
10283 .header_text
10284 .as_ref()
10285 .map(|t| !t.contains("[HUMAN_ERROR:"))
10286 .unwrap_or(true)
10287 })
10288 .collect();
10289
10290 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10291
10292 let config = BalanceTrackerConfig {
10294 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10298 };
10299 let validation_currency = self
10300 .config
10301 .companies
10302 .first()
10303 .map(|c| c.currency.clone())
10304 .unwrap_or_else(|| "USD".to_string());
10305
10306 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10307
10308 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10310 let errors = tracker.apply_entries(&clean_refs);
10311
10312 if let Some(pb) = &pb {
10313 pb.inc(entries.len() as u64);
10314 }
10315
10316 let has_unbalanced = tracker
10319 .get_validation_errors()
10320 .iter()
10321 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10322
10323 let mut all_errors = errors;
10326 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10327 let company_codes: Vec<String> = self
10328 .config
10329 .companies
10330 .iter()
10331 .map(|c| c.code.clone())
10332 .collect();
10333
10334 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10335 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10336 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10337
10338 for company_code in &company_codes {
10339 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10340 all_errors.push(e);
10341 }
10342 }
10343
10344 let stats = tracker.get_statistics();
10346
10347 let is_balanced = all_errors.is_empty();
10349
10350 if let Some(pb) = pb {
10351 let msg = if is_balanced {
10352 "Balance validation passed"
10353 } else {
10354 "Balance validation completed with errors"
10355 };
10356 pb.finish_with_message(msg);
10357 }
10358
10359 Ok(BalanceValidationResult {
10360 validated: true,
10361 is_balanced,
10362 entries_processed: stats.entries_processed,
10363 total_debits: stats.total_debits,
10364 total_credits: stats.total_credits,
10365 accounts_tracked: stats.accounts_tracked,
10366 companies_tracked: stats.companies_tracked,
10367 validation_errors: all_errors,
10368 has_unbalanced_entries: has_unbalanced,
10369 })
10370 }
10371
10372 fn inject_data_quality(
10377 &mut self,
10378 entries: &mut [JournalEntry],
10379 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10380 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10381
10382 let config = if self.config.data_quality.enabled {
10385 let dq = &self.config.data_quality;
10386 DataQualityConfig {
10387 enable_missing_values: dq.missing_values.enabled,
10388 missing_values: datasynth_generators::MissingValueConfig {
10389 global_rate: dq.effective_missing_rate(),
10390 ..Default::default()
10391 },
10392 enable_format_variations: dq.format_variations.enabled,
10393 format_variations: datasynth_generators::FormatVariationConfig {
10394 date_variation_rate: dq.format_variations.dates.rate,
10395 amount_variation_rate: dq.format_variations.amounts.rate,
10396 identifier_variation_rate: dq.format_variations.identifiers.rate,
10397 ..Default::default()
10398 },
10399 enable_duplicates: dq.duplicates.enabled,
10400 duplicates: datasynth_generators::DuplicateConfig {
10401 duplicate_rate: dq.effective_duplicate_rate(),
10402 ..Default::default()
10403 },
10404 enable_typos: dq.typos.enabled,
10405 typos: datasynth_generators::TypoConfig {
10406 char_error_rate: dq.effective_typo_rate(),
10407 ..Default::default()
10408 },
10409 enable_encoding_issues: dq.encoding_issues.enabled,
10410 encoding_issue_rate: dq.encoding_issues.rate,
10411 seed: self.seed.wrapping_add(77), track_statistics: true,
10413 }
10414 } else {
10415 DataQualityConfig::minimal()
10416 };
10417 let mut injector = DataQualityInjector::new(config);
10418
10419 injector.set_country_pack(self.primary_pack().clone());
10421
10422 let context = HashMap::new();
10424
10425 for entry in entries.iter_mut() {
10426 if let Some(text) = &entry.header.header_text {
10428 let processed = injector.process_text_field(
10429 "header_text",
10430 text,
10431 &entry.header.document_id.to_string(),
10432 &context,
10433 );
10434 match processed {
10435 Some(new_text) if new_text != *text => {
10436 entry.header.header_text = Some(new_text);
10437 }
10438 None => {
10439 entry.header.header_text = None; }
10441 _ => {}
10442 }
10443 }
10444
10445 if let Some(ref_text) = &entry.header.reference {
10447 let processed = injector.process_text_field(
10448 "reference",
10449 ref_text,
10450 &entry.header.document_id.to_string(),
10451 &context,
10452 );
10453 match processed {
10454 Some(new_text) if new_text != *ref_text => {
10455 entry.header.reference = Some(new_text);
10456 }
10457 None => {
10458 entry.header.reference = None;
10459 }
10460 _ => {}
10461 }
10462 }
10463
10464 let user_persona = entry.header.user_persona.clone();
10466 if let Some(processed) = injector.process_text_field(
10467 "user_persona",
10468 &user_persona,
10469 &entry.header.document_id.to_string(),
10470 &context,
10471 ) {
10472 if processed != user_persona {
10473 entry.header.user_persona = processed;
10474 }
10475 }
10476
10477 for line in &mut entry.lines {
10479 if let Some(ref text) = line.line_text {
10481 let processed = injector.process_text_field(
10482 "line_text",
10483 text,
10484 &entry.header.document_id.to_string(),
10485 &context,
10486 );
10487 match processed {
10488 Some(new_text) if new_text != *text => {
10489 line.line_text = Some(new_text);
10490 }
10491 None => {
10492 line.line_text = None;
10493 }
10494 _ => {}
10495 }
10496 }
10497
10498 if let Some(cc) = &line.cost_center {
10500 let processed = injector.process_text_field(
10501 "cost_center",
10502 cc,
10503 &entry.header.document_id.to_string(),
10504 &context,
10505 );
10506 match processed {
10507 Some(new_cc) if new_cc != *cc => {
10508 line.cost_center = Some(new_cc);
10509 }
10510 None => {
10511 line.cost_center = None;
10512 }
10513 _ => {}
10514 }
10515 }
10516 }
10517
10518 if let Some(pb) = &pb {
10519 pb.inc(1);
10520 }
10521 }
10522
10523 if let Some(pb) = pb {
10524 pb.finish_with_message("Data quality injection complete");
10525 }
10526
10527 let quality_issues = injector.issues().to_vec();
10528 Ok((injector.stats().clone(), quality_issues))
10529 }
10530
10531 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10542 let use_fsm = self
10544 .config
10545 .audit
10546 .fsm
10547 .as_ref()
10548 .map(|f| f.enabled)
10549 .unwrap_or(false);
10550
10551 if use_fsm {
10552 return self.generate_audit_data_with_fsm(entries);
10553 }
10554
10555 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10557 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10558 let fiscal_year = start_date.year() as u16;
10559 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10560
10561 let total_revenue: rust_decimal::Decimal = entries
10563 .iter()
10564 .flat_map(|e| e.lines.iter())
10565 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10566 .map(|l| l.credit_amount)
10567 .sum();
10568
10569 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10571
10572 let mut snapshot = AuditSnapshot::default();
10573
10574 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10576 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10577 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10578 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10579 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10580 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10581 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10582 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10583 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10584 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10585 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10586 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10587
10588 let accounts: Vec<String> = self
10590 .coa
10591 .as_ref()
10592 .map(|coa| {
10593 coa.get_postable_accounts()
10594 .iter()
10595 .map(|acc| acc.account_code().to_string())
10596 .collect()
10597 })
10598 .unwrap_or_default();
10599
10600 for (i, company) in self.config.companies.iter().enumerate() {
10602 let company_revenue = total_revenue
10604 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10605
10606 let engagements_for_company =
10608 self.phase_config.audit_engagements / self.config.companies.len().max(1);
10609 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10610 1
10611 } else {
10612 0
10613 };
10614
10615 for _eng_idx in 0..(engagements_for_company + extra) {
10616 let mut engagement = engagement_gen.generate_engagement(
10618 &company.code,
10619 &company.name,
10620 fiscal_year,
10621 period_end,
10622 company_revenue,
10623 None, );
10625
10626 if !self.master_data.employees.is_empty() {
10628 let emp_count = self.master_data.employees.len();
10629 let base = (i * 10 + _eng_idx) % emp_count;
10631 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10632 .employee_id
10633 .clone();
10634 engagement.engagement_manager_id = self.master_data.employees
10635 [(base + 1) % emp_count]
10636 .employee_id
10637 .clone();
10638 let real_team: Vec<String> = engagement
10639 .team_member_ids
10640 .iter()
10641 .enumerate()
10642 .map(|(j, _)| {
10643 self.master_data.employees[(base + 2 + j) % emp_count]
10644 .employee_id
10645 .clone()
10646 })
10647 .collect();
10648 engagement.team_member_ids = real_team;
10649 }
10650
10651 if let Some(pb) = &pb {
10652 pb.inc(1);
10653 }
10654
10655 let team_members: Vec<String> = engagement.team_member_ids.clone();
10657
10658 let workpapers =
10660 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10661
10662 for wp in &workpapers {
10663 if let Some(pb) = &pb {
10664 pb.inc(1);
10665 }
10666
10667 let evidence = evidence_gen.generate_evidence_for_workpaper(
10669 wp,
10670 &team_members,
10671 wp.preparer_date,
10672 );
10673
10674 for _ in &evidence {
10675 if let Some(pb) = &pb {
10676 pb.inc(1);
10677 }
10678 }
10679
10680 snapshot.evidence.extend(evidence);
10681 }
10682
10683 let risks =
10685 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10686
10687 for _ in &risks {
10688 if let Some(pb) = &pb {
10689 pb.inc(1);
10690 }
10691 }
10692 snapshot.risk_assessments.extend(risks);
10693
10694 let findings = finding_gen.generate_findings_for_engagement(
10696 &engagement,
10697 &workpapers,
10698 &team_members,
10699 );
10700
10701 for _ in &findings {
10702 if let Some(pb) = &pb {
10703 pb.inc(1);
10704 }
10705 }
10706 snapshot.findings.extend(findings);
10707
10708 let judgments =
10710 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10711
10712 for _ in &judgments {
10713 if let Some(pb) = &pb {
10714 pb.inc(1);
10715 }
10716 }
10717 snapshot.judgments.extend(judgments);
10718
10719 let (confs, resps) =
10721 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10722 snapshot.confirmations.extend(confs);
10723 snapshot.confirmation_responses.extend(resps);
10724
10725 let team_pairs: Vec<(String, String)> = team_members
10727 .iter()
10728 .map(|id| {
10729 let name = self
10730 .master_data
10731 .employees
10732 .iter()
10733 .find(|e| e.employee_id == *id)
10734 .map(|e| e.display_name.clone())
10735 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10736 (id.clone(), name)
10737 })
10738 .collect();
10739 for wp in &workpapers {
10740 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10741 snapshot.procedure_steps.extend(steps);
10742 }
10743
10744 for wp in &workpapers {
10746 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10747 snapshot.samples.push(sample);
10748 }
10749 }
10750
10751 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10753 snapshot.analytical_results.extend(analytical);
10754
10755 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10757 snapshot.ia_functions.push(ia_func);
10758 snapshot.ia_reports.extend(ia_reports);
10759
10760 let vendor_names: Vec<String> = self
10762 .master_data
10763 .vendors
10764 .iter()
10765 .map(|v| v.name.clone())
10766 .collect();
10767 let customer_names: Vec<String> = self
10768 .master_data
10769 .customers
10770 .iter()
10771 .map(|c| c.name.clone())
10772 .collect();
10773 let (parties, rp_txns) =
10774 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10775 snapshot.related_parties.extend(parties);
10776 snapshot.related_party_transactions.extend(rp_txns);
10777
10778 snapshot.workpapers.extend(workpapers);
10780
10781 {
10783 let scope_id = format!(
10784 "SCOPE-{}-{}",
10785 engagement.engagement_id.simple(),
10786 &engagement.client_entity_id
10787 );
10788 let scope = datasynth_core::models::audit::AuditScope::new(
10789 scope_id.clone(),
10790 engagement.engagement_id.to_string(),
10791 engagement.client_entity_id.clone(),
10792 engagement.materiality,
10793 );
10794 let mut eng = engagement;
10796 eng.scope_id = Some(scope_id);
10797 snapshot.audit_scopes.push(scope);
10798 snapshot.engagements.push(eng);
10799 }
10800 }
10801 }
10802
10803 if self.config.companies.len() > 1 {
10807 let group_materiality = snapshot
10810 .engagements
10811 .first()
10812 .map(|e| e.materiality)
10813 .unwrap_or_else(|| {
10814 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10815 total_revenue * pct
10816 });
10817
10818 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10819 let group_engagement_id = snapshot
10820 .engagements
10821 .first()
10822 .map(|e| e.engagement_id.to_string())
10823 .unwrap_or_else(|| "GROUP-ENG".to_string());
10824
10825 let component_snapshot = component_gen.generate(
10826 &self.config.companies,
10827 group_materiality,
10828 &group_engagement_id,
10829 period_end,
10830 );
10831
10832 snapshot.component_auditors = component_snapshot.component_auditors;
10833 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10834 snapshot.component_instructions = component_snapshot.component_instructions;
10835 snapshot.component_reports = component_snapshot.component_reports;
10836
10837 info!(
10838 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10839 snapshot.component_auditors.len(),
10840 snapshot.component_instructions.len(),
10841 snapshot.component_reports.len(),
10842 );
10843 }
10844
10845 {
10849 let applicable_framework = self
10850 .config
10851 .accounting_standards
10852 .framework
10853 .as_ref()
10854 .map(|f| format!("{f:?}"))
10855 .unwrap_or_else(|| "IFRS".to_string());
10856
10857 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10858 let entity_count = self.config.companies.len();
10859
10860 for engagement in &snapshot.engagements {
10861 let company = self
10862 .config
10863 .companies
10864 .iter()
10865 .find(|c| c.code == engagement.client_entity_id);
10866 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10867 let letter_date = engagement.planning_start;
10868 let letter = letter_gen.generate(
10869 &engagement.engagement_id.to_string(),
10870 &engagement.client_name,
10871 entity_count,
10872 engagement.period_end_date,
10873 currency,
10874 &applicable_framework,
10875 letter_date,
10876 );
10877 snapshot.engagement_letters.push(letter);
10878 }
10879
10880 info!(
10881 "ISA 210 engagement letters: {} generated",
10882 snapshot.engagement_letters.len()
10883 );
10884 }
10885
10886 {
10890 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10891 let entity_codes: Vec<String> = self
10892 .config
10893 .companies
10894 .iter()
10895 .map(|c| c.code.clone())
10896 .collect();
10897 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10898 info!(
10899 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10900 subsequent.len(),
10901 subsequent
10902 .iter()
10903 .filter(|e| matches!(
10904 e.classification,
10905 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10906 ))
10907 .count(),
10908 subsequent
10909 .iter()
10910 .filter(|e| matches!(
10911 e.classification,
10912 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10913 ))
10914 .count(),
10915 );
10916 snapshot.subsequent_events = subsequent;
10917 }
10918
10919 {
10923 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10924 let entity_codes: Vec<String> = self
10925 .config
10926 .companies
10927 .iter()
10928 .map(|c| c.code.clone())
10929 .collect();
10930 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10931 info!(
10932 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10933 soc_snapshot.service_organizations.len(),
10934 soc_snapshot.soc_reports.len(),
10935 soc_snapshot.user_entity_controls.len(),
10936 );
10937 snapshot.service_organizations = soc_snapshot.service_organizations;
10938 snapshot.soc_reports = soc_snapshot.soc_reports;
10939 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10940 }
10941
10942 {
10946 use datasynth_generators::audit::going_concern_generator::{
10947 GoingConcernGenerator, GoingConcernInput,
10948 };
10949 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10950 let entity_codes: Vec<String> = self
10951 .config
10952 .companies
10953 .iter()
10954 .map(|c| c.code.clone())
10955 .collect();
10956 let assessment_date = period_end + chrono::Duration::days(75);
10958 let period_label = format!("FY{}", period_end.year());
10959
10960 let gc_inputs: Vec<GoingConcernInput> = self
10971 .config
10972 .companies
10973 .iter()
10974 .map(|company| {
10975 let code = &company.code;
10976 let mut revenue = rust_decimal::Decimal::ZERO;
10977 let mut expenses = rust_decimal::Decimal::ZERO;
10978 let mut current_assets = rust_decimal::Decimal::ZERO;
10979 let mut current_liabs = rust_decimal::Decimal::ZERO;
10980 let mut total_debt = rust_decimal::Decimal::ZERO;
10981
10982 for je in entries.iter().filter(|je| &je.header.company_code == code) {
10983 for line in &je.lines {
10984 let acct = line.gl_account.as_str();
10985 let net = line.debit_amount - line.credit_amount;
10986 if acct.starts_with('4') {
10987 revenue -= net;
10989 } else if acct.starts_with('6') {
10990 expenses += net;
10992 }
10993 if acct.starts_with('1') {
10995 if let Ok(n) = acct.parse::<u32>() {
10997 if (1000..=1499).contains(&n) {
10998 current_assets += net;
10999 }
11000 }
11001 } else if acct.starts_with('2') {
11002 if let Ok(n) = acct.parse::<u32>() {
11003 if (2000..=2499).contains(&n) {
11004 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11007 total_debt -= net;
11009 }
11010 }
11011 }
11012 }
11013 }
11014
11015 let net_income = revenue - expenses;
11016 let working_capital = current_assets - current_liabs;
11017 let operating_cash_flow = net_income;
11020
11021 GoingConcernInput {
11022 entity_code: code.clone(),
11023 net_income,
11024 working_capital,
11025 operating_cash_flow,
11026 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11027 assessment_date,
11028 }
11029 })
11030 .collect();
11031
11032 let assessments = if gc_inputs.is_empty() {
11033 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11034 } else {
11035 gc_gen.generate_for_entities_with_inputs(
11036 &entity_codes,
11037 &gc_inputs,
11038 assessment_date,
11039 &period_label,
11040 )
11041 };
11042 info!(
11043 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11044 assessments.len(),
11045 assessments.iter().filter(|a| matches!(
11046 a.auditor_conclusion,
11047 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11048 )).count(),
11049 assessments.iter().filter(|a| matches!(
11050 a.auditor_conclusion,
11051 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11052 )).count(),
11053 assessments.iter().filter(|a| matches!(
11054 a.auditor_conclusion,
11055 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11056 )).count(),
11057 );
11058 snapshot.going_concern_assessments = assessments;
11059 }
11060
11061 {
11065 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11066 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11067 let entity_codes: Vec<String> = self
11068 .config
11069 .companies
11070 .iter()
11071 .map(|c| c.code.clone())
11072 .collect();
11073 let estimates = est_gen.generate_for_entities(&entity_codes);
11074 info!(
11075 "ISA 540 accounting estimates: {} estimates across {} entities \
11076 ({} with retrospective reviews, {} with auditor point estimates)",
11077 estimates.len(),
11078 entity_codes.len(),
11079 estimates
11080 .iter()
11081 .filter(|e| e.retrospective_review.is_some())
11082 .count(),
11083 estimates
11084 .iter()
11085 .filter(|e| e.auditor_point_estimate.is_some())
11086 .count(),
11087 );
11088 snapshot.accounting_estimates = estimates;
11089 }
11090
11091 {
11095 use datasynth_generators::audit::audit_opinion_generator::{
11096 AuditOpinionGenerator, AuditOpinionInput,
11097 };
11098
11099 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11100
11101 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11103 .engagements
11104 .iter()
11105 .map(|eng| {
11106 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11108 .findings
11109 .iter()
11110 .filter(|f| f.engagement_id == eng.engagement_id)
11111 .cloned()
11112 .collect();
11113
11114 let gc = snapshot
11116 .going_concern_assessments
11117 .iter()
11118 .find(|g| g.entity_code == eng.client_entity_id)
11119 .cloned();
11120
11121 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11123 snapshot.component_reports.clone();
11124
11125 let auditor = self
11126 .master_data
11127 .employees
11128 .first()
11129 .map(|e| e.display_name.clone())
11130 .unwrap_or_else(|| "Global Audit LLP".into());
11131
11132 let partner = self
11133 .master_data
11134 .employees
11135 .get(1)
11136 .map(|e| e.display_name.clone())
11137 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11138
11139 AuditOpinionInput {
11140 entity_code: eng.client_entity_id.clone(),
11141 entity_name: eng.client_name.clone(),
11142 engagement_id: eng.engagement_id,
11143 period_end: eng.period_end_date,
11144 findings: eng_findings,
11145 going_concern: gc,
11146 component_reports: comp_reports,
11147 is_us_listed: {
11149 let fw = &self.config.audit_standards.isa_compliance.framework;
11150 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11151 },
11152 auditor_name: auditor,
11153 engagement_partner: partner,
11154 }
11155 })
11156 .collect();
11157
11158 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11159
11160 for go in &generated_opinions {
11161 snapshot
11162 .key_audit_matters
11163 .extend(go.key_audit_matters.clone());
11164 }
11165 snapshot.audit_opinions = generated_opinions
11166 .into_iter()
11167 .map(|go| go.opinion)
11168 .collect();
11169
11170 info!(
11171 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11172 snapshot.audit_opinions.len(),
11173 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11174 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11175 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11176 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11177 );
11178 }
11179
11180 {
11184 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11185
11186 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11187
11188 for (i, company) in self.config.companies.iter().enumerate() {
11189 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11191 .engagements
11192 .iter()
11193 .filter(|e| e.client_entity_id == company.code)
11194 .map(|e| e.engagement_id)
11195 .collect();
11196
11197 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11198 .findings
11199 .iter()
11200 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11201 .cloned()
11202 .collect();
11203
11204 let emp_count = self.master_data.employees.len();
11206 let ceo_name = if emp_count > 0 {
11207 self.master_data.employees[i % emp_count]
11208 .display_name
11209 .clone()
11210 } else {
11211 format!("CEO of {}", company.name)
11212 };
11213 let cfo_name = if emp_count > 1 {
11214 self.master_data.employees[(i + 1) % emp_count]
11215 .display_name
11216 .clone()
11217 } else {
11218 format!("CFO of {}", company.name)
11219 };
11220
11221 let materiality = snapshot
11223 .engagements
11224 .iter()
11225 .find(|e| e.client_entity_id == company.code)
11226 .map(|e| e.materiality)
11227 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11228
11229 let input = SoxGeneratorInput {
11230 company_code: company.code.clone(),
11231 company_name: company.name.clone(),
11232 fiscal_year,
11233 period_end,
11234 findings: company_findings,
11235 ceo_name,
11236 cfo_name,
11237 materiality_threshold: materiality,
11238 revenue_percent: rust_decimal::Decimal::from(100),
11239 assets_percent: rust_decimal::Decimal::from(100),
11240 significant_accounts: vec![
11241 "Revenue".into(),
11242 "Accounts Receivable".into(),
11243 "Inventory".into(),
11244 "Fixed Assets".into(),
11245 "Accounts Payable".into(),
11246 ],
11247 };
11248
11249 let (certs, assessment) = sox_gen.generate(&input);
11250 snapshot.sox_302_certifications.extend(certs);
11251 snapshot.sox_404_assessments.push(assessment);
11252 }
11253
11254 info!(
11255 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11256 snapshot.sox_302_certifications.len(),
11257 snapshot.sox_404_assessments.len(),
11258 snapshot
11259 .sox_404_assessments
11260 .iter()
11261 .filter(|a| a.icfr_effective)
11262 .count(),
11263 snapshot
11264 .sox_404_assessments
11265 .iter()
11266 .filter(|a| !a.icfr_effective)
11267 .count(),
11268 );
11269 }
11270
11271 {
11275 use datasynth_generators::audit::materiality_generator::{
11276 MaterialityGenerator, MaterialityInput,
11277 };
11278
11279 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11280
11281 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11285
11286 for company in &self.config.companies {
11287 let company_code = company.code.clone();
11288
11289 let company_revenue: rust_decimal::Decimal = entries
11291 .iter()
11292 .filter(|e| e.company_code() == company_code)
11293 .flat_map(|e| e.lines.iter())
11294 .filter(|l| l.account_code.starts_with('4'))
11295 .map(|l| l.credit_amount)
11296 .sum();
11297
11298 let total_assets: rust_decimal::Decimal = entries
11300 .iter()
11301 .filter(|e| e.company_code() == company_code)
11302 .flat_map(|e| e.lines.iter())
11303 .filter(|l| l.account_code.starts_with('1'))
11304 .map(|l| l.debit_amount)
11305 .sum();
11306
11307 let total_expenses: rust_decimal::Decimal = entries
11309 .iter()
11310 .filter(|e| e.company_code() == company_code)
11311 .flat_map(|e| e.lines.iter())
11312 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11313 .map(|l| l.debit_amount)
11314 .sum();
11315
11316 let equity: rust_decimal::Decimal = entries
11318 .iter()
11319 .filter(|e| e.company_code() == company_code)
11320 .flat_map(|e| e.lines.iter())
11321 .filter(|l| l.account_code.starts_with('3'))
11322 .map(|l| l.credit_amount)
11323 .sum();
11324
11325 let pretax_income = company_revenue - total_expenses;
11326
11327 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11329 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11330 .unwrap_or(rust_decimal::Decimal::ONE);
11331 (
11332 total_revenue * w,
11333 total_revenue * w * rust_decimal::Decimal::from(3),
11334 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11335 total_revenue * w * rust_decimal::Decimal::from(2),
11336 )
11337 } else {
11338 (company_revenue, total_assets, pretax_income, equity)
11339 };
11340
11341 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11344 entity_code: company_code,
11345 period: format!("FY{}", fiscal_year),
11346 revenue: rev,
11347 pretax_income: pti,
11348 total_assets: assets,
11349 equity: eq,
11350 gross_profit,
11351 });
11352 }
11353
11354 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11355
11356 info!(
11357 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11358 {} total assets, {} equity benchmarks)",
11359 snapshot.materiality_calculations.len(),
11360 snapshot
11361 .materiality_calculations
11362 .iter()
11363 .filter(|m| matches!(
11364 m.benchmark,
11365 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11366 ))
11367 .count(),
11368 snapshot
11369 .materiality_calculations
11370 .iter()
11371 .filter(|m| matches!(
11372 m.benchmark,
11373 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11374 ))
11375 .count(),
11376 snapshot
11377 .materiality_calculations
11378 .iter()
11379 .filter(|m| matches!(
11380 m.benchmark,
11381 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11382 ))
11383 .count(),
11384 snapshot
11385 .materiality_calculations
11386 .iter()
11387 .filter(|m| matches!(
11388 m.benchmark,
11389 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11390 ))
11391 .count(),
11392 );
11393 }
11394
11395 {
11399 use datasynth_generators::audit::cra_generator::CraGenerator;
11400
11401 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11402
11403 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11405 .audit_scopes
11406 .iter()
11407 .map(|s| (s.entity_code.clone(), s.id.clone()))
11408 .collect();
11409
11410 for company in &self.config.companies {
11411 let cras = cra_gen.generate_for_entity(&company.code, None);
11412 let scope_id = entity_scope_map.get(&company.code).cloned();
11413 let cras_with_scope: Vec<_> = cras
11414 .into_iter()
11415 .map(|mut cra| {
11416 cra.scope_id = scope_id.clone();
11417 cra
11418 })
11419 .collect();
11420 snapshot.combined_risk_assessments.extend(cras_with_scope);
11421 }
11422
11423 let significant_count = snapshot
11424 .combined_risk_assessments
11425 .iter()
11426 .filter(|c| c.significant_risk)
11427 .count();
11428 let high_cra_count = snapshot
11429 .combined_risk_assessments
11430 .iter()
11431 .filter(|c| {
11432 matches!(
11433 c.combined_risk,
11434 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11435 )
11436 })
11437 .count();
11438
11439 info!(
11440 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11441 snapshot.combined_risk_assessments.len(),
11442 significant_count,
11443 high_cra_count,
11444 );
11445 }
11446
11447 {
11451 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11452
11453 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11454
11455 for company in &self.config.companies {
11457 let entity_code = company.code.clone();
11458
11459 let tolerable_error = snapshot
11461 .materiality_calculations
11462 .iter()
11463 .find(|m| m.entity_code == entity_code)
11464 .map(|m| m.tolerable_error);
11465
11466 let entity_cras: Vec<_> = snapshot
11468 .combined_risk_assessments
11469 .iter()
11470 .filter(|c| c.entity_code == entity_code)
11471 .cloned()
11472 .collect();
11473
11474 if !entity_cras.is_empty() {
11475 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11476 snapshot.sampling_plans.extend(plans);
11477 snapshot.sampled_items.extend(items);
11478 }
11479 }
11480
11481 let misstatement_count = snapshot
11482 .sampled_items
11483 .iter()
11484 .filter(|i| i.misstatement_found)
11485 .count();
11486
11487 info!(
11488 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11489 snapshot.sampling_plans.len(),
11490 snapshot.sampled_items.len(),
11491 misstatement_count,
11492 );
11493 }
11494
11495 {
11499 use datasynth_generators::audit::scots_generator::{
11500 ScotsGenerator, ScotsGeneratorConfig,
11501 };
11502
11503 let ic_enabled = self.config.intercompany.enabled;
11504
11505 let config = ScotsGeneratorConfig {
11506 intercompany_enabled: ic_enabled,
11507 ..ScotsGeneratorConfig::default()
11508 };
11509 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11510
11511 for company in &self.config.companies {
11512 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11513 snapshot
11514 .significant_transaction_classes
11515 .extend(entity_scots);
11516 }
11517
11518 let estimation_count = snapshot
11519 .significant_transaction_classes
11520 .iter()
11521 .filter(|s| {
11522 matches!(
11523 s.transaction_type,
11524 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11525 )
11526 })
11527 .count();
11528
11529 info!(
11530 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11531 snapshot.significant_transaction_classes.len(),
11532 estimation_count,
11533 );
11534 }
11535
11536 {
11540 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11541
11542 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11543 let entity_codes: Vec<String> = self
11544 .config
11545 .companies
11546 .iter()
11547 .map(|c| c.code.clone())
11548 .collect();
11549 let unusual_flags =
11550 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11551 info!(
11552 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11553 unusual_flags.len(),
11554 unusual_flags
11555 .iter()
11556 .filter(|f| matches!(
11557 f.severity,
11558 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11559 ))
11560 .count(),
11561 unusual_flags
11562 .iter()
11563 .filter(|f| matches!(
11564 f.severity,
11565 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11566 ))
11567 .count(),
11568 unusual_flags
11569 .iter()
11570 .filter(|f| matches!(
11571 f.severity,
11572 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11573 ))
11574 .count(),
11575 );
11576 snapshot.unusual_items = unusual_flags;
11577 }
11578
11579 {
11583 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11584
11585 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11586 let entity_codes: Vec<String> = self
11587 .config
11588 .companies
11589 .iter()
11590 .map(|c| c.code.clone())
11591 .collect();
11592 let current_period_label = format!("FY{fiscal_year}");
11593 let prior_period_label = format!("FY{}", fiscal_year - 1);
11594 let analytical_rels = ar_gen.generate_for_entities(
11595 &entity_codes,
11596 entries,
11597 ¤t_period_label,
11598 &prior_period_label,
11599 );
11600 let out_of_range = analytical_rels
11601 .iter()
11602 .filter(|r| !r.within_expected_range)
11603 .count();
11604 info!(
11605 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11606 analytical_rels.len(),
11607 out_of_range,
11608 );
11609 snapshot.analytical_relationships = analytical_rels;
11610 }
11611
11612 if let Some(pb) = pb {
11613 pb.finish_with_message(format!(
11614 "Audit data: {} engagements, {} workpapers, {} evidence, \
11615 {} confirmations, {} procedure steps, {} samples, \
11616 {} analytical, {} IA funcs, {} related parties, \
11617 {} component auditors, {} letters, {} subsequent events, \
11618 {} service orgs, {} going concern, {} accounting estimates, \
11619 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11620 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11621 {} unusual items, {} analytical relationships",
11622 snapshot.engagements.len(),
11623 snapshot.workpapers.len(),
11624 snapshot.evidence.len(),
11625 snapshot.confirmations.len(),
11626 snapshot.procedure_steps.len(),
11627 snapshot.samples.len(),
11628 snapshot.analytical_results.len(),
11629 snapshot.ia_functions.len(),
11630 snapshot.related_parties.len(),
11631 snapshot.component_auditors.len(),
11632 snapshot.engagement_letters.len(),
11633 snapshot.subsequent_events.len(),
11634 snapshot.service_organizations.len(),
11635 snapshot.going_concern_assessments.len(),
11636 snapshot.accounting_estimates.len(),
11637 snapshot.audit_opinions.len(),
11638 snapshot.key_audit_matters.len(),
11639 snapshot.sox_302_certifications.len(),
11640 snapshot.sox_404_assessments.len(),
11641 snapshot.materiality_calculations.len(),
11642 snapshot.combined_risk_assessments.len(),
11643 snapshot.sampling_plans.len(),
11644 snapshot.significant_transaction_classes.len(),
11645 snapshot.unusual_items.len(),
11646 snapshot.analytical_relationships.len(),
11647 ));
11648 }
11649
11650 {
11657 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11658 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11659 debug!(
11660 "PCAOB-ISA mappings generated: {} mappings",
11661 snapshot.isa_pcaob_mappings.len()
11662 );
11663 }
11664
11665 {
11672 use datasynth_standards::audit::isa_reference::IsaStandard;
11673 snapshot.isa_mappings = IsaStandard::standard_entries();
11674 debug!(
11675 "ISA standard entries generated: {} standards",
11676 snapshot.isa_mappings.len()
11677 );
11678 }
11679
11680 {
11683 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11684 .engagements
11685 .iter()
11686 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11687 .collect();
11688
11689 for rpt in &mut snapshot.related_party_transactions {
11690 if rpt.journal_entry_id.is_some() {
11691 continue; }
11693 let entity = engagement_by_id
11694 .get(&rpt.engagement_id.to_string())
11695 .copied()
11696 .unwrap_or("");
11697
11698 let best_je = entries
11700 .iter()
11701 .filter(|je| je.header.company_code == entity)
11702 .min_by_key(|je| {
11703 (je.header.posting_date - rpt.transaction_date)
11704 .num_days()
11705 .abs()
11706 });
11707
11708 if let Some(je) = best_je {
11709 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11710 }
11711 }
11712
11713 let linked = snapshot
11714 .related_party_transactions
11715 .iter()
11716 .filter(|t| t.journal_entry_id.is_some())
11717 .count();
11718 debug!(
11719 "Linked {}/{} related party transactions to journal entries",
11720 linked,
11721 snapshot.related_party_transactions.len()
11722 );
11723 }
11724
11725 Ok(snapshot)
11726 }
11727
11728 fn generate_audit_data_with_fsm(
11735 &mut self,
11736 entries: &[JournalEntry],
11737 ) -> SynthResult<AuditSnapshot> {
11738 use datasynth_audit_fsm::{
11739 context::EngagementContext,
11740 engine::AuditFsmEngine,
11741 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11742 };
11743 use rand::SeedableRng;
11744 use rand_chacha::ChaCha8Rng;
11745
11746 info!("Audit FSM: generating audit data via FSM engine");
11747
11748 let fsm_config = self
11749 .config
11750 .audit
11751 .fsm
11752 .as_ref()
11753 .expect("FSM config must be present when FSM is enabled");
11754
11755 let bwp = match fsm_config.blueprint.as_str() {
11757 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11758 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11759 _ => {
11760 warn!(
11761 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11762 fsm_config.blueprint
11763 );
11764 BlueprintWithPreconditions::load_builtin_fsa()
11765 }
11766 }
11767 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11768
11769 let overlay = match fsm_config.overlay.as_str() {
11771 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11772 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11773 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11774 _ => {
11775 warn!(
11776 "Unknown FSM overlay '{}', falling back to builtin:default",
11777 fsm_config.overlay
11778 );
11779 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11780 }
11781 }
11782 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11783
11784 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11786 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11787 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11788
11789 let company = self.config.companies.first();
11791 let company_code = company
11792 .map(|c| c.code.clone())
11793 .unwrap_or_else(|| "UNKNOWN".to_string());
11794 let company_name = company
11795 .map(|c| c.name.clone())
11796 .unwrap_or_else(|| "Unknown Company".to_string());
11797 let currency = company
11798 .map(|c| c.currency.clone())
11799 .unwrap_or_else(|| "USD".to_string());
11800
11801 let entity_entries: Vec<_> = entries
11803 .iter()
11804 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11805 .cloned()
11806 .collect();
11807 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
11811 .iter()
11812 .flat_map(|e| e.lines.iter())
11813 .filter(|l| l.account_code.starts_with('4'))
11814 .map(|l| l.credit_amount - l.debit_amount)
11815 .sum();
11816
11817 let total_assets: rust_decimal::Decimal = entries
11818 .iter()
11819 .flat_map(|e| e.lines.iter())
11820 .filter(|l| l.account_code.starts_with('1'))
11821 .map(|l| l.debit_amount - l.credit_amount)
11822 .sum();
11823
11824 let total_expenses: rust_decimal::Decimal = entries
11825 .iter()
11826 .flat_map(|e| e.lines.iter())
11827 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11828 .map(|l| l.debit_amount)
11829 .sum();
11830
11831 let equity: rust_decimal::Decimal = entries
11832 .iter()
11833 .flat_map(|e| e.lines.iter())
11834 .filter(|l| l.account_code.starts_with('3'))
11835 .map(|l| l.credit_amount - l.debit_amount)
11836 .sum();
11837
11838 let total_debt: rust_decimal::Decimal = entries
11839 .iter()
11840 .flat_map(|e| e.lines.iter())
11841 .filter(|l| l.account_code.starts_with('2'))
11842 .map(|l| l.credit_amount - l.debit_amount)
11843 .sum();
11844
11845 let pretax_income = total_revenue - total_expenses;
11846
11847 let cogs: rust_decimal::Decimal = entries
11848 .iter()
11849 .flat_map(|e| e.lines.iter())
11850 .filter(|l| l.account_code.starts_with('5'))
11851 .map(|l| l.debit_amount)
11852 .sum();
11853 let gross_profit = total_revenue - cogs;
11854
11855 let current_assets: rust_decimal::Decimal = entries
11856 .iter()
11857 .flat_map(|e| e.lines.iter())
11858 .filter(|l| {
11859 l.account_code.starts_with("10")
11860 || l.account_code.starts_with("11")
11861 || l.account_code.starts_with("12")
11862 || l.account_code.starts_with("13")
11863 })
11864 .map(|l| l.debit_amount - l.credit_amount)
11865 .sum();
11866 let current_liabilities: rust_decimal::Decimal = entries
11867 .iter()
11868 .flat_map(|e| e.lines.iter())
11869 .filter(|l| {
11870 l.account_code.starts_with("20")
11871 || l.account_code.starts_with("21")
11872 || l.account_code.starts_with("22")
11873 })
11874 .map(|l| l.credit_amount - l.debit_amount)
11875 .sum();
11876 let working_capital = current_assets - current_liabilities;
11877
11878 let depreciation: rust_decimal::Decimal = entries
11879 .iter()
11880 .flat_map(|e| e.lines.iter())
11881 .filter(|l| l.account_code.starts_with("60"))
11882 .map(|l| l.debit_amount)
11883 .sum();
11884 let operating_cash_flow = pretax_income + depreciation;
11885
11886 let accounts: Vec<String> = self
11888 .coa
11889 .as_ref()
11890 .map(|coa| {
11891 coa.get_postable_accounts()
11892 .iter()
11893 .map(|acc| acc.account_code().to_string())
11894 .collect()
11895 })
11896 .unwrap_or_default();
11897
11898 let team_member_ids: Vec<String> = self
11900 .master_data
11901 .employees
11902 .iter()
11903 .take(8) .map(|e| e.employee_id.clone())
11905 .collect();
11906 let team_member_pairs: Vec<(String, String)> = self
11907 .master_data
11908 .employees
11909 .iter()
11910 .take(8)
11911 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11912 .collect();
11913
11914 let vendor_names: Vec<String> = self
11915 .master_data
11916 .vendors
11917 .iter()
11918 .map(|v| v.name.clone())
11919 .collect();
11920 let customer_names: Vec<String> = self
11921 .master_data
11922 .customers
11923 .iter()
11924 .map(|c| c.name.clone())
11925 .collect();
11926
11927 let entity_codes: Vec<String> = self
11928 .config
11929 .companies
11930 .iter()
11931 .map(|c| c.code.clone())
11932 .collect();
11933
11934 let journal_entry_ids: Vec<String> = entries
11936 .iter()
11937 .take(50)
11938 .map(|e| e.header.document_id.to_string())
11939 .collect();
11940
11941 let mut account_balances = std::collections::HashMap::<String, f64>::new();
11943 for entry in entries {
11944 for line in &entry.lines {
11945 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11946 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11947 *account_balances
11948 .entry(line.account_code.clone())
11949 .or_insert(0.0) += debit_f64 - credit_f64;
11950 }
11951 }
11952
11953 let control_ids: Vec<String> = Vec::new();
11958 let anomaly_refs: Vec<String> = Vec::new();
11959
11960 let mut context = EngagementContext {
11961 company_code,
11962 company_name,
11963 fiscal_year: start_date.year(),
11964 currency,
11965 total_revenue,
11966 total_assets,
11967 engagement_start: start_date,
11968 report_date: period_end,
11969 pretax_income,
11970 equity,
11971 gross_profit,
11972 working_capital,
11973 operating_cash_flow,
11974 total_debt,
11975 team_member_ids,
11976 team_member_pairs,
11977 accounts,
11978 vendor_names,
11979 customer_names,
11980 journal_entry_ids,
11981 account_balances,
11982 control_ids,
11983 anomaly_refs,
11984 journal_entries: entries.to_vec(),
11985 is_us_listed: false,
11986 entity_codes,
11987 auditor_firm_name: "DataSynth Audit LLP".into(),
11988 accounting_framework: self
11989 .config
11990 .accounting_standards
11991 .framework
11992 .map(|f| match f {
11993 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
11994 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
11995 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
11996 "French GAAP"
11997 }
11998 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
11999 "German GAAP"
12000 }
12001 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12002 "Dual Reporting"
12003 }
12004 })
12005 .unwrap_or("IFRS")
12006 .into(),
12007 };
12008
12009 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12011 let rng = ChaCha8Rng::seed_from_u64(seed);
12012 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12013
12014 let mut result = engine
12015 .run_engagement(&context)
12016 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12017
12018 info!(
12019 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12020 {} phases completed, duration {:.1}h",
12021 result.event_log.len(),
12022 result.artifacts.total_artifacts(),
12023 result.anomalies.len(),
12024 result.phases_completed.len(),
12025 result.total_duration_hours,
12026 );
12027
12028 let tb_entity = context.company_code.clone();
12030 let tb_fy = context.fiscal_year;
12031 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12032 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12033 entries,
12034 &tb_entity,
12035 tb_fy,
12036 self.coa.as_ref().map(|c| c.as_ref()),
12037 );
12038
12039 let bag = result.artifacts;
12041 let mut snapshot = AuditSnapshot {
12042 engagements: bag.engagements,
12043 engagement_letters: bag.engagement_letters,
12044 materiality_calculations: bag.materiality_calculations,
12045 risk_assessments: bag.risk_assessments,
12046 combined_risk_assessments: bag.combined_risk_assessments,
12047 workpapers: bag.workpapers,
12048 evidence: bag.evidence,
12049 findings: bag.findings,
12050 judgments: bag.judgments,
12051 sampling_plans: bag.sampling_plans,
12052 sampled_items: bag.sampled_items,
12053 analytical_results: bag.analytical_results,
12054 going_concern_assessments: bag.going_concern_assessments,
12055 subsequent_events: bag.subsequent_events,
12056 audit_opinions: bag.audit_opinions,
12057 key_audit_matters: bag.key_audit_matters,
12058 procedure_steps: bag.procedure_steps,
12059 samples: bag.samples,
12060 confirmations: bag.confirmations,
12061 confirmation_responses: bag.confirmation_responses,
12062 fsm_event_trail: Some(result.event_log),
12064 ..Default::default()
12066 };
12067
12068 {
12070 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12071 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12072 }
12073 {
12074 use datasynth_standards::audit::isa_reference::IsaStandard;
12075 snapshot.isa_mappings = IsaStandard::standard_entries();
12076 }
12077
12078 info!(
12079 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12080 {} risk assessments, {} findings, {} materiality calcs",
12081 snapshot.engagements.len(),
12082 snapshot.workpapers.len(),
12083 snapshot.evidence.len(),
12084 snapshot.risk_assessments.len(),
12085 snapshot.findings.len(),
12086 snapshot.materiality_calculations.len(),
12087 );
12088
12089 Ok(snapshot)
12090 }
12091
12092 fn export_graphs(
12099 &mut self,
12100 entries: &[JournalEntry],
12101 _coa: &Arc<ChartOfAccounts>,
12102 stats: &mut EnhancedGenerationStatistics,
12103 ) -> SynthResult<GraphExportSnapshot> {
12104 let pb = self.create_progress_bar(100, "Exporting Graphs");
12105
12106 let mut snapshot = GraphExportSnapshot::default();
12107
12108 let output_dir = self
12110 .output_path
12111 .clone()
12112 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12113 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12114
12115 for graph_type in &self.config.graph_export.graph_types {
12117 if let Some(pb) = &pb {
12118 pb.inc(10);
12119 }
12120
12121 let graph_config = TransactionGraphConfig {
12123 include_vendors: false,
12124 include_customers: false,
12125 create_debit_credit_edges: true,
12126 include_document_nodes: graph_type.include_document_nodes,
12127 min_edge_weight: graph_type.min_edge_weight,
12128 aggregate_parallel_edges: graph_type.aggregate_edges,
12129 framework: None,
12130 };
12131
12132 let mut builder = TransactionGraphBuilder::new(graph_config);
12133 builder.add_journal_entries(entries);
12134 let graph = builder.build();
12135
12136 stats.graph_node_count += graph.node_count();
12138 stats.graph_edge_count += graph.edge_count();
12139
12140 if let Some(pb) = &pb {
12141 pb.inc(40);
12142 }
12143
12144 for format in &self.config.graph_export.formats {
12146 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12147
12148 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12150 warn!("Failed to create graph output directory: {}", e);
12151 continue;
12152 }
12153
12154 match format {
12155 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12156 let pyg_config = PyGExportConfig {
12157 common: datasynth_graph::CommonExportConfig {
12158 export_node_features: true,
12159 export_edge_features: true,
12160 export_node_labels: true,
12161 export_edge_labels: true,
12162 export_masks: true,
12163 train_ratio: self.config.graph_export.train_ratio,
12164 val_ratio: self.config.graph_export.validation_ratio,
12165 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12166 },
12167 one_hot_categoricals: false,
12168 };
12169
12170 let exporter = PyGExporter::new(pyg_config);
12171 match exporter.export(&graph, &format_dir) {
12172 Ok(metadata) => {
12173 snapshot.exports.insert(
12174 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12175 GraphExportInfo {
12176 name: graph_type.name.clone(),
12177 format: "pytorch_geometric".to_string(),
12178 output_path: format_dir.clone(),
12179 node_count: metadata.num_nodes,
12180 edge_count: metadata.num_edges,
12181 },
12182 );
12183 snapshot.graph_count += 1;
12184 }
12185 Err(e) => {
12186 warn!("Failed to export PyTorch Geometric graph: {}", e);
12187 }
12188 }
12189 }
12190 datasynth_config::schema::GraphExportFormat::Neo4j => {
12191 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12192
12193 let neo4j_config = Neo4jExportConfig {
12194 export_node_properties: true,
12195 export_edge_properties: true,
12196 export_features: true,
12197 generate_cypher: true,
12198 generate_admin_import: true,
12199 database_name: "synth".to_string(),
12200 cypher_batch_size: 1000,
12201 };
12202
12203 let exporter = Neo4jExporter::new(neo4j_config);
12204 match exporter.export(&graph, &format_dir) {
12205 Ok(metadata) => {
12206 snapshot.exports.insert(
12207 format!("{}_{}", graph_type.name, "neo4j"),
12208 GraphExportInfo {
12209 name: graph_type.name.clone(),
12210 format: "neo4j".to_string(),
12211 output_path: format_dir.clone(),
12212 node_count: metadata.num_nodes,
12213 edge_count: metadata.num_edges,
12214 },
12215 );
12216 snapshot.graph_count += 1;
12217 }
12218 Err(e) => {
12219 warn!("Failed to export Neo4j graph: {}", e);
12220 }
12221 }
12222 }
12223 datasynth_config::schema::GraphExportFormat::Dgl => {
12224 use datasynth_graph::{DGLExportConfig, DGLExporter};
12225
12226 let dgl_config = DGLExportConfig {
12227 common: datasynth_graph::CommonExportConfig {
12228 export_node_features: true,
12229 export_edge_features: true,
12230 export_node_labels: true,
12231 export_edge_labels: true,
12232 export_masks: true,
12233 train_ratio: self.config.graph_export.train_ratio,
12234 val_ratio: self.config.graph_export.validation_ratio,
12235 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12236 },
12237 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12238 include_pickle_script: true, };
12240
12241 let exporter = DGLExporter::new(dgl_config);
12242 match exporter.export(&graph, &format_dir) {
12243 Ok(metadata) => {
12244 snapshot.exports.insert(
12245 format!("{}_{}", graph_type.name, "dgl"),
12246 GraphExportInfo {
12247 name: graph_type.name.clone(),
12248 format: "dgl".to_string(),
12249 output_path: format_dir.clone(),
12250 node_count: metadata.common.num_nodes,
12251 edge_count: metadata.common.num_edges,
12252 },
12253 );
12254 snapshot.graph_count += 1;
12255 }
12256 Err(e) => {
12257 warn!("Failed to export DGL graph: {}", e);
12258 }
12259 }
12260 }
12261 datasynth_config::schema::GraphExportFormat::RustGraph => {
12262 use datasynth_graph::{
12263 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12264 };
12265
12266 let rustgraph_config = RustGraphExportConfig {
12267 include_features: true,
12268 include_temporal: true,
12269 include_labels: true,
12270 source_name: "datasynth".to_string(),
12271 batch_id: None,
12272 output_format: RustGraphOutputFormat::JsonLines,
12273 export_node_properties: true,
12274 export_edge_properties: true,
12275 pretty_print: false,
12276 };
12277
12278 let exporter = RustGraphExporter::new(rustgraph_config);
12279 match exporter.export(&graph, &format_dir) {
12280 Ok(metadata) => {
12281 snapshot.exports.insert(
12282 format!("{}_{}", graph_type.name, "rustgraph"),
12283 GraphExportInfo {
12284 name: graph_type.name.clone(),
12285 format: "rustgraph".to_string(),
12286 output_path: format_dir.clone(),
12287 node_count: metadata.num_nodes,
12288 edge_count: metadata.num_edges,
12289 },
12290 );
12291 snapshot.graph_count += 1;
12292 }
12293 Err(e) => {
12294 warn!("Failed to export RustGraph: {}", e);
12295 }
12296 }
12297 }
12298 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12299 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12301 }
12302 }
12303 }
12304
12305 if let Some(pb) = &pb {
12306 pb.inc(40);
12307 }
12308 }
12309
12310 stats.graph_export_count = snapshot.graph_count;
12311 snapshot.exported = snapshot.graph_count > 0;
12312
12313 if let Some(pb) = pb {
12314 pb.finish_with_message(format!(
12315 "Graphs exported: {} graphs ({} nodes, {} edges)",
12316 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12317 ));
12318 }
12319
12320 Ok(snapshot)
12321 }
12322
12323 fn build_additional_graphs(
12328 &self,
12329 banking: &BankingSnapshot,
12330 intercompany: &IntercompanySnapshot,
12331 entries: &[JournalEntry],
12332 stats: &mut EnhancedGenerationStatistics,
12333 ) {
12334 let output_dir = self
12335 .output_path
12336 .clone()
12337 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12338 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12339
12340 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12342 info!("Phase 10c: Building banking network graph");
12343 let config = BankingGraphConfig::default();
12344 let mut builder = BankingGraphBuilder::new(config);
12345 builder.add_customers(&banking.customers);
12346 builder.add_accounts(&banking.accounts, &banking.customers);
12347 builder.add_transactions(&banking.transactions);
12348 let graph = builder.build();
12349
12350 let node_count = graph.node_count();
12351 let edge_count = graph.edge_count();
12352 stats.graph_node_count += node_count;
12353 stats.graph_edge_count += edge_count;
12354
12355 for format in &self.config.graph_export.formats {
12357 if matches!(
12358 format,
12359 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12360 ) {
12361 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12362 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12363 warn!("Failed to create banking graph output dir: {}", e);
12364 continue;
12365 }
12366 let pyg_config = PyGExportConfig::default();
12367 let exporter = PyGExporter::new(pyg_config);
12368 if let Err(e) = exporter.export(&graph, &format_dir) {
12369 warn!("Failed to export banking graph as PyG: {}", e);
12370 } else {
12371 info!(
12372 "Banking network graph exported: {} nodes, {} edges",
12373 node_count, edge_count
12374 );
12375 }
12376 }
12377 }
12378 }
12379
12380 let approval_entries: Vec<_> = entries
12382 .iter()
12383 .filter(|je| je.header.approval_workflow.is_some())
12384 .collect();
12385
12386 if !approval_entries.is_empty() {
12387 info!(
12388 "Phase 10c: Building approval network graph ({} entries with approvals)",
12389 approval_entries.len()
12390 );
12391 let config = ApprovalGraphConfig::default();
12392 let mut builder = ApprovalGraphBuilder::new(config);
12393
12394 for je in &approval_entries {
12395 if let Some(ref wf) = je.header.approval_workflow {
12396 for action in &wf.actions {
12397 let record = datasynth_core::models::ApprovalRecord {
12398 approval_id: format!(
12399 "APR-{}-{}",
12400 je.header.document_id, action.approval_level
12401 ),
12402 document_number: je.header.document_id.to_string(),
12403 document_type: "JE".to_string(),
12404 company_code: je.company_code().to_string(),
12405 requester_id: wf.preparer_id.clone(),
12406 requester_name: Some(wf.preparer_name.clone()),
12407 approver_id: action.actor_id.clone(),
12408 approver_name: action.actor_name.clone(),
12409 approval_date: je.posting_date(),
12410 action: format!("{:?}", action.action),
12411 amount: wf.amount,
12412 approval_limit: None,
12413 comments: action.comments.clone(),
12414 delegation_from: None,
12415 is_auto_approved: false,
12416 };
12417 builder.add_approval(&record);
12418 }
12419 }
12420 }
12421
12422 let graph = builder.build();
12423 let node_count = graph.node_count();
12424 let edge_count = graph.edge_count();
12425 stats.graph_node_count += node_count;
12426 stats.graph_edge_count += edge_count;
12427
12428 for format in &self.config.graph_export.formats {
12430 if matches!(
12431 format,
12432 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12433 ) {
12434 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12435 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12436 warn!("Failed to create approval graph output dir: {}", e);
12437 continue;
12438 }
12439 let pyg_config = PyGExportConfig::default();
12440 let exporter = PyGExporter::new(pyg_config);
12441 if let Err(e) = exporter.export(&graph, &format_dir) {
12442 warn!("Failed to export approval graph as PyG: {}", e);
12443 } else {
12444 info!(
12445 "Approval network graph exported: {} nodes, {} edges",
12446 node_count, edge_count
12447 );
12448 }
12449 }
12450 }
12451 }
12452
12453 if self.config.companies.len() >= 2 {
12455 info!(
12456 "Phase 10c: Building entity relationship graph ({} companies)",
12457 self.config.companies.len()
12458 );
12459
12460 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12461 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12462
12463 let parent_code = &self.config.companies[0].code;
12465 let mut companies: Vec<datasynth_core::models::Company> =
12466 Vec::with_capacity(self.config.companies.len());
12467
12468 let first = &self.config.companies[0];
12470 companies.push(datasynth_core::models::Company::parent(
12471 &first.code,
12472 &first.name,
12473 &first.country,
12474 &first.currency,
12475 ));
12476
12477 for cc in self.config.companies.iter().skip(1) {
12479 companies.push(datasynth_core::models::Company::subsidiary(
12480 &cc.code,
12481 &cc.name,
12482 &cc.country,
12483 &cc.currency,
12484 parent_code,
12485 rust_decimal::Decimal::from(100),
12486 ));
12487 }
12488
12489 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12491 self.config
12492 .companies
12493 .iter()
12494 .skip(1)
12495 .enumerate()
12496 .map(|(i, cc)| {
12497 let mut rel =
12498 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12499 format!("REL{:03}", i + 1),
12500 parent_code.clone(),
12501 cc.code.clone(),
12502 rust_decimal::Decimal::from(100),
12503 start_date,
12504 );
12505 rel.functional_currency = cc.currency.clone();
12506 rel
12507 })
12508 .collect();
12509
12510 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12511 builder.add_companies(&companies);
12512 builder.add_ownership_relationships(&relationships);
12513
12514 for pair in &intercompany.matched_pairs {
12516 builder.add_intercompany_edge(
12517 &pair.seller_company,
12518 &pair.buyer_company,
12519 pair.amount,
12520 &format!("{:?}", pair.transaction_type),
12521 );
12522 }
12523
12524 let graph = builder.build();
12525 let node_count = graph.node_count();
12526 let edge_count = graph.edge_count();
12527 stats.graph_node_count += node_count;
12528 stats.graph_edge_count += edge_count;
12529
12530 for format in &self.config.graph_export.formats {
12532 if matches!(
12533 format,
12534 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12535 ) {
12536 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12537 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12538 warn!("Failed to create entity graph output dir: {}", e);
12539 continue;
12540 }
12541 let pyg_config = PyGExportConfig::default();
12542 let exporter = PyGExporter::new(pyg_config);
12543 if let Err(e) = exporter.export(&graph, &format_dir) {
12544 warn!("Failed to export entity graph as PyG: {}", e);
12545 } else {
12546 info!(
12547 "Entity relationship graph exported: {} nodes, {} edges",
12548 node_count, edge_count
12549 );
12550 }
12551 }
12552 }
12553 } else {
12554 debug!(
12555 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12556 self.config.companies.len()
12557 );
12558 }
12559 }
12560
    #[allow(clippy::too_many_arguments)]
    /// Builds the hypergraph from the generated data and writes it to disk.
    ///
    /// The builder is configured from `self.config.graph_export.hypergraph` and is fed, in
    /// order, with the COSO framework, internal controls, master data, process documents
    /// (P2P, O2C, S2C, H2R, manufacturing, banking, audit, bank reconciliations), OCPM
    /// events, compliance data, the chart of accounts and finally the journal entries.
    /// The result is exported under
    /// `<output dir>/<graph_export.output_subdirectory>/<hypergraph.output_subdirectory>`.
    ///
    /// # Errors
    ///
    /// Returns an error created with `SynthError::generation` when the selected file
    /// exporter fails. Failures in the optional streaming step (compiled only with the
    /// `streaming` feature) are logged as warnings and do not fail the call.
    ///
    /// # Side effects
    ///
    /// Adds the exported node and edge counts to `stats` and increments
    /// `stats.graph_export_count`. The hyperedge count is only reported through the
    /// returned `HypergraphExportInfo`.
    fn export_hypergraph(
        &self,
        coa: &Arc<ChartOfAccounts>,
        entries: &[JournalEntry],
        document_flows: &DocumentFlowSnapshot,
        sourcing: &SourcingSnapshot,
        hr: &HrSnapshot,
        manufacturing: &ManufacturingSnapshot,
        banking: &BankingSnapshot,
        audit: &AuditSnapshot,
        financial_reporting: &FinancialReportingSnapshot,
        ocpm: &OcpmSnapshot,
        compliance: &ComplianceRegulationsSnapshot,
        stats: &mut EnhancedGenerationStatistics,
    ) -> SynthResult<HypergraphExportInfo> {
        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
        use datasynth_graph::models::hypergraph::AggregationStrategy;

        let hg_settings = &self.config.graph_export.hypergraph;

        // Map the configured strategy name onto the builder enum. Any unrecognized name
        // silently falls back to `PoolByCounterparty`.
        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
            "truncate" => AggregationStrategy::Truncate,
            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
            "importance_sample" => AggregationStrategy::ImportanceSample,
            _ => AggregationStrategy::PoolByCounterparty,
        };

        // Most flags mirror the hypergraph settings; compliance follows the global
        // compliance-regulations switch, and the tax/treasury/ESG/project/intercompany/
        // temporal-event flags are hard-coded to `true` here.
        let builder_config = HypergraphConfig {
            max_nodes: hg_settings.max_nodes,
            aggregation_strategy,
            include_coso: hg_settings.governance_layer.include_coso,
            include_controls: hg_settings.governance_layer.include_controls,
            include_sox: hg_settings.governance_layer.include_sox,
            include_vendors: hg_settings.governance_layer.include_vendors,
            include_customers: hg_settings.governance_layer.include_customers,
            include_employees: hg_settings.governance_layer.include_employees,
            include_p2p: hg_settings.process_layer.include_p2p,
            include_o2c: hg_settings.process_layer.include_o2c,
            include_s2c: hg_settings.process_layer.include_s2c,
            include_h2r: hg_settings.process_layer.include_h2r,
            include_mfg: hg_settings.process_layer.include_mfg,
            include_bank: hg_settings.process_layer.include_bank,
            include_audit: hg_settings.process_layer.include_audit,
            include_r2r: hg_settings.process_layer.include_r2r,
            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
            docs_per_counterparty_threshold: hg_settings
                .process_layer
                .docs_per_counterparty_threshold,
            include_accounts: hg_settings.accounting_layer.include_accounts,
            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
            include_cross_layer_edges: hg_settings.cross_layer.enabled,
            include_compliance: self.config.compliance_regulations.enabled,
            include_tax: true,
            include_treasury: true,
            include_esg: true,
            include_project: true,
            include_intercompany: true,
            include_temporal_events: true,
        };

        let mut builder = HypergraphBuilder::new(builder_config);

        // Governance data: COSO framework first, then controls and master data.
        builder.add_coso_framework();

        // Controls are added only when both the hypergraph governance layer and the
        // global internal-controls configuration enable them.
        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
            let controls = InternalControl::standard_controls();
            builder.add_controls(&controls);
        }

        builder.add_vendors(&self.master_data.vendors);
        builder.add_customers(&self.master_data.customers);
        builder.add_employees(&self.master_data.employees);

        // Process documents, one call per business process.
        builder.add_p2p_documents(
            &document_flows.purchase_orders,
            &document_flows.goods_receipts,
            &document_flows.vendor_invoices,
            &document_flows.payments,
        );
        builder.add_o2c_documents(
            &document_flows.sales_orders,
            &document_flows.deliveries,
            &document_flows.customer_invoices,
        );
        builder.add_s2c_documents(
            &sourcing.sourcing_projects,
            &sourcing.qualifications,
            &sourcing.rfx_events,
            &sourcing.bids,
            &sourcing.bid_evaluations,
            &sourcing.contracts,
        );
        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
        builder.add_mfg_documents(
            &manufacturing.production_orders,
            &manufacturing.quality_inspections,
            &manufacturing.cycle_counts,
        );
        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
        builder.add_audit_documents(
            &audit.engagements,
            &audit.workpapers,
            &audit.findings,
            &audit.evidence,
            &audit.risk_assessments,
            &audit.judgments,
            &audit.materiality_calculations,
            &audit.audit_opinions,
            &audit.going_concern_assessments,
        );
        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);

        // OCPM events are optional; they are added only when an event log was produced.
        if let Some(ref event_log) = ocpm.event_log {
            builder.add_ocpm_events(event_log);
        }

        // Compliance data is added only when compliance regulations are enabled AND the
        // governance layer includes controls.
        if self.config.compliance_regulations.enabled
            && hg_settings.governance_layer.include_controls
        {
            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
            // Resolve each generated standard record against the built-in registry;
            // records whose id the registry does not know are dropped by `filter_map`.
            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
                .standard_records
                .iter()
                .filter_map(|r| {
                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
                    registry.get(&sid).cloned()
                })
                .collect();

            builder.add_compliance_regulations(
                &standards,
                &compliance.findings,
                &compliance.filings,
            );
        }

        // Accounting data: accounts, then journal entries.
        builder.add_accounts(coa);
        builder.add_journal_entries_as_hyperedges(entries);

        let hypergraph = builder.build();

        // An explicitly set `output_path` takes precedence over the configured
        // output directory.
        let output_dir = self
            .output_path
            .clone()
            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
        let hg_dir = output_dir
            .join(&self.config.graph_export.output_subdirectory)
            .join(&hg_settings.output_subdirectory);

        // "unified" selects the RustGraph unified exporter; every other value uses the
        // hypergraph exporter. Both arms extract the same three counts from the metadata.
        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
            "unified" => {
                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
                })?;
                (
                    metadata.num_nodes,
                    metadata.num_edges,
                    metadata.num_hyperedges,
                )
            }
            _ => {
                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                    SynthError::generation(format!("Hypergraph export failed: {e}"))
                })?;
                (
                    metadata.num_nodes,
                    metadata.num_edges,
                    metadata.num_hyperedges,
                )
            }
        };

        // Optional streaming of the same hypergraph to a remote target. This step is
        // best-effort: every failure is logged with `warn!` and the function continues.
        #[cfg(feature = "streaming")]
        if let Some(ref target_url) = hg_settings.stream_target {
            use crate::stream_client::{StreamClient, StreamConfig};
            // NOTE(review): presumably needed for `client.flush()` below — confirm.
            use std::io::Write as _;

            // The API key is optional: a missing or unreadable env var yields `None`.
            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
            let stream_config = StreamConfig {
                target_url: target_url.clone(),
                batch_size: hg_settings.stream_batch_size,
                api_key,
                ..StreamConfig::default()
            };

            match StreamClient::new(stream_config) {
                Ok(mut client) => {
                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
                    match exporter.export_to_writer(&hypergraph, &mut client) {
                        Ok(_) => {
                            if let Err(e) = client.flush() {
                                warn!("Failed to flush stream client: {}", e);
                            } else {
                                info!("Streamed {} records to {}", client.total_sent(), target_url);
                            }
                        }
                        Err(e) => {
                            warn!("Streaming export failed: {}", e);
                        }
                    }
                }
                Err(e) => {
                    warn!("Failed to create stream client: {}", e);
                }
            }
        }

        // Only nodes and edges are accumulated into the run statistics.
        stats.graph_node_count += num_nodes;
        stats.graph_edge_count += num_edges;
        stats.graph_export_count += 1;

        Ok(HypergraphExportInfo {
            node_count: num_nodes,
            edge_count: num_edges,
            hyperedge_count: num_hyperedges,
            output_path: hg_dir,
        })
    }
12807
12808 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12813 let pb = self.create_progress_bar(100, "Generating Banking Data");
12814
12815 let orchestrator = BankingOrchestratorBuilder::new()
12817 .config(self.config.banking.clone())
12818 .seed(self.seed + 9000)
12819 .country_pack(self.primary_pack().clone())
12820 .build();
12821
12822 if let Some(pb) = &pb {
12823 pb.inc(10);
12824 }
12825
12826 let result = orchestrator.generate();
12828
12829 if let Some(pb) = &pb {
12830 pb.inc(90);
12831 pb.finish_with_message(format!(
12832 "Banking: {} customers, {} transactions",
12833 result.customers.len(),
12834 result.transactions.len()
12835 ));
12836 }
12837
12838 let mut banking_customers = result.customers;
12843 let core_customers = &self.master_data.customers;
12844 if !core_customers.is_empty() {
12845 for (i, bc) in banking_customers.iter_mut().enumerate() {
12846 let core = &core_customers[i % core_customers.len()];
12847 bc.name = CustomerName::business(&core.name);
12848 bc.residence_country = core.country.clone();
12849 bc.enterprise_customer_id = Some(core.customer_id.clone());
12850 }
12851 debug!(
12852 "Cross-referenced {} banking customers with {} core customers",
12853 banking_customers.len(),
12854 core_customers.len()
12855 );
12856 }
12857
12858 Ok(BankingSnapshot {
12859 customers: banking_customers,
12860 accounts: result.accounts,
12861 transactions: result.transactions,
12862 transaction_labels: result.transaction_labels,
12863 customer_labels: result.customer_labels,
12864 account_labels: result.account_labels,
12865 relationship_labels: result.relationship_labels,
12866 narratives: result.narratives,
12867 suspicious_count: result.stats.suspicious_count,
12868 scenario_count: result.scenarios.len(),
12869 })
12870 }
12871
12872 fn calculate_total_transactions(&self) -> u64 {
12874 let months = self.config.global.period_months as f64;
12875 self.config
12876 .companies
12877 .iter()
12878 .map(|c| {
12879 let annual = c.annual_transaction_volume.count() as f64;
12880 let weighted = annual * c.volume_weight;
12881 (weighted * months / 12.0) as u64
12882 })
12883 .sum()
12884 }
12885
12886 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12888 if !self.phase_config.show_progress {
12889 return None;
12890 }
12891
12892 let pb = if let Some(mp) = &self.multi_progress {
12893 mp.add(ProgressBar::new(total))
12894 } else {
12895 ProgressBar::new(total)
12896 };
12897
12898 pb.set_style(
12899 ProgressStyle::default_bar()
12900 .template(&format!(
12901 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12902 ))
12903 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12904 .progress_chars("#>-"),
12905 );
12906
12907 Some(pb)
12908 }
12909
12910 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12912 self.coa.clone()
12913 }
12914
    /// Returns a borrowed view of the master data snapshot held by this instance.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
12919
12920 fn phase_compliance_regulations(
12922 &mut self,
12923 _stats: &mut EnhancedGenerationStatistics,
12924 ) -> SynthResult<ComplianceRegulationsSnapshot> {
12925 if !self.phase_config.generate_compliance_regulations {
12926 return Ok(ComplianceRegulationsSnapshot::default());
12927 }
12928
12929 info!("Phase: Generating Compliance Regulations Data");
12930
12931 let cr_config = &self.config.compliance_regulations;
12932
12933 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12935 self.config
12936 .companies
12937 .iter()
12938 .map(|c| c.country.clone())
12939 .collect::<std::collections::HashSet<_>>()
12940 .into_iter()
12941 .collect()
12942 } else {
12943 cr_config.jurisdictions.clone()
12944 };
12945
12946 let fallback_date =
12948 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12949 let reference_date = cr_config
12950 .reference_date
12951 .as_ref()
12952 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12953 .unwrap_or_else(|| {
12954 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12955 .unwrap_or(fallback_date)
12956 });
12957
12958 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12960 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12961 let cross_reference_records = reg_gen.generate_cross_reference_records();
12962 let jurisdiction_records =
12963 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12964
12965 info!(
12966 " Standards: {} records, {} cross-references, {} jurisdictions",
12967 standard_records.len(),
12968 cross_reference_records.len(),
12969 jurisdiction_records.len()
12970 );
12971
12972 let audit_procedures = if cr_config.audit_procedures.enabled {
12974 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12975 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12976 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12977 confidence_level: cr_config.audit_procedures.confidence_level,
12978 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12979 };
12980 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12981 self.seed + 9000,
12982 proc_config,
12983 );
12984 let registry = reg_gen.registry();
12985 let mut all_procs = Vec::new();
12986 for jurisdiction in &jurisdictions {
12987 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
12988 all_procs.extend(procs);
12989 }
12990 info!(" Audit procedures: {}", all_procs.len());
12991 all_procs
12992 } else {
12993 Vec::new()
12994 };
12995
12996 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
12998 let finding_config =
12999 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13000 finding_rate: cr_config.findings.finding_rate,
13001 material_weakness_rate: cr_config.findings.material_weakness_rate,
13002 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13003 generate_remediation: cr_config.findings.generate_remediation,
13004 };
13005 let mut finding_gen =
13006 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13007 self.seed + 9100,
13008 finding_config,
13009 );
13010 let mut all_findings = Vec::new();
13011 for company in &self.config.companies {
13012 let company_findings =
13013 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13014 all_findings.extend(company_findings);
13015 }
13016 info!(" Compliance findings: {}", all_findings.len());
13017 all_findings
13018 } else {
13019 Vec::new()
13020 };
13021
13022 let filings = if cr_config.filings.enabled {
13024 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13025 filing_types: cr_config.filings.filing_types.clone(),
13026 generate_status_progression: cr_config.filings.generate_status_progression,
13027 };
13028 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13029 self.seed + 9200,
13030 filing_config,
13031 );
13032 let company_codes: Vec<String> = self
13033 .config
13034 .companies
13035 .iter()
13036 .map(|c| c.code.clone())
13037 .collect();
13038 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13039 .unwrap_or(fallback_date);
13040 let filings = filing_gen.generate_filings(
13041 &company_codes,
13042 &jurisdictions,
13043 start_date,
13044 self.config.global.period_months,
13045 );
13046 info!(" Regulatory filings: {}", filings.len());
13047 filings
13048 } else {
13049 Vec::new()
13050 };
13051
13052 let compliance_graph = if cr_config.graph.enabled {
13054 let graph_config = datasynth_graph::ComplianceGraphConfig {
13055 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13056 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13057 include_cross_references: cr_config.graph.include_cross_references,
13058 include_supersession_edges: cr_config.graph.include_supersession_edges,
13059 include_account_links: cr_config.graph.include_account_links,
13060 include_control_links: cr_config.graph.include_control_links,
13061 include_company_links: cr_config.graph.include_company_links,
13062 };
13063 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13064
13065 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13067 .iter()
13068 .map(|r| datasynth_graph::StandardNodeInput {
13069 standard_id: r.standard_id.clone(),
13070 title: r.title.clone(),
13071 category: r.category.clone(),
13072 domain: r.domain.clone(),
13073 is_active: r.is_active,
13074 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13075 applicable_account_types: r.applicable_account_types.clone(),
13076 applicable_processes: r.applicable_processes.clone(),
13077 })
13078 .collect();
13079 builder.add_standards(&standard_inputs);
13080
13081 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13083 jurisdiction_records
13084 .iter()
13085 .map(|r| datasynth_graph::JurisdictionNodeInput {
13086 country_code: r.country_code.clone(),
13087 country_name: r.country_name.clone(),
13088 framework: r.accounting_framework.clone(),
13089 standard_count: r.standard_count,
13090 tax_rate: r.statutory_tax_rate,
13091 })
13092 .collect();
13093 builder.add_jurisdictions(&jurisdiction_inputs);
13094
13095 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13097 cross_reference_records
13098 .iter()
13099 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13100 from_standard: r.from_standard.clone(),
13101 to_standard: r.to_standard.clone(),
13102 relationship: r.relationship.clone(),
13103 convergence_level: r.convergence_level,
13104 })
13105 .collect();
13106 builder.add_cross_references(&xref_inputs);
13107
13108 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13110 .iter()
13111 .map(|r| datasynth_graph::JurisdictionMappingInput {
13112 country_code: r.jurisdiction.clone(),
13113 standard_id: r.standard_id.clone(),
13114 })
13115 .collect();
13116 builder.add_jurisdiction_mappings(&mapping_inputs);
13117
13118 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13120 .iter()
13121 .map(|p| datasynth_graph::ProcedureNodeInput {
13122 procedure_id: p.procedure_id.clone(),
13123 standard_id: p.standard_id.clone(),
13124 procedure_type: p.procedure_type.clone(),
13125 sample_size: p.sample_size,
13126 confidence_level: p.confidence_level,
13127 })
13128 .collect();
13129 builder.add_procedures(&proc_inputs);
13130
13131 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13133 .iter()
13134 .map(|f| datasynth_graph::FindingNodeInput {
13135 finding_id: f.finding_id.to_string(),
13136 standard_id: f
13137 .related_standards
13138 .first()
13139 .map(|s| s.as_str().to_string())
13140 .unwrap_or_default(),
13141 severity: f.severity.to_string(),
13142 deficiency_level: f.deficiency_level.to_string(),
13143 severity_score: f.deficiency_level.severity_score(),
13144 control_id: f.control_id.clone(),
13145 affected_accounts: f.affected_accounts.clone(),
13146 })
13147 .collect();
13148 builder.add_findings(&finding_inputs);
13149
13150 if cr_config.graph.include_account_links {
13152 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13153 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13154 for std_record in &standard_records {
13155 if let Some(std_obj) =
13156 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13157 &std_record.standard_id,
13158 ))
13159 {
13160 for acct_type in &std_obj.applicable_account_types {
13161 account_links.push(datasynth_graph::AccountLinkInput {
13162 standard_id: std_record.standard_id.clone(),
13163 account_code: acct_type.clone(),
13164 account_name: acct_type.clone(),
13165 });
13166 }
13167 }
13168 }
13169 builder.add_account_links(&account_links);
13170 }
13171
13172 if cr_config.graph.include_control_links {
13174 let mut control_links = Vec::new();
13175 let sox_like_ids: Vec<String> = standard_records
13177 .iter()
13178 .filter(|r| {
13179 r.standard_id.starts_with("SOX")
13180 || r.standard_id.starts_with("PCAOB-AS-2201")
13181 })
13182 .map(|r| r.standard_id.clone())
13183 .collect();
13184 let control_ids = [
13186 ("C001", "Cash Controls"),
13187 ("C002", "Large Transaction Approval"),
13188 ("C010", "PO Approval"),
13189 ("C011", "Three-Way Match"),
13190 ("C020", "Revenue Recognition"),
13191 ("C021", "Credit Check"),
13192 ("C030", "Manual JE Approval"),
13193 ("C031", "Period Close Review"),
13194 ("C032", "Account Reconciliation"),
13195 ("C040", "Payroll Processing"),
13196 ("C050", "Fixed Asset Capitalization"),
13197 ("C060", "Intercompany Elimination"),
13198 ];
13199 for sox_id in &sox_like_ids {
13200 for (ctrl_id, ctrl_name) in &control_ids {
13201 control_links.push(datasynth_graph::ControlLinkInput {
13202 standard_id: sox_id.clone(),
13203 control_id: ctrl_id.to_string(),
13204 control_name: ctrl_name.to_string(),
13205 });
13206 }
13207 }
13208 builder.add_control_links(&control_links);
13209 }
13210
13211 if cr_config.graph.include_company_links {
13213 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13214 .iter()
13215 .enumerate()
13216 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13217 filing_id: format!("F{:04}", i + 1),
13218 filing_type: f.filing_type.to_string(),
13219 company_code: f.company_code.clone(),
13220 jurisdiction: f.jurisdiction.clone(),
13221 status: format!("{:?}", f.status),
13222 })
13223 .collect();
13224 builder.add_filings(&filing_inputs);
13225 }
13226
13227 let graph = builder.build();
13228 info!(
13229 " Compliance graph: {} nodes, {} edges",
13230 graph.nodes.len(),
13231 graph.edges.len()
13232 );
13233 Some(graph)
13234 } else {
13235 None
13236 };
13237
13238 self.check_resources_with_log("post-compliance-regulations")?;
13239
13240 Ok(ComplianceRegulationsSnapshot {
13241 standard_records,
13242 cross_reference_records,
13243 jurisdiction_records,
13244 audit_procedures,
13245 findings,
13246 filings,
13247 compliance_graph,
13248 })
13249 }
13250
13251 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13253 use super::lineage::LineageGraphBuilder;
13254
13255 let mut builder = LineageGraphBuilder::new();
13256
13257 builder.add_config_section("config:global", "Global Config");
13259 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13260 builder.add_config_section("config:transactions", "Transaction Config");
13261
13262 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13264 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13265
13266 builder.configured_by("phase:coa", "config:chart_of_accounts");
13268 builder.configured_by("phase:je", "config:transactions");
13269
13270 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13272 builder.produced_by("output:je", "phase:je");
13273
13274 if self.phase_config.generate_master_data {
13276 builder.add_config_section("config:master_data", "Master Data Config");
13277 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13278 builder.configured_by("phase:master_data", "config:master_data");
13279 builder.input_to("phase:master_data", "phase:je");
13280 }
13281
13282 if self.phase_config.generate_document_flows {
13283 builder.add_config_section("config:document_flows", "Document Flow Config");
13284 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13285 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13286 builder.configured_by("phase:p2p", "config:document_flows");
13287 builder.configured_by("phase:o2c", "config:document_flows");
13288
13289 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13290 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13291 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13292 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13293 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13294
13295 builder.produced_by("output:po", "phase:p2p");
13296 builder.produced_by("output:gr", "phase:p2p");
13297 builder.produced_by("output:vi", "phase:p2p");
13298 builder.produced_by("output:so", "phase:o2c");
13299 builder.produced_by("output:ci", "phase:o2c");
13300 }
13301
13302 if self.phase_config.inject_anomalies {
13303 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13304 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13305 builder.configured_by("phase:anomaly", "config:fraud");
13306 builder.add_output_file(
13307 "output:labels",
13308 "Anomaly Labels",
13309 "labels/anomaly_labels.csv",
13310 );
13311 builder.produced_by("output:labels", "phase:anomaly");
13312 }
13313
13314 if self.phase_config.generate_audit {
13315 builder.add_config_section("config:audit", "Audit Config");
13316 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13317 builder.configured_by("phase:audit", "config:audit");
13318 }
13319
13320 if self.phase_config.generate_banking {
13321 builder.add_config_section("config:banking", "Banking Config");
13322 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13323 builder.configured_by("phase:banking", "config:banking");
13324 }
13325
13326 if self.config.llm.enabled {
13327 builder.add_config_section("config:llm", "LLM Enrichment Config");
13328 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13329 builder.configured_by("phase:llm_enrichment", "config:llm");
13330 }
13331
13332 if self.config.diffusion.enabled {
13333 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13334 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13335 builder.configured_by("phase:diffusion", "config:diffusion");
13336 }
13337
13338 if self.config.causal.enabled {
13339 builder.add_config_section("config:causal", "Causal Generation Config");
13340 builder.add_generator_phase("phase:causal", "Causal Overlay");
13341 builder.configured_by("phase:causal", "config:causal");
13342 }
13343
13344 builder.build()
13345 }
13346
13347 fn compute_company_revenue(
13356 entries: &[JournalEntry],
13357 company_code: &str,
13358 ) -> rust_decimal::Decimal {
13359 use rust_decimal::Decimal;
13360 let mut revenue = Decimal::ZERO;
13361 for je in entries {
13362 if je.header.company_code != company_code {
13363 continue;
13364 }
13365 for line in &je.lines {
13366 if line.gl_account.starts_with('4') {
13367 revenue += line.credit_amount - line.debit_amount;
13369 }
13370 }
13371 }
13372 revenue.max(Decimal::ZERO)
13373 }
13374
13375 fn compute_entity_net_assets(
13379 entries: &[JournalEntry],
13380 entity_code: &str,
13381 ) -> rust_decimal::Decimal {
13382 use rust_decimal::Decimal;
13383 let mut asset_net = Decimal::ZERO;
13384 let mut liability_net = Decimal::ZERO;
13385 for je in entries {
13386 if je.header.company_code != entity_code {
13387 continue;
13388 }
13389 for line in &je.lines {
13390 if line.gl_account.starts_with('1') {
13391 asset_net += line.debit_amount - line.credit_amount;
13392 } else if line.gl_account.starts_with('2') {
13393 liability_net += line.credit_amount - line.debit_amount;
13394 }
13395 }
13396 }
13397 asset_net - liability_net
13398 }
13399}
13400
13401fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13403 match format {
13404 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13405 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13406 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13407 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13408 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13409 }
13410}
13411
13412fn compute_trial_balance_entries(
13417 entries: &[JournalEntry],
13418 entity_code: &str,
13419 fiscal_year: i32,
13420 coa: Option<&ChartOfAccounts>,
13421) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13422 use std::collections::BTreeMap;
13423
13424 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13425 BTreeMap::new();
13426
13427 for je in entries {
13428 for line in &je.lines {
13429 let entry = balances.entry(line.account_code.clone()).or_default();
13430 entry.0 += line.debit_amount;
13431 entry.1 += line.credit_amount;
13432 }
13433 }
13434
13435 balances
13436 .into_iter()
13437 .map(
13438 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13439 account_description: coa
13440 .and_then(|c| c.get_account(&account_code))
13441 .map(|a| a.description().to_string())
13442 .unwrap_or_else(|| account_code.clone()),
13443 account_code,
13444 debit_balance: debit,
13445 credit_balance: credit,
13446 net_balance: debit - credit,
13447 entity_code: entity_code.to_string(),
13448 period: format!("FY{}", fiscal_year),
13449 },
13450 )
13451 .collect()
13452}
13453
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_config::schema::*;

    /// Builds a company entry for test configurations.
    ///
    /// All test companies share the same transaction volume (`TenK`), a
    /// `volume_weight` of `1.0`, no functional currency override and the
    /// `"K4"` fiscal year variant; callers can override individual fields with
    /// struct update syntax.
    fn test_company(code: &str, name: &str, currency: &str, country: &str) -> CompanyConfig {
        CompanyConfig {
            code: code.to_string(),
            name: name.to_string(),
            currency: currency.to_string(),
            functional_currency: None,
            country: country.to_string(),
            annual_transaction_volume: TransactionVolume::TenK,
            volume_weight: 1.0,
            fiscal_year_variant: "K4".to_string(),
        }
    }

    /// Phase configuration that only generates journal entries.
    ///
    /// Master data, document flows, anomaly injection and progress output are
    /// switched off; every other field keeps its `PhaseConfig::default()`
    /// value. Tests override individual fields with struct update syntax.
    fn journal_only_phases() -> PhaseConfig {
        PhaseConfig {
            generate_master_data: false,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        }
    }

    /// Minimal single-company configuration shared by the tests: one month of
    /// data starting 2024-01-01, fixed seed 42, a small chart of accounts and
    /// defaults for every other section.
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            global: GlobalConfig {
                industry: IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                presentation_currency: None,
                worker_threads: 0,
                memory_limit_mb: 0,
                fiscal_year_months: None,
            },
            companies: vec![test_company("1000", "Test Company", "USD", "US")],
            chart_of_accounts: ChartOfAccountsConfig {
                complexity: CoAComplexity::Small,
                industry_specific: true,
                custom_accounts: None,
                min_hierarchy_depth: 2,
                max_hierarchy_depth: 4,
            },
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
            distributions: Default::default(),
            temporal_patterns: Default::default(),
            vendor_network: VendorNetworkSchemaConfig::default(),
            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
            relationship_strength: RelationshipStrengthSchemaConfig::default(),
            cross_process_links: CrossProcessLinksSchemaConfig::default(),
            organizational_events: OrganizationalEventsSchemaConfig::default(),
            behavioral_drift: BehavioralDriftSchemaConfig::default(),
            market_drift: MarketDriftSchemaConfig::default(),
            drift_labeling: DriftLabelingSchemaConfig::default(),
            anomaly_injection: Default::default(),
            industry_specific: Default::default(),
            fingerprint_privacy: Default::default(),
            quality_gates: Default::default(),
            compliance: Default::default(),
            webhooks: Default::default(),
            llm: Default::default(),
            diffusion: Default::default(),
            causal: Default::default(),
            source_to_pay: Default::default(),
            financial_reporting: Default::default(),
            hr: Default::default(),
            manufacturing: Default::default(),
            sales_quotes: Default::default(),
            tax: Default::default(),
            treasury: Default::default(),
            project_accounting: Default::default(),
            esg: Default::default(),
            country_packs: None,
            scenarios: Default::default(),
            session: Default::default(),
            compliance_regulations: Default::default(),
        }
    }

    /// An orchestrator can be constructed from the test config with default phases.
    #[test]
    fn test_enhanced_orchestrator_creation() {
        let config = create_test_config();
        let orchestrator = EnhancedOrchestrator::with_defaults(config);
        assert!(orchestrator.is_ok());
    }

    /// Journal-entry-only generation succeeds and yields at least one entry.
    #[test]
    fn test_minimal_generation() {
        let config = create_test_config();

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        // `unwrap` surfaces the actual error on failure, unlike `assert!(is_ok())`.
        let result = orchestrator.generate().unwrap();

        assert!(!result.journal_entries.is_empty());
    }

    /// Master-data-only generation produces vendors, customers and materials.
    #[test]
    fn test_master_data_generation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.master_data.vendors.is_empty());
        assert!(!result.master_data.customers.is_empty());
        assert!(!result.master_data.materials.is_empty());
    }

    /// With master data available, document flow generation yields P2P and
    /// O2C chains together with their purchase and sales orders.
    #[test]
    fn test_document_flow_generation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            p2p_chains: 5,
            o2c_chains: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.document_flows.p2p_chains.is_empty());
        assert!(!result.document_flows.o2c_chains.is_empty());

        assert!(!result.document_flows.purchase_orders.is_empty());
        assert!(!result.document_flows.sales_orders.is_empty());
    }

    /// Enabling anomaly injection still yields journal entries and populates
    /// the anomaly label summary.
    #[test]
    fn test_anomaly_injection() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            inject_anomalies: true,
            ..journal_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.journal_entries.is_empty());

        assert!(result.anomaly_labels.summary.is_some());
    }

    /// Master data, document flows, journal entries and balance validation
    /// all produce output when run together.
    #[test]
    fn test_full_generation_pipeline() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 3,
            customers_per_company: 3,
            materials_per_company: 5,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 3,
            o2c_chains: 3,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.master_data.vendors.is_empty());
        assert!(!result.master_data.customers.is_empty());
        assert!(!result.document_flows.p2p_chains.is_empty());
        assert!(!result.document_flows.o2c_chains.is_empty());
        assert!(!result.journal_entries.is_empty());
        assert!(result.statistics.accounts_count > 0);

        assert!(!result.subledger.ap_invoices.is_empty());
        assert!(!result.subledger.ar_invoices.is_empty());

        assert!(result.balance_validation.validated);
        assert!(result.balance_validation.entries_processed > 0);
    }

    /// Subledger AP/AR invoices match the document-flow vendor/customer
    /// invoices one-to-one by count, and the statistics agree with both.
    #[test]
    fn test_subledger_linking() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 5,
            o2c_chains: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.document_flows.vendor_invoices.is_empty());
        assert!(!result.document_flows.customer_invoices.is_empty());

        assert!(!result.subledger.ap_invoices.is_empty());
        assert!(!result.subledger.ar_invoices.is_empty());

        assert_eq!(
            result.subledger.ap_invoices.len(),
            result.document_flows.vendor_invoices.len()
        );

        assert_eq!(
            result.subledger.ar_invoices.len(),
            result.document_flows.customer_invoices.len()
        );

        assert_eq!(
            result.statistics.ap_invoice_count,
            result.subledger.ap_invoices.len()
        );
        assert_eq!(
            result.statistics.ar_invoice_count,
            result.subledger.ar_invoices.len()
        );
    }

    /// Balance validation runs, finds no unbalanced entries and reports equal
    /// total debits and credits.
    #[test]
    fn test_balance_validation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            validate_balances: true,
            ..journal_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(result.balance_validation.validated);
        assert!(result.balance_validation.entries_processed > 0);

        assert!(!result.balance_validation.has_unbalanced_entries);

        assert_eq!(
            result.balance_validation.total_debits,
            result.balance_validation.total_credits
        );
    }

    /// Reported statistics match the sizes of the generated collections.
    #[test]
    fn test_statistics_accuracy() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 10,
            customers_per_company: 20,
            materials_per_company: 15,
            assets_per_company: 5,
            employees_per_company: 8,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert_eq!(
            result.statistics.vendor_count,
            result.master_data.vendors.len()
        );
        assert_eq!(
            result.statistics.customer_count,
            result.master_data.customers.len()
        );
        assert_eq!(
            result.statistics.material_count,
            result.master_data.materials.len()
        );
        assert_eq!(
            result.statistics.total_entries as usize,
            result.journal_entries.len()
        );
    }

    /// Pins the default values of the main `PhaseConfig` switches.
    #[test]
    fn test_phase_config_defaults() {
        let config = PhaseConfig::default();
        assert!(config.generate_master_data);
        assert!(config.generate_document_flows);
        assert!(config.generate_journal_entries);
        assert!(!config.inject_anomalies);
        assert!(config.validate_balances);
        assert!(config.show_progress);
        assert!(config.vendors_per_company > 0);
        assert!(config.customers_per_company > 0);
    }

    /// No chart of accounts is available before `generate` has been called.
    #[test]
    fn test_get_coa_before_generation() {
        let config = create_test_config();
        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();

        assert!(orchestrator.get_coa().is_none());
    }

    /// The chart of accounts is available once generation has run.
    #[test]
    fn test_get_coa_after_generation() {
        let config = create_test_config();

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        let _ = orchestrator.generate().unwrap();

        assert!(orchestrator.get_coa().is_some());
    }

    /// Generated master data is exposed on the generation result.
    #[test]
    fn test_get_master_data() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.master_data.vendors.is_empty());
    }

    /// `with_progress(false)` turns off the progress flag on the phase config.
    #[test]
    fn test_with_progress_builder() {
        let config = create_test_config();
        let orchestrator = EnhancedOrchestrator::with_defaults(config)
            .unwrap()
            .with_progress(false);

        assert!(!orchestrator.phase_config.show_progress);
    }

    /// With two companies configured, master data counts scale per company
    /// and the statistics report both companies.
    #[test]
    fn test_multi_company_generation() {
        let mut config = create_test_config();
        config.companies.push(CompanyConfig {
            volume_weight: 0.5,
            ..test_company("2000", "Subsidiary", "EUR", "DE")
        });

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // 5 per company across 2 companies.
        assert!(result.statistics.vendor_count >= 10);
        assert!(result.statistics.customer_count >= 10);
        assert_eq!(result.statistics.companies_count, 2);
    }

    /// Requesting document flows without master data produces no chains.
    #[test]
    fn test_empty_master_data_skips_document_flows() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: false,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(result.document_flows.p2p_chains.is_empty());
        assert!(result.document_flows.o2c_chains.is_empty());
    }

    /// `total_line_items` equals the sum of line counts over all journal entries.
    #[test]
    fn test_journal_entry_line_item_count() {
        let config = create_test_config();

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        let result = orchestrator.generate().unwrap();

        let calculated_line_items: u64 = result
            .journal_entries
            .iter()
            .map(|e| e.line_count() as u64)
            .sum();
        assert_eq!(result.statistics.total_line_items, calculated_line_items);
    }

    /// Audit generation produces the requested engagements and their
    /// artifacts, and every audit statistic matches the corresponding
    /// collection size.
    #[test]
    fn test_audit_generation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_audit: true,
            audit_engagements: 2,
            workpapers_per_engagement: 5,
            evidence_per_workpaper: 2,
            risks_per_engagement: 3,
            findings_per_engagement: 2,
            judgments_per_engagement: 2,
            ..journal_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert_eq!(result.audit.engagements.len(), 2);
        assert!(!result.audit.workpapers.is_empty());
        assert!(!result.audit.evidence.is_empty());
        assert!(!result.audit.risk_assessments.is_empty());
        assert!(!result.audit.findings.is_empty());
        assert!(!result.audit.judgments.is_empty());

        assert!(
            !result.audit.confirmations.is_empty(),
            "ISA 505 confirmations should be generated"
        );
        assert!(
            !result.audit.confirmation_responses.is_empty(),
            "ISA 505 confirmation responses should be generated"
        );
        assert!(
            !result.audit.procedure_steps.is_empty(),
            "ISA 330 procedure steps should be generated"
        );
        assert!(
            !result.audit.analytical_results.is_empty(),
            "ISA 520 analytical procedures should be generated"
        );
        assert!(
            !result.audit.ia_functions.is_empty(),
            "ISA 610 IA functions should be generated (one per engagement)"
        );
        assert!(
            !result.audit.related_parties.is_empty(),
            "ISA 550 related parties should be generated"
        );

        assert_eq!(
            result.statistics.audit_engagement_count,
            result.audit.engagements.len()
        );
        assert_eq!(
            result.statistics.audit_workpaper_count,
            result.audit.workpapers.len()
        );
        assert_eq!(
            result.statistics.audit_evidence_count,
            result.audit.evidence.len()
        );
        assert_eq!(
            result.statistics.audit_risk_count,
            result.audit.risk_assessments.len()
        );
        assert_eq!(
            result.statistics.audit_finding_count,
            result.audit.findings.len()
        );
        assert_eq!(
            result.statistics.audit_judgment_count,
            result.audit.judgments.len()
        );
        assert_eq!(
            result.statistics.audit_confirmation_count,
            result.audit.confirmations.len()
        );
        assert_eq!(
            result.statistics.audit_confirmation_response_count,
            result.audit.confirmation_responses.len()
        );
        assert_eq!(
            result.statistics.audit_procedure_step_count,
            result.audit.procedure_steps.len()
        );
        assert_eq!(
            result.statistics.audit_sample_count,
            result.audit.samples.len()
        );
        assert_eq!(
            result.statistics.audit_analytical_result_count,
            result.audit.analytical_results.len()
        );
        assert_eq!(
            result.statistics.audit_ia_function_count,
            result.audit.ia_functions.len()
        );
        assert_eq!(
            result.statistics.audit_ia_report_count,
            result.audit.ia_reports.len()
        );
        assert_eq!(
            result.statistics.audit_related_party_count,
            result.audit.related_parties.len()
        );
        assert_eq!(
            result.statistics.audit_related_party_transaction_count,
            result.audit.related_party_transactions.len()
        );
    }

    /// LLM, diffusion and causal phases are off in the test config, and a run
    /// leaves all of their statistics (and counterfactual pairs) at zero/empty.
    #[test]
    fn test_new_phases_disabled_by_default() {
        let config = create_test_config();
        assert!(!config.llm.enabled);
        assert!(!config.diffusion.enabled);
        assert!(!config.causal.enabled);

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        let result = orchestrator.generate().unwrap();

        assert_eq!(result.statistics.llm_enrichment_ms, 0);
        assert_eq!(result.statistics.llm_vendors_enriched, 0);
        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
        assert_eq!(result.statistics.diffusion_samples_generated, 0);
        assert_eq!(result.statistics.causal_generation_ms, 0);
        assert_eq!(result.statistics.causal_samples_generated, 0);
        assert!(result.statistics.causal_validation_passed.is_none());
        assert_eq!(result.statistics.counterfactual_pair_count, 0);
        assert!(result.counterfactual_pairs.is_empty());
    }

    /// With counterfactuals enabled (and period close disabled) there is
    /// exactly one counterfactual pair per journal entry, each with a unique
    /// pair id.
    #[test]
    fn test_counterfactual_generation_enabled() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_counterfactuals: true,
            generate_period_close: false,
            ..journal_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Guard against a vacuous pass: without entries the checks below
        // would compare 0 with 0 and prove nothing.
        assert!(
            !result.journal_entries.is_empty(),
            "journal entries must be generated for the counterfactual checks to be meaningful"
        );

        assert_eq!(
            result.counterfactual_pairs.len(),
            result.journal_entries.len()
        );
        assert_eq!(
            result.statistics.counterfactual_pair_count,
            result.journal_entries.len()
        );
        let ids: std::collections::HashSet<_> = result
            .counterfactual_pairs
            .iter()
            .map(|p| p.pair_id.clone())
            .collect();
        assert_eq!(ids.len(), result.counterfactual_pairs.len());
    }

    /// LLM enrichment touches at least one vendor and respects
    /// `max_vendor_enrichments`.
    #[test]
    fn test_llm_enrichment_enabled() {
        let mut config = create_test_config();
        config.llm.enabled = true;
        config.llm.max_vendor_enrichments = 3;

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 3,
            materials_per_company: 3,
            assets_per_company: 3,
            employees_per_company: 3,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(result.statistics.llm_vendors_enriched > 0);
        assert!(result.statistics.llm_vendors_enriched <= 3);
    }

    /// Diffusion enhancement generates exactly `sample_size` samples.
    #[test]
    fn test_diffusion_enhancement_enabled() {
        let mut config = create_test_config();
        config.diffusion.enabled = true;
        config.diffusion.n_steps = 50;
        config.diffusion.sample_size = 20;

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        let result = orchestrator.generate().unwrap();

        assert_eq!(result.statistics.diffusion_samples_generated, 20);
    }

    /// The causal overlay generates `sample_size` samples and records a
    /// validation outcome when validation is requested.
    #[test]
    fn test_causal_overlay_enabled() {
        let mut config = create_test_config();
        config.causal.enabled = true;
        config.causal.template = "fraud_detection".to_string();
        config.causal.sample_size = 100;
        config.causal.validate = true;

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        let result = orchestrator.generate().unwrap();

        assert_eq!(result.statistics.causal_samples_generated, 100);
        assert!(result.statistics.causal_validation_passed.is_some());
    }

    /// The `revenue_cycle` template generates `sample_size` samples and
    /// records no validation outcome when validation is off.
    #[test]
    fn test_causal_overlay_revenue_cycle_template() {
        let mut config = create_test_config();
        config.causal.enabled = true;
        config.causal.template = "revenue_cycle".to_string();
        config.causal.sample_size = 50;
        config.causal.validate = false;

        let mut orchestrator =
            EnhancedOrchestrator::new(config, journal_only_phases()).unwrap();
        let result = orchestrator.generate().unwrap();

        assert_eq!(result.statistics.causal_samples_generated, 50);
        assert!(result.statistics.causal_validation_passed.is_none());
    }

    /// LLM enrichment, diffusion and the causal overlay can all run in the
    /// same generation pass.
    #[test]
    fn test_all_new_phases_enabled_together() {
        let mut config = create_test_config();
        config.llm.enabled = true;
        config.llm.max_vendor_enrichments = 2;
        config.diffusion.enabled = true;
        config.diffusion.n_steps = 20;
        config.diffusion.sample_size = 10;
        config.causal.enabled = true;
        config.causal.sample_size = 50;
        config.causal.validate = true;

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 3,
            materials_per_company: 3,
            assets_per_company: 3,
            employees_per_company: 3,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(result.statistics.llm_vendors_enriched > 0);
        assert_eq!(result.statistics.diffusion_samples_generated, 10);
        assert_eq!(result.statistics.causal_samples_generated, 50);
        assert!(result.statistics.causal_validation_passed.is_some());
    }

    /// The LLM/diffusion/causal statistics survive a JSON round trip.
    #[test]
    fn test_statistics_serialization_with_new_fields() {
        let stats = EnhancedGenerationStatistics {
            total_entries: 100,
            total_line_items: 500,
            llm_enrichment_ms: 42,
            llm_vendors_enriched: 10,
            diffusion_enhancement_ms: 100,
            diffusion_samples_generated: 50,
            causal_generation_ms: 200,
            causal_samples_generated: 100,
            causal_validation_passed: Some(true),
            ..Default::default()
        };

        let json = serde_json::to_string(&stats).unwrap();
        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.llm_enrichment_ms, 42);
        assert_eq!(deserialized.llm_vendors_enriched, 10);
        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
        assert_eq!(deserialized.diffusion_samples_generated, 50);
        assert_eq!(deserialized.causal_generation_ms, 200);
        assert_eq!(deserialized.causal_samples_generated, 100);
        assert_eq!(deserialized.causal_validation_passed, Some(true));
    }

    /// JSON that lacks the LLM/diffusion/causal fields still deserializes,
    /// with those fields taking their zero/`None` defaults.
    #[test]
    fn test_statistics_backward_compat_deserialization() {
        let old_json = r#"{
 "total_entries": 100,
 "total_line_items": 500,
 "accounts_count": 50,
 "companies_count": 1,
 "period_months": 12,
 "vendor_count": 10,
 "customer_count": 20,
 "material_count": 15,
 "asset_count": 5,
 "employee_count": 8,
 "p2p_chain_count": 5,
 "o2c_chain_count": 5,
 "ap_invoice_count": 5,
 "ar_invoice_count": 5,
 "ocpm_event_count": 0,
 "ocpm_object_count": 0,
 "ocpm_case_count": 0,
 "audit_engagement_count": 0,
 "audit_workpaper_count": 0,
 "audit_evidence_count": 0,
 "audit_risk_count": 0,
 "audit_finding_count": 0,
 "audit_judgment_count": 0,
 "anomalies_injected": 0,
 "data_quality_issues": 0,
 "banking_customer_count": 0,
 "banking_account_count": 0,
 "banking_transaction_count": 0,
 "banking_suspicious_count": 0,
 "graph_export_count": 0,
 "graph_node_count": 0,
 "graph_edge_count": 0
 }"#;

        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();

        assert_eq!(stats.llm_enrichment_ms, 0);
        assert_eq!(stats.llm_vendors_enriched, 0);
        assert_eq!(stats.diffusion_enhancement_ms, 0);
        assert_eq!(stats.diffusion_samples_generated, 0);
        assert_eq!(stats.causal_generation_ms, 0);
        assert_eq!(stats.causal_samples_generated, 0);
        assert!(stats.causal_validation_passed.is_none());
    }
}