1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Per-phase switches and volume settings for a generation run.
///
/// `bool` fields are on/off flags and `usize` fields are counts. The defaults
/// quoted on each field are the values set by the `Default` impl in this file;
/// [`PhaseConfig::from_config`] derives most flags from a `GeneratorConfig`
/// instead. How each value is consumed is not visible in this part of the file.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master-data phase flag. Default: `true`.
    pub generate_master_data: bool,
    /// Document-flow phase flag. Default: `true`.
    pub generate_document_flows: bool,
    /// OCPM event phase flag. Default: `false`.
    pub generate_ocpm_events: bool,
    /// Journal-entry phase flag. Default: `true`.
    pub generate_journal_entries: bool,
    /// Anomaly injection flag. Default: `false`.
    pub inject_anomalies: bool,
    /// Data-quality injection flag. Default: `false`.
    pub inject_data_quality: bool,
    /// Balance validation flag. Default: `true`.
    pub validate_balances: bool,
    /// Progress display flag. Default: `true`.
    pub show_progress: bool,
    /// Vendor count per company. Default: `50`.
    pub vendors_per_company: usize,
    /// Customer count per company. Default: `100`.
    pub customers_per_company: usize,
    /// Material count per company. Default: `200`.
    pub materials_per_company: usize,
    /// Asset count per company. Default: `50`.
    pub assets_per_company: usize,
    /// Employee count per company. Default: `100`.
    pub employees_per_company: usize,
    /// P2P chain count. Default: `100`.
    pub p2p_chains: usize,
    /// O2C chain count. Default: `100`.
    pub o2c_chains: usize,
    /// Audit phase flag. Default: `false`.
    pub generate_audit: bool,
    /// Audit engagement count. Default: `5`.
    pub audit_engagements: usize,
    /// Workpaper count per engagement. Default: `20`.
    pub workpapers_per_engagement: usize,
    /// Evidence count per workpaper. Default: `5`.
    pub evidence_per_workpaper: usize,
    /// Risk count per engagement. Default: `15`.
    pub risks_per_engagement: usize,
    /// Finding count per engagement. Default: `8`.
    pub findings_per_engagement: usize,
    /// Judgment count per engagement. Default: `10`.
    pub judgments_per_engagement: usize,
    /// Banking phase flag. Default: `false`.
    pub generate_banking: bool,
    /// Graph export flag. Default: `false`.
    pub generate_graph_export: bool,
    /// Sourcing phase flag. Default: `false`.
    pub generate_sourcing: bool,
    /// Bank reconciliation flag. Default: `false`.
    pub generate_bank_reconciliation: bool,
    /// Financial statements flag. Default: `false`.
    pub generate_financial_statements: bool,
    /// Accounting standards flag. Default: `false`.
    pub generate_accounting_standards: bool,
    /// Manufacturing phase flag. Default: `false`.
    pub generate_manufacturing: bool,
    /// Sales quotes / KPI / budgets flag. Default: `false`.
    pub generate_sales_kpi_budgets: bool,
    /// Tax phase flag. Default: `false`.
    pub generate_tax: bool,
    /// ESG phase flag. Default: `false`.
    pub generate_esg: bool,
    /// Intercompany phase flag. Default: `false`.
    pub generate_intercompany: bool,
    /// Evolution events flag. Default: `true`.
    pub generate_evolution_events: bool,
    /// Counterfactuals flag. Default: `false`.
    pub generate_counterfactuals: bool,
    /// Compliance regulations flag. Default: `false`.
    pub generate_compliance_regulations: bool,
    /// Period close flag. Default: `true`.
    pub generate_period_close: bool,
    /// HR phase flag. Default: `false`.
    pub generate_hr: bool,
    /// Treasury phase flag. Default: `false`.
    pub generate_treasury: bool,
    /// Project accounting flag. Default: `false`.
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338 fn default() -> Self {
339 Self {
340 generate_master_data: true,
341 generate_document_flows: true,
342 generate_ocpm_events: false, generate_journal_entries: true,
344 inject_anomalies: false,
345 inject_data_quality: false, validate_balances: true,
347 show_progress: true,
348 vendors_per_company: 50,
349 customers_per_company: 100,
350 materials_per_company: 200,
351 assets_per_company: 50,
352 employees_per_company: 100,
353 p2p_chains: 100,
354 o2c_chains: 100,
355 generate_audit: false, audit_engagements: 5,
357 workpapers_per_engagement: 20,
358 evidence_per_workpaper: 5,
359 risks_per_engagement: 15,
360 findings_per_engagement: 8,
361 judgments_per_engagement: 10,
362 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, }
381 }
382}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
/// Collections of master-data records grouped by entity type.
///
/// `Default` yields a snapshot with every collection empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Summary of a hypergraph export: element counts plus an output path.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Output path of the export.
    pub output_path: PathBuf,
}
474
/// Collections of document-flow data: P2P/O2C chains and per-type documents.
///
/// NOTE(review): the per-type vectors presumably flatten the documents held
/// in the chains — confirm against the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Collections of subledger data (AP, AR, fixed assets, inventory).
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// OCPM output: an optional event log plus three counters.
///
/// NOTE(review): the counters presumably describe `event_log` — confirm
/// against the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
540
/// Collections of audit-related records.
///
/// Every field is a `Vec` of one record type, except `group_audit_plan` and
/// `fsm_event_trail`, which are optional. `Default` yields empty collections
/// and `None` for the optional fields.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// `Sox302Certification` records.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// `Sox404Assessment` records.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// `PcaobIsaMapping` records.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// `IsaStandardEntry` records.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM events, if a trail is present.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
/// Collections of banking data: entities, label sets, narratives and two counters.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious count. NOTE(review): presumably counts transactions — confirm.
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
678
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Export flag; `false` in the `Default` value.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string.
    /// NOTE(review): the key is presumably the export name — confirm.
    pub exports: HashMap<String, GraphExportInfo>,
}
689
/// Serializable description of one graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, stored as a free-form string.
    pub format: String,
    /// Output path of the export.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
704
/// Collections of sourcing records.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
727
/// Trial-balance entries for one fiscal period; serializable and deserializable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number. NOTE(review): valid range is not visible here.
    pub fiscal_period: u8,
    /// Start date of the period.
    pub period_start: NaiveDate,
    /// End date of the period.
    pub period_end: NaiveDate,
    /// Trial-balance entries.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
742
/// Collections of financial-reporting data.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Financial statements grouped under a string key.
    /// NOTE(review): the key is presumably an entity/company code — confirm.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated financial statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Financial-statement notes.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
767
/// Collections of HR data (payroll, time, expenses, benefits, pensions, stock
/// compensation) together with separate counter fields.
///
/// NOTE(review): the `*_count` fields are stored independently of the
/// vectors; whether they always equal the vector lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Pension journal entries.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock-compensation expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Stock-compensation journal entries.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run counter.
    pub payroll_run_count: usize,
    /// Payroll line item counter.
    pub payroll_line_item_count: usize,
    /// Time entry counter.
    pub time_entry_count: usize,
    /// Expense report counter.
    pub expense_report_count: usize,
    /// Benefit enrollment counter.
    pub benefit_enrollment_count: usize,
    /// Pension plan counter.
    pub pension_plan_count: usize,
    /// Stock grant counter.
    pub stock_grant_count: usize,
}
812
/// Collections of accounting-standards data together with separate counters.
///
/// NOTE(review): the `*_count` fields are stored independently of the
/// vectors; whether they always equal the vector lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue module).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Business-combination journal entries.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// ECL journal entries.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Provision journal entries.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract counter.
    pub revenue_contract_count: usize,
    /// Impairment test counter.
    pub impairment_test_count: usize,
    /// Business combination counter.
    pub business_combination_count: usize,
    /// ECL model counter.
    pub ecl_model_count: usize,
    /// Provision counter.
    pub provision_count: usize,
    /// Currency translation counter.
    pub currency_translation_count: usize,
}
856
/// Collections of compliance-regulation records plus an optional graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
875
/// Collections of manufacturing data together with separate counters.
///
/// NOTE(review): the `*_count` fields are stored independently of the
/// vectors; whether they always equal the vector lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order counter.
    pub production_order_count: usize,
    /// Quality inspection counter.
    pub quality_inspection_count: usize,
    /// Cycle count counter.
    pub cycle_count_count: usize,
    /// BOM component counter.
    pub bom_component_count: usize,
    /// Inventory movement counter.
    pub inventory_movement_count: usize,
}
900
/// Collections of sales quotes, management KPIs and budgets, with counters.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote counter.
    pub sales_quote_count: usize,
    /// KPI counter.
    pub kpi_count: usize,
    /// Budget line counter.
    /// NOTE(review): named for budget *lines*, not budgets — confirm what is counted.
    pub budget_line_count: usize,
}
917
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Anomaly summary, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string.
    /// NOTE(review): the key is presumably the anomaly type name — confirm.
    pub by_type: HashMap<String, usize>,
}
928
/// Outcome of balance validation.
///
/// The `Default` value has both booleans `false`, zero counters and totals,
/// and no validation errors.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Validation flag. NOTE(review): presumably "validation was run" — confirm.
    pub validated: bool,
    /// Balanced flag.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits as a decimal.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits as a decimal.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Unbalanced-entries flag.
    pub has_unbalanced_entries: bool,
}
951
/// Collections of tax data, two counters and a deferred-tax snapshot.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction counter.
    pub jurisdiction_count: usize,
    /// Tax code counter.
    pub code_count: usize,
    /// Deferred-tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Tax posting journal entries.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
978
/// Intercompany data; serializable and deserializable, except for
/// `ic_document_chains`, which is marked `#[serde(skip)]`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains, if present. Skipped by serde, so the
    /// field is absent from serialized output and takes its `Default` value
    /// (`None`) when deserializing.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair counter.
    pub matched_pair_count: usize,
    /// Elimination entry counter.
    pub elimination_entry_count: usize,
    /// Match rate. NOTE(review): scale (fraction vs. percent) is not visible here.
    pub match_rate: f64,
}
1004
/// Collections of ESG data with two counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission counter.
    pub emission_count: usize,
    /// Disclosure counter.
    pub disclosure_count: usize,
}
1041
/// Collections of treasury data.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Treasury journal entries.
    pub journal_entries: Vec<JournalEntry>,
}
1069
/// Collections of project-accounting data.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1086
/// Aggregate of everything a generation run produces: the chart of accounts,
/// one snapshot per domain, the journal entries, and assorted labels,
/// validation results and statistics.
///
/// Derives `Debug` and `Default` only; unlike the snapshot types it contains,
/// it does not derive `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph-export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes / KPI / budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate result, if one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// `SodViolation` records.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains of vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity graph, if one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry output, if one is present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1178
/// Counters and timings collected while a generation run executes.
///
/// `generate` starts from `Default::default()`, fills in `companies_count` and
/// `period_months` from the configuration, and then hands `&mut stats` to the
/// individual phases.
///
/// Fields annotated with `#[serde(default)]` fall back to their `Default`
/// value when they are missing from the serialized input; fields without the
/// attribute must be present when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and general setup ---
    /// Journal entry count; `generate` sets it from `entries.len()` once the
    /// JE-producing phases have run (only when there is at least one entry).
    pub total_entries: u64,
    /// Sum of `line_count()` over all journal entries, set alongside `total_entries`.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Initialised in `generate` from `config.companies.len()`.
    pub companies_count: usize,
    /// Initialised in `generate` from `config.global.period_months`.
    pub period_months: u32,

    // --- Master data counters ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flow chain counters (`p2p_*`, `o2c_*`) ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,

    // --- AP / AR invoice counters ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- `ocpm_*` counters ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- `audit_*` counters ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomaly and data-quality counters ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- `banking_*` counters ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    /// Re-set in `generate` to `banking.transactions.len()` after bridged
    /// payment transactions have been appended.
    pub banking_transaction_count: usize,
    /// Re-set in `generate` to the number of transactions with `is_suspicious`
    /// after payment bridging.
    pub banking_suspicious_count: usize,

    // --- `graph_*` counters ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM / diffusion / causal statistics (`*_ms` fields are u64 timings) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` by default (the `Default` value of `Option<bool>`).
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing counters ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting counters ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR counters ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards counters ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing counters ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales quote / KPI / budget counters ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax master data counters ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- `esg_*` counters ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- `ic_*` counters ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledger counters ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- `treasury_*` instrument counters ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- `project_*` counters ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Tax provision, opening balance and reconciliation counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- `cash_*` counters ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Process / organizational event counters ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Counterfactual / fraud-structure counters ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Temporal / relationship / cross-process counters ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Miscellaneous counters ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1429
/// Drives the enhanced generation workflow (see `generate`).
///
/// Construct it with `new`, `with_defaults`, or one of the `from_fingerprint*`
/// constructors, then optionally adjust it through the `with_*` builder
/// methods before calling `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; checked by `validate_config` in `new`.
    config: GeneratorConfig,
    /// Per-phase settings (for example `show_progress` and `generate_banking`).
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master-data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress container; created by `with_progress(true)`, otherwise `None`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard produced by `build_resource_guard`; rebuilt by `with_output_path`.
    resource_guard: ResourceGuard,
    /// Output path recorded by `with_output_path`; `None` until then.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless populated by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country-pack registry built in `new` from `config.country_packs`
    /// (or the built-in packs when that section is absent).
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives items emitted by `emit_phase_items`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453 datasynth_config::validate_config(&config)?;
1454
1455 let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457 let resource_guard = Self::build_resource_guard(&config, None);
1459
1460 let country_pack_registry = match &config.country_packs {
1462 Some(cp) => {
1463 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464 .map_err(|e| SynthError::config(e.to_string()))?
1465 }
1466 None => datasynth_core::CountryPackRegistry::builtin_only()
1467 .map_err(|e| SynthError::config(e.to_string()))?,
1468 };
1469
1470 Ok(Self {
1471 config,
1472 phase_config,
1473 coa: None,
1474 master_data: MasterDataSnapshot::default(),
1475 seed,
1476 multi_progress: None,
1477 resource_guard,
1478 output_path: None,
1479 copula_generators: Vec::new(),
1480 country_pack_registry,
1481 phase_sink: None,
1482 })
1483 }
1484
1485 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487 Self::new(config, PhaseConfig::default())
1488 }
1489
1490 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492 self.phase_sink = Some(sink);
1493 self
1494 }
1495
1496 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498 self.phase_sink = Some(sink);
1499 }
1500
1501 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503 if let Some(ref sink) = self.phase_sink {
1504 for item in items {
1505 if let Ok(value) = serde_json::to_value(item) {
1506 if let Err(e) = sink.emit(phase, type_name, &value) {
1507 warn!(
1508 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509 );
1510 }
1511 }
1512 }
1513 if let Err(e) = sink.phase_complete(phase) {
1514 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515 }
1516 }
1517 }
1518
1519 pub fn with_progress(mut self, show: bool) -> Self {
1521 self.phase_config.show_progress = show;
1522 if show {
1523 self.multi_progress = Some(MultiProgress::new());
1524 }
1525 self
1526 }
1527
1528 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530 let path = path.into();
1531 self.output_path = Some(path.clone());
1532 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534 self
1535 }
1536
1537 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539 &self.country_pack_registry
1540 }
1541
1542 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544 self.country_pack_registry.get_by_str(country)
1545 }
1546
1547 fn primary_country_code(&self) -> &str {
1550 self.config
1551 .companies
1552 .first()
1553 .map(|c| c.country.as_str())
1554 .unwrap_or("US")
1555 }
1556
1557 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559 self.country_pack_for(self.primary_country_code())
1560 }
1561
1562 fn resolve_coa_framework(&self) -> CoAFramework {
1564 if self.config.accounting_standards.enabled {
1565 match self.config.accounting_standards.framework {
1566 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567 return CoAFramework::FrenchPcg;
1568 }
1569 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570 return CoAFramework::GermanSkr04;
1571 }
1572 _ => {}
1573 }
1574 }
1575 let pack = self.primary_pack();
1577 match pack.accounting.framework.as_str() {
1578 "french_gaap" => CoAFramework::FrenchPcg,
1579 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580 _ => CoAFramework::UsGaap,
1581 }
1582 }
1583
1584 pub fn has_copulas(&self) -> bool {
1589 !self.copula_generators.is_empty()
1590 }
1591
1592 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598 &self.copula_generators
1599 }
1600
1601 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605 &mut self.copula_generators
1606 }
1607
1608 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612 self.copula_generators
1613 .iter_mut()
1614 .find(|c| c.name == copula_name)
1615 .map(|c| c.generator.sample())
1616 }
1617
1618 pub fn from_fingerprint(
1641 fingerprint_path: &std::path::Path,
1642 phase_config: PhaseConfig,
1643 scale: f64,
1644 ) -> SynthResult<Self> {
1645 info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647 let reader = FingerprintReader::new();
1649 let fingerprint = reader
1650 .read_from_file(fingerprint_path)
1651 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654 }
1655
1656 pub fn from_fingerprint_data(
1663 fingerprint: Fingerprint,
1664 phase_config: PhaseConfig,
1665 scale: f64,
1666 ) -> SynthResult<Self> {
1667 info!(
1668 "Synthesizing config from fingerprint (version: {}, tables: {})",
1669 fingerprint.manifest.version,
1670 fingerprint.schema.tables.len()
1671 );
1672
1673 let seed: u64 = rand::random();
1675 info!("Fingerprint synthesis seed: {}", seed);
1676
1677 let options = SynthesisOptions {
1679 scale,
1680 seed: Some(seed),
1681 preserve_correlations: true,
1682 inject_anomalies: true,
1683 };
1684 let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686 let synthesis_result = synthesizer
1688 .synthesize_full(&fingerprint, seed)
1689 .map_err(|e| {
1690 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691 })?;
1692
1693 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695 Self::base_config_for_industry(industry)
1696 } else {
1697 Self::base_config_for_industry("manufacturing")
1698 };
1699
1700 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703 info!(
1705 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706 fingerprint.schema.tables.len(),
1707 scale,
1708 synthesis_result.copula_generators.len()
1709 );
1710
1711 if !synthesis_result.copula_generators.is_empty() {
1712 for spec in &synthesis_result.copula_generators {
1713 info!(
1714 " Copula '{}' for table '{}': {} columns",
1715 spec.name,
1716 spec.table,
1717 spec.columns.len()
1718 );
1719 }
1720 }
1721
1722 let mut orchestrator = Self::new(config, phase_config)?;
1724
1725 orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728 Ok(orchestrator)
1729 }
1730
1731 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733 use datasynth_config::presets::create_preset;
1734 use datasynth_config::TransactionVolume;
1735 use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737 let sector = match industry.to_lowercase().as_str() {
1738 "manufacturing" => IndustrySector::Manufacturing,
1739 "retail" => IndustrySector::Retail,
1740 "financial" | "financial_services" => IndustrySector::FinancialServices,
1741 "healthcare" => IndustrySector::Healthcare,
1742 "technology" | "tech" => IndustrySector::Technology,
1743 _ => IndustrySector::Manufacturing,
1744 };
1745
1746 create_preset(
1748 sector,
1749 1, 12, CoAComplexity::Medium,
1752 TransactionVolume::TenK,
1753 )
1754 }
1755
1756 fn apply_config_patch(
1758 mut config: GeneratorConfig,
1759 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760 ) -> GeneratorConfig {
1761 use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763 for (key, value) in patch.values() {
1764 match (key.as_str(), value) {
1765 ("transactions.count", ConfigValue::Integer(n)) => {
1768 info!(
1769 "Fingerprint suggests {} transactions (apply via company volumes)",
1770 n
1771 );
1772 }
1773 ("global.period_months", ConfigValue::Integer(n)) => {
1774 config.global.period_months = (*n).clamp(1, 120) as u32;
1775 }
1776 ("global.start_date", ConfigValue::String(s)) => {
1777 config.global.start_date = s.clone();
1778 }
1779 ("global.seed", ConfigValue::Integer(n)) => {
1780 config.global.seed = Some(*n as u64);
1781 }
1782 ("fraud.enabled", ConfigValue::Bool(b)) => {
1783 config.fraud.enabled = *b;
1784 }
1785 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786 config.fraud.fraud_rate = *f;
1787 }
1788 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789 config.data_quality.enabled = *b;
1790 }
1791 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 _ => {
1799 debug!("Ignoring unknown config patch key: {}", key);
1800 }
1801 }
1802 }
1803
1804 config
1805 }
1806
1807 fn build_resource_guard(
1809 config: &GeneratorConfig,
1810 output_path: Option<PathBuf>,
1811 ) -> ResourceGuard {
1812 let mut builder = ResourceGuardBuilder::new();
1813
1814 if config.global.memory_limit_mb > 0 {
1816 builder = builder.memory_limit(config.global.memory_limit_mb);
1817 }
1818
1819 if let Some(path) = output_path {
1821 builder = builder.output_path(path).min_free_disk(100); }
1823
1824 builder = builder.conservative();
1826
1827 builder.build()
1828 }
1829
1830 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835 self.resource_guard.check()
1836 }
1837
1838 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840 let level = self.resource_guard.check()?;
1841
1842 if level != DegradationLevel::Normal {
1843 warn!(
1844 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845 phase,
1846 level,
1847 self.resource_guard.current_memory_mb(),
1848 self.resource_guard.available_disk_mb()
1849 );
1850 }
1851
1852 Ok(level)
1853 }
1854
1855 fn get_degradation_actions(&self) -> DegradationActions {
1857 self.resource_guard.get_actions()
1858 }
1859
1860 fn check_memory_limit(&self) -> SynthResult<()> {
1862 self.check_resources()?;
1863 Ok(())
1864 }
1865
1866 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868 info!("Starting enhanced generation workflow");
1869 info!(
1870 "Config: industry={:?}, period_months={}, companies={}",
1871 self.config.global.industry,
1872 self.config.global.period_months,
1873 self.config.companies.len()
1874 );
1875
1876 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879 datasynth_core::serde_decimal::set_numeric_native(is_native);
1880 struct NumericModeGuard;
1881 impl Drop for NumericModeGuard {
1882 fn drop(&mut self) {
1883 datasynth_core::serde_decimal::set_numeric_native(false);
1884 }
1885 }
1886 let _numeric_guard = if is_native {
1887 Some(NumericModeGuard)
1888 } else {
1889 None
1890 };
1891
1892 let initial_level = self.check_resources_with_log("initial")?;
1894 if initial_level == DegradationLevel::Emergency {
1895 return Err(SynthError::resource(
1896 "Insufficient resources to start generation",
1897 ));
1898 }
1899
1900 let mut stats = EnhancedGenerationStatistics {
1901 companies_count: self.config.companies.len(),
1902 period_months: self.config.global.period_months,
1903 ..Default::default()
1904 };
1905
1906 let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909 self.phase_master_data(&mut stats)?;
1911
1912 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917 let (mut document_flows, mut subledger, fa_journal_entries) =
1919 self.phase_document_flows(&mut stats)?;
1920
1921 self.emit_phase_items(
1923 "document_flows",
1924 "PurchaseOrder",
1925 &document_flows.purchase_orders,
1926 );
1927 self.emit_phase_items(
1928 "document_flows",
1929 "GoodsReceipt",
1930 &document_flows.goods_receipts,
1931 );
1932 self.emit_phase_items(
1933 "document_flows",
1934 "VendorInvoice",
1935 &document_flows.vendor_invoices,
1936 );
1937 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948 .iter()
1949 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950 .collect();
1951 if !opening_balance_jes.is_empty() {
1952 debug!(
1953 "Prepending {} opening balance JEs to entries",
1954 opening_balance_jes.len()
1955 );
1956 }
1957
1958 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961 if !opening_balance_jes.is_empty() {
1964 let mut combined = opening_balance_jes;
1965 combined.extend(entries);
1966 entries = combined;
1967 }
1968
1969 if !fa_journal_entries.is_empty() {
1971 debug!(
1972 "Appending {} FA acquisition JEs to main entries",
1973 fa_journal_entries.len()
1974 );
1975 entries.extend(fa_journal_entries);
1976 }
1977
1978 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981 let actions = self.get_degradation_actions();
1983
1984 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987 if !sourcing.contracts.is_empty() {
1990 let mut linked_count = 0usize;
1991 let po_vendor_pairs: Vec<(String, String)> = document_flows
1993 .p2p_chains
1994 .iter()
1995 .map(|chain| {
1996 (
1997 chain.purchase_order.vendor_id.clone(),
1998 chain.purchase_order.header.document_id.clone(),
1999 )
2000 })
2001 .collect();
2002
2003 for chain in &mut document_flows.p2p_chains {
2004 if chain.purchase_order.contract_id.is_none() {
2005 if let Some(contract) = sourcing
2006 .contracts
2007 .iter()
2008 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009 {
2010 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011 linked_count += 1;
2012 }
2013 }
2014 }
2015
2016 for contract in &mut sourcing.contracts {
2018 let po_ids: Vec<String> = po_vendor_pairs
2019 .iter()
2020 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021 .map(|(_, po_id)| po_id.clone())
2022 .collect();
2023 if !po_ids.is_empty() {
2024 contract.purchase_order_ids = po_ids;
2025 }
2026 }
2027
2028 if linked_count > 0 {
2029 debug!(
2030 "Linked {} purchase orders to S2C contracts by vendor match",
2031 linked_count
2032 );
2033 }
2034 }
2035
2036 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039 if !intercompany.seller_journal_entries.is_empty()
2041 || !intercompany.buyer_journal_entries.is_empty()
2042 {
2043 let ic_je_count = intercompany.seller_journal_entries.len()
2044 + intercompany.buyer_journal_entries.len();
2045 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047 debug!(
2048 "Appended {} IC journal entries to main entries",
2049 ic_je_count
2050 );
2051 }
2052
2053 if !intercompany.elimination_entries.is_empty() {
2055 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056 &intercompany.elimination_entries,
2057 );
2058 if !elim_jes.is_empty() {
2059 debug!(
2060 "Appended {} elimination journal entries to main entries",
2061 elim_jes.len()
2062 );
2063 let elim_debit: rust_decimal::Decimal =
2065 elim_jes.iter().map(|je| je.total_debit()).sum();
2066 let elim_credit: rust_decimal::Decimal =
2067 elim_jes.iter().map(|je| je.total_credit()).sum();
2068 if elim_debit != elim_credit {
2069 warn!(
2070 "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2071 elim_debit,
2072 elim_credit,
2073 elim_debit - elim_credit
2074 );
2075 }
2076 entries.extend(elim_jes);
2077 }
2078 }
2079
2080 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2082 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2083 document_flows
2084 .customer_invoices
2085 .extend(ic_docs.seller_invoices.iter().cloned());
2086 document_flows
2087 .purchase_orders
2088 .extend(ic_docs.buyer_orders.iter().cloned());
2089 document_flows
2090 .goods_receipts
2091 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2092 document_flows
2093 .vendor_invoices
2094 .extend(ic_docs.buyer_invoices.iter().cloned());
2095 debug!(
2096 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2097 ic_docs.seller_invoices.len(),
2098 ic_docs.buyer_orders.len(),
2099 ic_docs.buyer_goods_receipts.len(),
2100 ic_docs.buyer_invoices.len(),
2101 );
2102 }
2103 }
2104
2105 let hr = self.phase_hr_data(&mut stats)?;
2107
2108 if !hr.payroll_runs.is_empty() {
2110 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2111 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2112 entries.extend(payroll_jes);
2113 }
2114
2115 if !hr.pension_journal_entries.is_empty() {
2117 debug!(
2118 "Generated {} JEs from pension plans",
2119 hr.pension_journal_entries.len()
2120 );
2121 entries.extend(hr.pension_journal_entries.iter().cloned());
2122 }
2123
2124 if !hr.stock_comp_journal_entries.is_empty() {
2126 debug!(
2127 "Generated {} JEs from stock-based compensation",
2128 hr.stock_comp_journal_entries.len()
2129 );
2130 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2131 }
2132
2133 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2135
2136 if !manufacturing_snap.production_orders.is_empty() {
2138 let currency = self
2139 .config
2140 .companies
2141 .first()
2142 .map(|c| c.currency.as_str())
2143 .unwrap_or("USD");
2144 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2145 &manufacturing_snap.production_orders,
2146 &manufacturing_snap.quality_inspections,
2147 currency,
2148 );
2149 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2150 entries.extend(mfg_jes);
2151 }
2152
2153 if !manufacturing_snap.quality_inspections.is_empty() {
2155 let framework = match self.config.accounting_standards.framework {
2156 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2157 _ => "US_GAAP",
2158 };
2159 for company in &self.config.companies {
2160 let company_orders: Vec<_> = manufacturing_snap
2161 .production_orders
2162 .iter()
2163 .filter(|o| o.company_code == company.code)
2164 .cloned()
2165 .collect();
2166 let company_inspections: Vec<_> = manufacturing_snap
2167 .quality_inspections
2168 .iter()
2169 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2170 .cloned()
2171 .collect();
2172 if company_inspections.is_empty() {
2173 continue;
2174 }
2175 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2176 let warranty_result = warranty_gen.generate(
2177 &company.code,
2178 &company_orders,
2179 &company_inspections,
2180 &company.currency,
2181 framework,
2182 );
2183 if !warranty_result.journal_entries.is_empty() {
2184 debug!(
2185 "Generated {} warranty provision JEs for {}",
2186 warranty_result.journal_entries.len(),
2187 company.code
2188 );
2189 entries.extend(warranty_result.journal_entries);
2190 }
2191 }
2192 }
2193
2194 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2196 {
2197 let cogs_currency = self
2198 .config
2199 .companies
2200 .first()
2201 .map(|c| c.currency.as_str())
2202 .unwrap_or("USD");
2203 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2204 &document_flows.deliveries,
2205 &manufacturing_snap.production_orders,
2206 cogs_currency,
2207 );
2208 if !cogs_jes.is_empty() {
2209 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2210 entries.extend(cogs_jes);
2211 }
2212 }
2213
2214 if !manufacturing_snap.inventory_movements.is_empty()
2220 && !subledger.inventory_positions.is_empty()
2221 {
2222 use datasynth_core::models::MovementType as MfgMovementType;
2223 let mut receipt_count = 0usize;
2224 let mut issue_count = 0usize;
2225 for movement in &manufacturing_snap.inventory_movements {
2226 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2228 p.material_id == movement.material_code
2229 && p.company_code == movement.entity_code
2230 }) {
2231 match movement.movement_type {
2232 MfgMovementType::GoodsReceipt => {
2233 pos.add_quantity(
2235 movement.quantity,
2236 movement.value,
2237 movement.movement_date,
2238 );
2239 receipt_count += 1;
2240 }
2241 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2242 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2244 issue_count += 1;
2245 }
2246 _ => {}
2247 }
2248 }
2249 }
2250 debug!(
2251 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2252 manufacturing_snap.inventory_movements.len(),
2253 receipt_count,
2254 issue_count,
2255 );
2256 }
2257
2258 if !entries.is_empty() {
2261 stats.total_entries = entries.len() as u64;
2262 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2263 debug!(
2264 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2265 stats.total_entries, stats.total_line_items
2266 );
2267 }
2268
2269 if self.config.internal_controls.enabled && !entries.is_empty() {
2271 info!("Phase 7b: Applying internal controls to journal entries");
2272 let control_config = ControlGeneratorConfig {
2273 exception_rate: self.config.internal_controls.exception_rate,
2274 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2275 enable_sox_marking: true,
2276 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2277 self.config.internal_controls.sox_materiality_threshold,
2278 )
2279 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2280 ..Default::default()
2281 };
2282 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2283 for entry in &mut entries {
2284 control_gen.apply_controls(entry, &coa);
2285 }
2286 let with_controls = entries
2287 .iter()
2288 .filter(|e| !e.header.control_ids.is_empty())
2289 .count();
2290 info!(
2291 "Applied controls to {} entries ({} with control IDs assigned)",
2292 entries.len(),
2293 with_controls
2294 );
2295 }
2296
2297 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2301 .iter()
2302 .filter(|e| e.header.sod_violation)
2303 .filter_map(|e| {
2304 e.header.sod_conflict_type.map(|ct| {
2305 use datasynth_core::models::{RiskLevel, SodViolation};
2306 let severity = match ct {
2307 datasynth_core::models::SodConflictType::PaymentReleaser
2308 | datasynth_core::models::SodConflictType::RequesterApprover => {
2309 RiskLevel::Critical
2310 }
2311 datasynth_core::models::SodConflictType::PreparerApprover
2312 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2313 | datasynth_core::models::SodConflictType::JournalEntryPoster
2314 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2315 RiskLevel::High
2316 }
2317 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2318 RiskLevel::Medium
2319 }
2320 };
2321 let action = format!(
2322 "SoD conflict {:?} on entry {} ({})",
2323 ct, e.header.document_id, e.header.company_code
2324 );
2325 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2326 })
2327 })
2328 .collect();
2329 if !sod_violations.is_empty() {
2330 info!(
2331 "Phase 7c: Extracted {} SoD violations from {} entries",
2332 sod_violations.len(),
2333 entries.len()
2334 );
2335 }
2336
2337 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2339
2340 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2342
2343 self.emit_phase_items(
2345 "anomaly_injection",
2346 "LabeledAnomaly",
2347 &anomaly_labels.labels,
2348 );
2349
2350 {
2354 use std::collections::HashMap;
2355 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2368 for je in &entries {
2369 if je.header.is_fraud {
2370 if let Some(ref fraud_type) = je.header.fraud_type {
2371 if let Some(ref reference) = je.header.reference {
2372 fraud_map.insert(reference.clone(), *fraud_type);
2374 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2377 if !bare.is_empty() {
2378 fraud_map.insert(bare.to_string(), *fraud_type);
2379 }
2380 }
2381 }
2382 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2384 }
2385 }
2386 }
2387 if !fraud_map.is_empty() {
2388 let mut propagated = 0usize;
2389 macro_rules! propagate_to {
2391 ($collection:expr) => {
2392 for doc in &mut $collection {
2393 if doc.header.propagate_fraud(&fraud_map) {
2394 propagated += 1;
2395 }
2396 }
2397 };
2398 }
2399 propagate_to!(document_flows.purchase_orders);
2400 propagate_to!(document_flows.goods_receipts);
2401 propagate_to!(document_flows.vendor_invoices);
2402 propagate_to!(document_flows.payments);
2403 propagate_to!(document_flows.sales_orders);
2404 propagate_to!(document_flows.deliveries);
2405 propagate_to!(document_flows.customer_invoices);
2406 if propagated > 0 {
2407 info!(
2408 "Propagated fraud labels to {} document flow records",
2409 propagated
2410 );
2411 }
2412 }
2413 }
2414
2415 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2417
2418 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2420
2421 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2423
2424 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2426
2427 let balance_validation = self.phase_balance_validation(&entries)?;
2429
2430 let subledger_reconciliation =
2432 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2433
2434 let (data_quality_stats, quality_issues) =
2436 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2437
2438 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2440
2441 let audit = self.phase_audit_data(&entries, &mut stats)?;
2443
2444 let mut banking = self.phase_banking_data(&mut stats)?;
2446
2447 if self.phase_config.generate_banking
2452 && !document_flows.payments.is_empty()
2453 && !banking.accounts.is_empty()
2454 {
2455 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2456 if bridge_rate > 0.0 {
2457 let mut bridge =
2458 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2459 self.seed,
2460 );
2461 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2462 &document_flows.payments,
2463 &banking.customers,
2464 &banking.accounts,
2465 bridge_rate,
2466 );
2467 info!(
2468 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2469 bridge_stats.bridged_count,
2470 bridge_stats.transactions_emitted,
2471 bridge_stats.fraud_propagated,
2472 );
2473 let bridged_count = bridged_txns.len();
2474 banking.transactions.extend(bridged_txns);
2475
2476 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2479 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2480 &mut banking.transactions,
2481 );
2482 }
2483
2484 banking.suspicious_count = banking
2486 .transactions
2487 .iter()
2488 .filter(|t| t.is_suspicious)
2489 .count();
2490 stats.banking_transaction_count = banking.transactions.len();
2491 stats.banking_suspicious_count = banking.suspicious_count;
2492 }
2493 }
2494
2495 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2497
2498 self.phase_llm_enrichment(&mut stats);
2500
2501 self.phase_diffusion_enhancement(&mut stats);
2503
2504 self.phase_causal_overlay(&mut stats);
2506
2507 let mut financial_reporting = self.phase_financial_reporting(
2511 &document_flows,
2512 &entries,
2513 &coa,
2514 &hr,
2515 &audit,
2516 &mut stats,
2517 )?;
2518
2519 {
2521 use datasynth_core::models::StatementType;
2522 for stmt in &financial_reporting.consolidated_statements {
2523 if stmt.statement_type == StatementType::BalanceSheet {
2524 let total_assets: rust_decimal::Decimal = stmt
2525 .line_items
2526 .iter()
2527 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2528 .map(|li| li.amount)
2529 .sum();
2530 let total_le: rust_decimal::Decimal = stmt
2531 .line_items
2532 .iter()
2533 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2534 .map(|li| li.amount)
2535 .sum();
2536 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2537 warn!(
2538 "BS equation imbalance: assets={}, L+E={}",
2539 total_assets, total_le
2540 );
2541 }
2542 }
2543 }
2544 }
2545
2546 let accounting_standards =
2548 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2549
2550 if !accounting_standards.ecl_journal_entries.is_empty() {
2552 debug!(
2553 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2554 accounting_standards.ecl_journal_entries.len()
2555 );
2556 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2557 }
2558
2559 if !accounting_standards.provision_journal_entries.is_empty() {
2561 debug!(
2562 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2563 accounting_standards.provision_journal_entries.len()
2564 );
2565 entries.extend(
2566 accounting_standards
2567 .provision_journal_entries
2568 .iter()
2569 .cloned(),
2570 );
2571 }
2572
2573 let ocpm = self.phase_ocpm_events(
2575 &document_flows,
2576 &sourcing,
2577 &hr,
2578 &manufacturing_snap,
2579 &banking,
2580 &audit,
2581 &financial_reporting,
2582 &mut stats,
2583 )?;
2584
2585 if let Some(ref event_log) = ocpm.event_log {
2587 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2588 }
2589
2590 if let Some(ref event_log) = ocpm.event_log {
2592 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2594 std::collections::HashMap::new();
2595 for (idx, event) in event_log.events.iter().enumerate() {
2596 if let Some(ref doc_ref) = event.document_ref {
2597 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2598 }
2599 }
2600
2601 if !doc_index.is_empty() {
2602 let mut annotated = 0usize;
2603 for entry in &mut entries {
2604 let doc_id_str = entry.header.document_id.to_string();
2605 let mut matched_indices: Vec<usize> = Vec::new();
2607 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2608 matched_indices.extend(indices);
2609 }
2610 if let Some(ref reference) = entry.header.reference {
2611 let bare_ref = reference
2612 .find(':')
2613 .map(|i| &reference[i + 1..])
2614 .unwrap_or(reference.as_str());
2615 if let Some(indices) = doc_index.get(bare_ref) {
2616 for &idx in indices {
2617 if !matched_indices.contains(&idx) {
2618 matched_indices.push(idx);
2619 }
2620 }
2621 }
2622 }
2623 if !matched_indices.is_empty() {
2625 for &idx in &matched_indices {
2626 let event = &event_log.events[idx];
2627 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2628 entry.header.ocpm_event_ids.push(event.event_id);
2629 }
2630 for obj_ref in &event.object_refs {
2631 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2632 entry.header.ocpm_object_ids.push(obj_ref.object_id);
2633 }
2634 }
2635 if entry.header.ocpm_case_id.is_none() {
2636 entry.header.ocpm_case_id = event.case_id;
2637 }
2638 }
2639 annotated += 1;
2640 }
2641 }
2642 debug!(
2643 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2644 annotated
2645 );
2646 }
2647 }
2648
2649 let sales_kpi_budgets =
2651 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2652
2653 let treasury =
2657 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2658
2659 if !treasury.journal_entries.is_empty() {
2661 debug!(
2662 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2663 treasury.journal_entries.len()
2664 );
2665 entries.extend(treasury.journal_entries.iter().cloned());
2666 }
2667
2668 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2670
2671 if !tax.tax_posting_journal_entries.is_empty() {
2673 debug!(
2674 "Merging {} tax posting JEs into GL",
2675 tax.tax_posting_journal_entries.len()
2676 );
2677 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2678 }
2679
2680 {
2684 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2685
2686 let framework_str = {
2687 use datasynth_config::schema::AccountingFrameworkConfig;
2688 match self
2689 .config
2690 .accounting_standards
2691 .framework
2692 .unwrap_or_default()
2693 {
2694 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2695 "IFRS"
2696 }
2697 _ => "US_GAAP",
2698 }
2699 };
2700
2701 let depreciation_total: rust_decimal::Decimal = entries
2703 .iter()
2704 .filter(|je| je.header.document_type == "CL")
2705 .flat_map(|je| je.lines.iter())
2706 .filter(|l| l.gl_account.starts_with("6000"))
2707 .map(|l| l.debit_amount)
2708 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2709
2710 let interest_paid: rust_decimal::Decimal = entries
2712 .iter()
2713 .flat_map(|je| je.lines.iter())
2714 .filter(|l| l.gl_account.starts_with("7100"))
2715 .map(|l| l.debit_amount)
2716 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2717
2718 let tax_paid: rust_decimal::Decimal = entries
2720 .iter()
2721 .flat_map(|je| je.lines.iter())
2722 .filter(|l| l.gl_account.starts_with("8000"))
2723 .map(|l| l.debit_amount)
2724 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2725
2726 let capex: rust_decimal::Decimal = entries
2728 .iter()
2729 .flat_map(|je| je.lines.iter())
2730 .filter(|l| l.gl_account.starts_with("1500"))
2731 .map(|l| l.debit_amount)
2732 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2733
2734 let dividends_paid: rust_decimal::Decimal = entries
2736 .iter()
2737 .flat_map(|je| je.lines.iter())
2738 .filter(|l| l.gl_account == "2170")
2739 .map(|l| l.debit_amount)
2740 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2741
2742 let cf_data = CashFlowSourceData {
2743 depreciation_total,
2744 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
2746 delta_ap: rust_decimal::Decimal::ZERO,
2747 delta_inventory: rust_decimal::Decimal::ZERO,
2748 capex,
2749 debt_issuance: rust_decimal::Decimal::ZERO,
2750 debt_repayment: rust_decimal::Decimal::ZERO,
2751 interest_paid,
2752 tax_paid,
2753 dividends_paid,
2754 framework: framework_str.to_string(),
2755 };
2756
2757 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2758 if !enhanced_cf_items.is_empty() {
2759 use datasynth_core::models::StatementType;
2761 let merge_count = enhanced_cf_items.len();
2762 for stmt in financial_reporting
2763 .financial_statements
2764 .iter_mut()
2765 .chain(financial_reporting.consolidated_statements.iter_mut())
2766 .chain(
2767 financial_reporting
2768 .standalone_statements
2769 .values_mut()
2770 .flat_map(|v| v.iter_mut()),
2771 )
2772 {
2773 if stmt.statement_type == StatementType::CashFlowStatement {
2774 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2775 }
2776 }
2777 info!(
2778 "Enhanced cash flow: {} supplementary items merged into CF statements",
2779 merge_count
2780 );
2781 }
2782 }
2783
2784 self.generate_notes_to_financial_statements(
2787 &mut financial_reporting,
2788 &accounting_standards,
2789 &tax,
2790 &hr,
2791 &audit,
2792 &treasury,
2793 );
2794
2795 if self.config.companies.len() >= 2 && !entries.is_empty() {
2799 let companies: Vec<(String, String)> = self
2800 .config
2801 .companies
2802 .iter()
2803 .map(|c| (c.code.clone(), c.name.clone()))
2804 .collect();
2805 let ic_elim: rust_decimal::Decimal =
2806 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2807 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2808 .unwrap_or(NaiveDate::MIN);
2809 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2810 let period_label = format!(
2811 "{}-{:02}",
2812 end_date.year(),
2813 (end_date - chrono::Days::new(1)).month()
2814 );
2815
2816 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2817 let (je_segments, je_recon) =
2818 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2819 if !je_segments.is_empty() {
2820 info!(
2821 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2822 je_segments.len(),
2823 ic_elim,
2824 );
2825 if financial_reporting.segment_reports.is_empty() {
2827 financial_reporting.segment_reports = je_segments;
2828 financial_reporting.segment_reconciliations = vec![je_recon];
2829 } else {
2830 financial_reporting.segment_reports.extend(je_segments);
2831 financial_reporting.segment_reconciliations.push(je_recon);
2832 }
2833 }
2834 }
2835
2836 let esg_snap =
2838 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2839
2840 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2842
2843 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2845
2846 let disruption_events = self.phase_disruption_events(&mut stats)?;
2848
2849 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2851
2852 let (entity_relationship_graph, cross_process_links) =
2854 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2855
2856 let industry_output = self.phase_industry_data(&mut stats);
2858
2859 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2861
2862 self.phase_hypergraph_export(
2864 &coa,
2865 &entries,
2866 &document_flows,
2867 &sourcing,
2868 &hr,
2869 &manufacturing_snap,
2870 &banking,
2871 &audit,
2872 &financial_reporting,
2873 &ocpm,
2874 &compliance_regulations,
2875 &mut stats,
2876 )?;
2877
2878 if self.phase_config.generate_graph_export {
2881 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2882 }
2883
2884 if self.config.streaming.enabled {
2886 info!("Note: streaming config is enabled but batch mode does not use it");
2887 }
2888 if self.config.vendor_network.enabled {
2889 debug!("Vendor network config available; relationship graph generation is partial");
2890 }
2891 if self.config.customer_segmentation.enabled {
2892 debug!("Customer segmentation config available; segment-aware generation is partial");
2893 }
2894
2895 let resource_stats = self.resource_guard.stats();
2897 info!(
2898 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2899 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2900 resource_stats.disk.estimated_bytes_written,
2901 resource_stats.degradation_level
2902 );
2903
2904 if let Some(ref sink) = self.phase_sink {
2906 if let Err(e) = sink.flush() {
2907 warn!("Stream sink flush failed: {e}");
2908 }
2909 }
2910
2911 let lineage = self.build_lineage_graph();
2913
2914 let gate_result = if self.config.quality_gates.enabled {
2916 let profile_name = &self.config.quality_gates.profile;
2917 match datasynth_eval::gates::get_profile(profile_name) {
2918 Some(profile) => {
2919 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2921
2922 if balance_validation.validated {
2924 eval.coherence.balance =
2925 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2926 equation_balanced: balance_validation.is_balanced,
2927 max_imbalance: (balance_validation.total_debits
2928 - balance_validation.total_credits)
2929 .abs(),
2930 periods_evaluated: 1,
2931 periods_imbalanced: if balance_validation.is_balanced {
2932 0
2933 } else {
2934 1
2935 },
2936 period_results: Vec::new(),
2937 companies_evaluated: self.config.companies.len(),
2938 });
2939 }
2940
2941 eval.coherence.passes = balance_validation.is_balanced;
2943 if !balance_validation.is_balanced {
2944 eval.coherence
2945 .failures
2946 .push("Balance sheet equation not satisfied".to_string());
2947 }
2948
2949 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2951 eval.statistical.passes = !entries.is_empty();
2952
2953 eval.quality.overall_score = 0.9; eval.quality.passes = true;
2956
2957 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2958 info!(
2959 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2960 profile_name, result.gates_passed, result.gates_total, result.summary
2961 );
2962 Some(result)
2963 }
2964 None => {
2965 warn!(
2966 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2967 profile_name
2968 );
2969 None
2970 }
2971 }
2972 } else {
2973 None
2974 };
2975
2976 let internal_controls = if self.config.internal_controls.enabled {
2978 InternalControl::standard_controls()
2979 } else {
2980 Vec::new()
2981 };
2982
2983 Ok(EnhancedGenerationResult {
2984 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2985 master_data: std::mem::take(&mut self.master_data),
2986 document_flows,
2987 subledger,
2988 ocpm,
2989 audit,
2990 banking,
2991 graph_export,
2992 sourcing,
2993 financial_reporting,
2994 hr,
2995 accounting_standards,
2996 manufacturing: manufacturing_snap,
2997 sales_kpi_budgets,
2998 tax,
2999 esg: esg_snap,
3000 treasury,
3001 project_accounting,
3002 process_evolution,
3003 organizational_events,
3004 disruption_events,
3005 intercompany,
3006 journal_entries: entries,
3007 anomaly_labels,
3008 balance_validation,
3009 data_quality_stats,
3010 quality_issues,
3011 statistics: stats,
3012 lineage: Some(lineage),
3013 gate_result,
3014 internal_controls,
3015 sod_violations,
3016 opening_balances,
3017 subledger_reconciliation,
3018 counterfactual_pairs,
3019 red_flags,
3020 collusion_rings,
3021 temporal_vendor_chains,
3022 entity_relationship_graph,
3023 cross_process_links,
3024 industry_output,
3025 compliance_regulations,
3026 })
3027 }
3028
3029 fn phase_chart_of_accounts(
3035 &mut self,
3036 stats: &mut EnhancedGenerationStatistics,
3037 ) -> SynthResult<Arc<ChartOfAccounts>> {
3038 info!("Phase 1: Generating Chart of Accounts");
3039 let coa = self.generate_coa()?;
3040 stats.accounts_count = coa.account_count();
3041 info!(
3042 "Chart of Accounts generated: {} accounts",
3043 stats.accounts_count
3044 );
3045 self.check_resources_with_log("post-coa")?;
3046 Ok(coa)
3047 }
3048
3049 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3051 if self.phase_config.generate_master_data {
3052 info!("Phase 2: Generating Master Data");
3053 self.generate_master_data()?;
3054 stats.vendor_count = self.master_data.vendors.len();
3055 stats.customer_count = self.master_data.customers.len();
3056 stats.material_count = self.master_data.materials.len();
3057 stats.asset_count = self.master_data.assets.len();
3058 stats.employee_count = self.master_data.employees.len();
3059 info!(
3060 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3061 stats.vendor_count, stats.customer_count, stats.material_count,
3062 stats.asset_count, stats.employee_count
3063 );
3064 self.check_resources_with_log("post-master-data")?;
3065 } else {
3066 debug!("Phase 2: Skipped (master data generation disabled)");
3067 }
3068 Ok(())
3069 }
3070
3071 fn phase_document_flows(
3073 &mut self,
3074 stats: &mut EnhancedGenerationStatistics,
3075 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3076 let mut document_flows = DocumentFlowSnapshot::default();
3077 let mut subledger = SubledgerSnapshot::default();
3078 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3081
3082 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3083 info!("Phase 3: Generating Document Flows");
3084 self.generate_document_flows(&mut document_flows)?;
3085 stats.p2p_chain_count = document_flows.p2p_chains.len();
3086 stats.o2c_chain_count = document_flows.o2c_chains.len();
3087 info!(
3088 "Document flows generated: {} P2P chains, {} O2C chains",
3089 stats.p2p_chain_count, stats.o2c_chain_count
3090 );
3091
3092 debug!("Phase 3b: Linking document flows to subledgers");
3094 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3095 stats.ap_invoice_count = subledger.ap_invoices.len();
3096 stats.ar_invoice_count = subledger.ar_invoices.len();
3097 debug!(
3098 "Subledgers linked: {} AP invoices, {} AR invoices",
3099 stats.ap_invoice_count, stats.ar_invoice_count
3100 );
3101
3102 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3107 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3108 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3109 debug!("Payment settlements applied to AP and AR subledgers");
3110
3111 if let Ok(start_date) =
3114 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3115 {
3116 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3117 - chrono::Days::new(1);
3118 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3119 for company in &self.config.companies {
3126 let ar_report = ARAgingReport::from_invoices(
3127 company.code.clone(),
3128 &subledger.ar_invoices,
3129 as_of_date,
3130 );
3131 subledger.ar_aging_reports.push(ar_report);
3132
3133 let ap_report = APAgingReport::from_invoices(
3134 company.code.clone(),
3135 &subledger.ap_invoices,
3136 as_of_date,
3137 );
3138 subledger.ap_aging_reports.push(ap_report);
3139 }
3140 debug!(
3141 "AR/AP aging reports built: {} AR, {} AP",
3142 subledger.ar_aging_reports.len(),
3143 subledger.ap_aging_reports.len()
3144 );
3145
3146 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3148 {
3149 use datasynth_generators::DunningGenerator;
3150 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3151 for company in &self.config.companies {
3152 let currency = company.currency.as_str();
3153 let mut company_invoices: Vec<
3156 datasynth_core::models::subledger::ar::ARInvoice,
3157 > = subledger
3158 .ar_invoices
3159 .iter()
3160 .filter(|inv| inv.company_code == company.code)
3161 .cloned()
3162 .collect();
3163
3164 if company_invoices.is_empty() {
3165 continue;
3166 }
3167
3168 let result = dunning_gen.execute_dunning_run(
3169 &company.code,
3170 as_of_date,
3171 &mut company_invoices,
3172 currency,
3173 );
3174
3175 for updated in &company_invoices {
3177 if let Some(orig) = subledger
3178 .ar_invoices
3179 .iter_mut()
3180 .find(|i| i.invoice_number == updated.invoice_number)
3181 {
3182 orig.dunning_info = updated.dunning_info.clone();
3183 }
3184 }
3185
3186 subledger.dunning_runs.push(result.dunning_run);
3187 subledger.dunning_letters.extend(result.letters);
3188 dunning_journal_entries.extend(result.journal_entries);
3190 }
3191 debug!(
3192 "Dunning runs complete: {} runs, {} letters",
3193 subledger.dunning_runs.len(),
3194 subledger.dunning_letters.len()
3195 );
3196 }
3197 }
3198
3199 self.check_resources_with_log("post-document-flows")?;
3200 } else {
3201 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3202 }
3203
3204 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3206 if !self.master_data.assets.is_empty() {
3207 debug!("Generating FA subledger records");
3208 let company_code = self
3209 .config
3210 .companies
3211 .first()
3212 .map(|c| c.code.as_str())
3213 .unwrap_or("1000");
3214 let currency = self
3215 .config
3216 .companies
3217 .first()
3218 .map(|c| c.currency.as_str())
3219 .unwrap_or("USD");
3220
3221 let mut fa_gen = datasynth_generators::FAGenerator::new(
3222 datasynth_generators::FAGeneratorConfig::default(),
3223 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3224 );
3225
3226 for asset in &self.master_data.assets {
3227 let (record, je) = fa_gen.generate_asset_acquisition(
3228 company_code,
3229 &format!("{:?}", asset.asset_class),
3230 &asset.description,
3231 asset.acquisition_date,
3232 currency,
3233 asset.cost_center.as_deref(),
3234 );
3235 subledger.fa_records.push(record);
3236 fa_journal_entries.push(je);
3237 }
3238
3239 stats.fa_subledger_count = subledger.fa_records.len();
3240 debug!(
3241 "FA subledger records generated: {} (with {} acquisition JEs)",
3242 stats.fa_subledger_count,
3243 fa_journal_entries.len()
3244 );
3245 }
3246
3247 if !self.master_data.materials.is_empty() {
3249 debug!("Generating Inventory subledger records");
3250 let first_company = self.config.companies.first();
3251 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3252 let inv_currency = first_company
3253 .map(|c| c.currency.clone())
3254 .unwrap_or_else(|| "USD".to_string());
3255
3256 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3257 datasynth_generators::InventoryGeneratorConfig::default(),
3258 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3259 inv_currency.clone(),
3260 );
3261
3262 for (i, material) in self.master_data.materials.iter().enumerate() {
3263 let plant = format!("PLANT{:02}", (i % 3) + 1);
3264 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3265 let initial_qty = rust_decimal::Decimal::from(
3266 material
3267 .safety_stock
3268 .to_string()
3269 .parse::<i64>()
3270 .unwrap_or(100),
3271 );
3272
3273 let position = inv_gen.generate_position(
3274 company_code,
3275 &plant,
3276 &storage_loc,
3277 &material.material_id,
3278 &material.description,
3279 initial_qty,
3280 Some(material.standard_cost),
3281 &inv_currency,
3282 );
3283 subledger.inventory_positions.push(position);
3284 }
3285
3286 stats.inventory_subledger_count = subledger.inventory_positions.len();
3287 debug!(
3288 "Inventory subledger records generated: {}",
3289 stats.inventory_subledger_count
3290 );
3291 }
3292
3293 if !subledger.fa_records.is_empty() {
3295 if let Ok(start_date) =
3296 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3297 {
3298 let company_code = self
3299 .config
3300 .companies
3301 .first()
3302 .map(|c| c.code.as_str())
3303 .unwrap_or("1000");
3304 let fiscal_year = start_date.year();
3305 let start_period = start_date.month();
3306 let end_period =
3307 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3308
3309 let depr_cfg = FaDepreciationScheduleConfig {
3310 fiscal_year,
3311 start_period,
3312 end_period,
3313 seed_offset: 800,
3314 };
3315 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3316 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3317 let run_count = runs.len();
3318 subledger.depreciation_runs = runs;
3319 debug!(
3320 "Depreciation runs generated: {} runs for {} periods",
3321 run_count, self.config.global.period_months
3322 );
3323 }
3324 }
3325
3326 if !subledger.inventory_positions.is_empty() {
3328 if let Ok(start_date) =
3329 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3330 {
3331 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3332 - chrono::Days::new(1);
3333
3334 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3335 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3336
3337 for company in &self.config.companies {
3338 let result = inv_val_gen.generate(
3339 &company.code,
3340 &subledger.inventory_positions,
3341 as_of_date,
3342 );
3343 subledger.inventory_valuations.push(result);
3344 }
3345 debug!(
3346 "Inventory valuations generated: {} company reports",
3347 subledger.inventory_valuations.len()
3348 );
3349 }
3350 }
3351
3352 Ok((document_flows, subledger, fa_journal_entries))
3353 }
3354
3355 #[allow(clippy::too_many_arguments)]
3357 fn phase_ocpm_events(
3358 &mut self,
3359 document_flows: &DocumentFlowSnapshot,
3360 sourcing: &SourcingSnapshot,
3361 hr: &HrSnapshot,
3362 manufacturing: &ManufacturingSnapshot,
3363 banking: &BankingSnapshot,
3364 audit: &AuditSnapshot,
3365 financial_reporting: &FinancialReportingSnapshot,
3366 stats: &mut EnhancedGenerationStatistics,
3367 ) -> SynthResult<OcpmSnapshot> {
3368 let degradation = self.check_resources()?;
3369 if degradation >= DegradationLevel::Reduced {
3370 debug!(
3371 "Phase skipped due to resource pressure (degradation: {:?})",
3372 degradation
3373 );
3374 return Ok(OcpmSnapshot::default());
3375 }
3376 if self.phase_config.generate_ocpm_events {
3377 info!("Phase 3c: Generating OCPM Events");
3378 let ocpm_snapshot = self.generate_ocpm_events(
3379 document_flows,
3380 sourcing,
3381 hr,
3382 manufacturing,
3383 banking,
3384 audit,
3385 financial_reporting,
3386 )?;
3387 stats.ocpm_event_count = ocpm_snapshot.event_count;
3388 stats.ocpm_object_count = ocpm_snapshot.object_count;
3389 stats.ocpm_case_count = ocpm_snapshot.case_count;
3390 info!(
3391 "OCPM events generated: {} events, {} objects, {} cases",
3392 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3393 );
3394 self.check_resources_with_log("post-ocpm")?;
3395 Ok(ocpm_snapshot)
3396 } else {
3397 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3398 Ok(OcpmSnapshot::default())
3399 }
3400 }
3401
3402 fn phase_journal_entries(
3404 &mut self,
3405 coa: &Arc<ChartOfAccounts>,
3406 document_flows: &DocumentFlowSnapshot,
3407 _stats: &mut EnhancedGenerationStatistics,
3408 ) -> SynthResult<Vec<JournalEntry>> {
3409 let mut entries = Vec::new();
3410
3411 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3413 debug!("Phase 4a: Generating JEs from document flows");
3414 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3415 debug!("Generated {} JEs from document flows", flow_entries.len());
3416 entries.extend(flow_entries);
3417 }
3418
3419 if self.phase_config.generate_journal_entries {
3421 info!("Phase 4: Generating Journal Entries");
3422 let je_entries = self.generate_journal_entries(coa)?;
3423 info!("Generated {} standalone journal entries", je_entries.len());
3424 entries.extend(je_entries);
3425 } else {
3426 debug!("Phase 4: Skipped (journal entry generation disabled)");
3427 }
3428
3429 if !entries.is_empty() {
3430 self.check_resources_with_log("post-journal-entries")?;
3433 }
3434
3435 Ok(entries)
3436 }
3437
3438 fn phase_anomaly_injection(
3440 &mut self,
3441 entries: &mut [JournalEntry],
3442 actions: &DegradationActions,
3443 stats: &mut EnhancedGenerationStatistics,
3444 ) -> SynthResult<AnomalyLabels> {
3445 if self.phase_config.inject_anomalies
3446 && !entries.is_empty()
3447 && !actions.skip_anomaly_injection
3448 {
3449 info!("Phase 5: Injecting Anomalies");
3450 let result = self.inject_anomalies(entries)?;
3451 stats.anomalies_injected = result.labels.len();
3452 info!("Injected {} anomalies", stats.anomalies_injected);
3453 self.check_resources_with_log("post-anomaly-injection")?;
3454 Ok(result)
3455 } else if actions.skip_anomaly_injection {
3456 warn!("Phase 5: Skipped due to resource degradation");
3457 Ok(AnomalyLabels::default())
3458 } else {
3459 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3460 Ok(AnomalyLabels::default())
3461 }
3462 }
3463
3464 fn phase_balance_validation(
3466 &mut self,
3467 entries: &[JournalEntry],
3468 ) -> SynthResult<BalanceValidationResult> {
3469 if self.phase_config.validate_balances && !entries.is_empty() {
3470 debug!("Phase 6: Validating Balances");
3471 let balance_validation = self.validate_journal_entries(entries)?;
3472 if balance_validation.is_balanced {
3473 debug!("Balance validation passed");
3474 } else {
3475 warn!(
3476 "Balance validation found {} errors",
3477 balance_validation.validation_errors.len()
3478 );
3479 }
3480 Ok(balance_validation)
3481 } else {
3482 Ok(BalanceValidationResult::default())
3483 }
3484 }
3485
3486 fn phase_data_quality_injection(
3488 &mut self,
3489 entries: &mut [JournalEntry],
3490 actions: &DegradationActions,
3491 stats: &mut EnhancedGenerationStatistics,
3492 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3493 if self.phase_config.inject_data_quality
3494 && !entries.is_empty()
3495 && !actions.skip_data_quality
3496 {
3497 info!("Phase 7: Injecting Data Quality Variations");
3498 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3499 stats.data_quality_issues = dq_stats.records_with_issues;
3500 info!("Injected {} data quality issues", stats.data_quality_issues);
3501 self.check_resources_with_log("post-data-quality")?;
3502 Ok((dq_stats, quality_issues))
3503 } else if actions.skip_data_quality {
3504 warn!("Phase 7: Skipped due to resource degradation");
3505 Ok((DataQualityStats::default(), Vec::new()))
3506 } else {
3507 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3508 Ok((DataQualityStats::default(), Vec::new()))
3509 }
3510 }
3511
3512 fn phase_period_close(
3522 &mut self,
3523 entries: &mut Vec<JournalEntry>,
3524 subledger: &SubledgerSnapshot,
3525 stats: &mut EnhancedGenerationStatistics,
3526 ) -> SynthResult<()> {
3527 if !self.phase_config.generate_period_close || entries.is_empty() {
3528 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3529 return Ok(());
3530 }
3531
3532 info!("Phase 10b: Generating period-close journal entries");
3533
3534 use datasynth_core::accounts::{
3535 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3536 };
3537 use rust_decimal::Decimal;
3538
3539 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3540 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3541 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3542 let close_date = end_date - chrono::Days::new(1);
3544
3545 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3550 .config
3551 .companies
3552 .iter()
3553 .map(|c| c.code.clone())
3554 .collect();
3555
3556 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3558 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3559
3560 let period_months = self.config.global.period_months;
3564 for asset in &subledger.fa_records {
3565 use datasynth_core::models::subledger::fa::AssetStatus;
3567 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3568 continue;
3569 }
3570 let useful_life_months = asset.useful_life_months();
3571 if useful_life_months == 0 {
3572 continue;
3574 }
3575 let salvage_value = asset.salvage_value();
3576 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3577 if depreciable_base == Decimal::ZERO {
3578 continue;
3579 }
3580 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3581 * Decimal::from(period_months))
3582 .round_dp(2);
3583 if period_depr <= Decimal::ZERO {
3584 continue;
3585 }
3586
3587 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3588 depr_header.document_type = "CL".to_string();
3589 depr_header.header_text = Some(format!(
3590 "Depreciation - {} {}",
3591 asset.asset_number, asset.description
3592 ));
3593 depr_header.created_by = "CLOSE_ENGINE".to_string();
3594 depr_header.source = TransactionSource::Automated;
3595 depr_header.business_process = Some(BusinessProcess::R2R);
3596
3597 let doc_id = depr_header.document_id;
3598 let mut depr_je = JournalEntry::new(depr_header);
3599
3600 depr_je.add_line(JournalEntryLine::debit(
3602 doc_id,
3603 1,
3604 expense_accounts::DEPRECIATION.to_string(),
3605 period_depr,
3606 ));
3607 depr_je.add_line(JournalEntryLine::credit(
3609 doc_id,
3610 2,
3611 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3612 period_depr,
3613 ));
3614
3615 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3616 close_jes.push(depr_je);
3617 }
3618
3619 if !subledger.fa_records.is_empty() {
3620 debug!(
3621 "Generated {} depreciation JEs from {} FA records",
3622 close_jes.len(),
3623 subledger.fa_records.len()
3624 );
3625 }
3626
3627 {
3631 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3632 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3633
3634 let accrual_items: &[(&str, &str, &str)] = &[
3636 ("Accrued Utilities", "6200", "2100"),
3637 ("Accrued Rent", "6300", "2100"),
3638 ("Accrued Interest", "6100", "2150"),
3639 ];
3640
3641 for company_code in &company_codes {
3642 let company_revenue: Decimal = entries
3644 .iter()
3645 .filter(|e| e.header.company_code == *company_code)
3646 .flat_map(|e| e.lines.iter())
3647 .filter(|l| l.gl_account.starts_with('4'))
3648 .map(|l| l.credit_amount - l.debit_amount)
3649 .fold(Decimal::ZERO, |acc, v| acc + v);
3650
3651 if company_revenue <= Decimal::ZERO {
3652 continue;
3653 }
3654
3655 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3657 if accrual_base <= Decimal::ZERO {
3658 continue;
3659 }
3660
3661 for (description, expense_acct, liability_acct) in accrual_items {
3662 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3663 company_code,
3664 description,
3665 accrual_base,
3666 expense_acct,
3667 liability_acct,
3668 close_date,
3669 None,
3670 );
3671 close_jes.push(accrual_je);
3672 if let Some(rev_je) = reversal_je {
3673 close_jes.push(rev_je);
3674 }
3675 }
3676 }
3677
3678 debug!(
3679 "Generated accrual entries for {} companies",
3680 company_codes.len()
3681 );
3682 }
3683
3684 for company_code in &company_codes {
3685 let mut total_revenue = Decimal::ZERO;
3690 let mut total_expenses = Decimal::ZERO;
3691
3692 for entry in entries.iter() {
3693 if entry.header.company_code != *company_code {
3694 continue;
3695 }
3696 for line in &entry.lines {
3697 let category = AccountCategory::from_account(&line.gl_account);
3698 match category {
3699 AccountCategory::Revenue => {
3700 total_revenue += line.credit_amount - line.debit_amount;
3702 }
3703 AccountCategory::Cogs
3704 | AccountCategory::OperatingExpense
3705 | AccountCategory::OtherIncomeExpense
3706 | AccountCategory::Tax => {
3707 total_expenses += line.debit_amount - line.credit_amount;
3709 }
3710 _ => {}
3711 }
3712 }
3713 }
3714
3715 let pre_tax_income = total_revenue - total_expenses;
3716
3717 if pre_tax_income == Decimal::ZERO {
3719 debug!(
3720 "Company {}: no pre-tax income, skipping period close",
3721 company_code
3722 );
3723 continue;
3724 }
3725
3726 if pre_tax_income > Decimal::ZERO {
3728 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3730
3731 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3732 tax_header.document_type = "CL".to_string();
3733 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3734 tax_header.created_by = "CLOSE_ENGINE".to_string();
3735 tax_header.source = TransactionSource::Automated;
3736 tax_header.business_process = Some(BusinessProcess::R2R);
3737
3738 let doc_id = tax_header.document_id;
3739 let mut tax_je = JournalEntry::new(tax_header);
3740
3741 tax_je.add_line(JournalEntryLine::debit(
3743 doc_id,
3744 1,
3745 tax_accounts::TAX_EXPENSE.to_string(),
3746 tax_amount,
3747 ));
3748 tax_je.add_line(JournalEntryLine::credit(
3750 doc_id,
3751 2,
3752 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3753 tax_amount,
3754 ));
3755
3756 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3757 close_jes.push(tax_je);
3758 } else {
3759 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3762 if dta_amount > Decimal::ZERO {
3763 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3764 dta_header.document_type = "CL".to_string();
3765 dta_header.header_text =
3766 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3767 dta_header.created_by = "CLOSE_ENGINE".to_string();
3768 dta_header.source = TransactionSource::Automated;
3769 dta_header.business_process = Some(BusinessProcess::R2R);
3770
3771 let doc_id = dta_header.document_id;
3772 let mut dta_je = JournalEntry::new(dta_header);
3773
3774 dta_je.add_line(JournalEntryLine::debit(
3776 doc_id,
3777 1,
3778 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3779 dta_amount,
3780 ));
3781 dta_je.add_line(JournalEntryLine::credit(
3784 doc_id,
3785 2,
3786 tax_accounts::TAX_EXPENSE.to_string(),
3787 dta_amount,
3788 ));
3789
3790 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3791 close_jes.push(dta_je);
3792 debug!(
3793 "Company {}: loss year — recognised DTA of {}",
3794 company_code, dta_amount
3795 );
3796 }
3797 }
3798
3799 let tax_provision = if pre_tax_income > Decimal::ZERO {
3805 (pre_tax_income * tax_rate).round_dp(2)
3806 } else {
3807 Decimal::ZERO
3808 };
3809 let net_income = pre_tax_income - tax_provision;
3810
3811 if net_income > Decimal::ZERO {
3812 use datasynth_generators::DividendGenerator;
3813 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
3815 let currency_str = self
3816 .config
3817 .companies
3818 .iter()
3819 .find(|c| c.code == *company_code)
3820 .map(|c| c.currency.as_str())
3821 .unwrap_or("USD");
3822 let div_result = div_gen.generate(
3823 company_code,
3824 close_date,
3825 Decimal::new(1, 0), dividend_amount,
3827 currency_str,
3828 );
3829 let div_je_count = div_result.journal_entries.len();
3830 close_jes.extend(div_result.journal_entries);
3831 debug!(
3832 "Company {}: declared dividend of {} ({} JEs)",
3833 company_code, dividend_amount, div_je_count
3834 );
3835 }
3836
3837 if net_income != Decimal::ZERO {
3842 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3843 close_header.document_type = "CL".to_string();
3844 close_header.header_text =
3845 Some(format!("Income statement close - {}", company_code));
3846 close_header.created_by = "CLOSE_ENGINE".to_string();
3847 close_header.source = TransactionSource::Automated;
3848 close_header.business_process = Some(BusinessProcess::R2R);
3849
3850 let doc_id = close_header.document_id;
3851 let mut close_je = JournalEntry::new(close_header);
3852
3853 let abs_net_income = net_income.abs();
3854
3855 if net_income > Decimal::ZERO {
3856 close_je.add_line(JournalEntryLine::debit(
3858 doc_id,
3859 1,
3860 equity_accounts::INCOME_SUMMARY.to_string(),
3861 abs_net_income,
3862 ));
3863 close_je.add_line(JournalEntryLine::credit(
3864 doc_id,
3865 2,
3866 equity_accounts::RETAINED_EARNINGS.to_string(),
3867 abs_net_income,
3868 ));
3869 } else {
3870 close_je.add_line(JournalEntryLine::debit(
3872 doc_id,
3873 1,
3874 equity_accounts::RETAINED_EARNINGS.to_string(),
3875 abs_net_income,
3876 ));
3877 close_je.add_line(JournalEntryLine::credit(
3878 doc_id,
3879 2,
3880 equity_accounts::INCOME_SUMMARY.to_string(),
3881 abs_net_income,
3882 ));
3883 }
3884
3885 debug_assert!(
3886 close_je.is_balanced(),
3887 "Income statement closing JE must be balanced"
3888 );
3889 close_jes.push(close_je);
3890 }
3891 }
3892
3893 let close_count = close_jes.len();
3894 if close_count > 0 {
3895 info!("Generated {} period-close journal entries", close_count);
3896 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3897 entries.extend(close_jes);
3898 stats.period_close_je_count = close_count;
3899
3900 stats.total_entries = entries.len() as u64;
3902 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3903 } else {
3904 debug!("No period-close entries generated (no income statement activity)");
3905 }
3906
3907 Ok(())
3908 }
3909
3910 fn phase_audit_data(
3912 &mut self,
3913 entries: &[JournalEntry],
3914 stats: &mut EnhancedGenerationStatistics,
3915 ) -> SynthResult<AuditSnapshot> {
3916 if self.phase_config.generate_audit {
3917 info!("Phase 8: Generating Audit Data");
3918 let audit_snapshot = self.generate_audit_data(entries)?;
3919 stats.audit_engagement_count = audit_snapshot.engagements.len();
3920 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3921 stats.audit_evidence_count = audit_snapshot.evidence.len();
3922 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3923 stats.audit_finding_count = audit_snapshot.findings.len();
3924 stats.audit_judgment_count = audit_snapshot.judgments.len();
3925 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3926 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3927 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3928 stats.audit_sample_count = audit_snapshot.samples.len();
3929 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3930 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3931 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3932 stats.audit_related_party_count = audit_snapshot.related_parties.len();
3933 stats.audit_related_party_transaction_count =
3934 audit_snapshot.related_party_transactions.len();
3935 info!(
3936 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3937 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3938 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3939 {} RP transactions",
3940 stats.audit_engagement_count,
3941 stats.audit_workpaper_count,
3942 stats.audit_evidence_count,
3943 stats.audit_risk_count,
3944 stats.audit_finding_count,
3945 stats.audit_judgment_count,
3946 stats.audit_confirmation_count,
3947 stats.audit_procedure_step_count,
3948 stats.audit_sample_count,
3949 stats.audit_analytical_result_count,
3950 stats.audit_ia_function_count,
3951 stats.audit_ia_report_count,
3952 stats.audit_related_party_count,
3953 stats.audit_related_party_transaction_count,
3954 );
3955 self.check_resources_with_log("post-audit")?;
3956 Ok(audit_snapshot)
3957 } else {
3958 debug!("Phase 8: Skipped (audit generation disabled)");
3959 Ok(AuditSnapshot::default())
3960 }
3961 }
3962
3963 fn phase_banking_data(
3965 &mut self,
3966 stats: &mut EnhancedGenerationStatistics,
3967 ) -> SynthResult<BankingSnapshot> {
3968 if self.phase_config.generate_banking {
3969 info!("Phase 9: Generating Banking KYC/AML Data");
3970 let banking_snapshot = self.generate_banking_data()?;
3971 stats.banking_customer_count = banking_snapshot.customers.len();
3972 stats.banking_account_count = banking_snapshot.accounts.len();
3973 stats.banking_transaction_count = banking_snapshot.transactions.len();
3974 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3975 info!(
3976 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3977 stats.banking_customer_count, stats.banking_account_count,
3978 stats.banking_transaction_count, stats.banking_suspicious_count
3979 );
3980 self.check_resources_with_log("post-banking")?;
3981 Ok(banking_snapshot)
3982 } else {
3983 debug!("Phase 9: Skipped (banking generation disabled)");
3984 Ok(BankingSnapshot::default())
3985 }
3986 }
3987
3988 fn phase_graph_export(
3990 &mut self,
3991 entries: &[JournalEntry],
3992 coa: &Arc<ChartOfAccounts>,
3993 stats: &mut EnhancedGenerationStatistics,
3994 ) -> SynthResult<GraphExportSnapshot> {
3995 if self.phase_config.generate_graph_export && !entries.is_empty() {
3996 info!("Phase 10: Exporting Accounting Network Graphs");
3997 match self.export_graphs(entries, coa, stats) {
3998 Ok(snapshot) => {
3999 info!(
4000 "Graph export complete: {} graphs ({} nodes, {} edges)",
4001 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4002 );
4003 Ok(snapshot)
4004 }
4005 Err(e) => {
4006 warn!("Phase 10: Graph export failed: {}", e);
4007 Ok(GraphExportSnapshot::default())
4008 }
4009 }
4010 } else {
4011 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4012 Ok(GraphExportSnapshot::default())
4013 }
4014 }
4015
4016 #[allow(clippy::too_many_arguments)]
4018 fn phase_hypergraph_export(
4019 &self,
4020 coa: &Arc<ChartOfAccounts>,
4021 entries: &[JournalEntry],
4022 document_flows: &DocumentFlowSnapshot,
4023 sourcing: &SourcingSnapshot,
4024 hr: &HrSnapshot,
4025 manufacturing: &ManufacturingSnapshot,
4026 banking: &BankingSnapshot,
4027 audit: &AuditSnapshot,
4028 financial_reporting: &FinancialReportingSnapshot,
4029 ocpm: &OcpmSnapshot,
4030 compliance: &ComplianceRegulationsSnapshot,
4031 stats: &mut EnhancedGenerationStatistics,
4032 ) -> SynthResult<()> {
4033 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4034 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4035 match self.export_hypergraph(
4036 coa,
4037 entries,
4038 document_flows,
4039 sourcing,
4040 hr,
4041 manufacturing,
4042 banking,
4043 audit,
4044 financial_reporting,
4045 ocpm,
4046 compliance,
4047 stats,
4048 ) {
4049 Ok(info) => {
4050 info!(
4051 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4052 info.node_count, info.edge_count, info.hyperedge_count
4053 );
4054 }
4055 Err(e) => {
4056 warn!("Phase 10b: Hypergraph export failed: {}", e);
4057 }
4058 }
4059 } else {
4060 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4061 }
4062 Ok(())
4063 }
4064
4065 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4071 if !self.config.llm.enabled {
4072 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4073 return;
4074 }
4075
4076 info!("Phase 11: Starting LLM Enrichment");
4077 let start = std::time::Instant::now();
4078
4079 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4080 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4083 let schema_provider = &self.config.llm.provider;
4084 let api_key_env = match schema_provider.as_str() {
4085 "openai" => Some("OPENAI_API_KEY"),
4086 "anthropic" => Some("ANTHROPIC_API_KEY"),
4087 "custom" => Some("LLM_API_KEY"),
4088 _ => None,
4089 };
4090 if let Some(key_env) = api_key_env {
4091 if std::env::var(key_env).is_ok() {
4092 let llm_config = datasynth_core::llm::LlmConfig {
4093 model: self.config.llm.model.clone(),
4094 api_key_env: key_env.to_string(),
4095 ..datasynth_core::llm::LlmConfig::default()
4096 };
4097 match HttpLlmProvider::new(llm_config) {
4098 Ok(p) => Arc::new(p),
4099 Err(e) => {
4100 warn!(
4101 "Failed to create HttpLlmProvider: {}; falling back to mock",
4102 e
4103 );
4104 Arc::new(MockLlmProvider::new(self.seed))
4105 }
4106 }
4107 } else {
4108 Arc::new(MockLlmProvider::new(self.seed))
4109 }
4110 } else {
4111 Arc::new(MockLlmProvider::new(self.seed))
4112 }
4113 };
4114 let enricher = VendorLlmEnricher::new(provider);
4115
4116 let industry = format!("{:?}", self.config.global.industry);
4117 let max_enrichments = self
4118 .config
4119 .llm
4120 .max_vendor_enrichments
4121 .min(self.master_data.vendors.len());
4122
4123 let mut enriched_count = 0usize;
4124 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4125 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4126 Ok(name) => {
4127 vendor.name = name;
4128 enriched_count += 1;
4129 }
4130 Err(e) => {
4131 warn!(
4132 "LLM vendor enrichment failed for {}: {}",
4133 vendor.vendor_id, e
4134 );
4135 }
4136 }
4137 }
4138
4139 enriched_count
4140 }));
4141
4142 match result {
4143 Ok(enriched_count) => {
4144 stats.llm_vendors_enriched = enriched_count;
4145 let elapsed = start.elapsed();
4146 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4147 info!(
4148 "Phase 11 complete: {} vendors enriched in {}ms",
4149 enriched_count, stats.llm_enrichment_ms
4150 );
4151 }
4152 Err(_) => {
4153 let elapsed = start.elapsed();
4154 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4155 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4156 }
4157 }
4158 }
4159
4160 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4166 if !self.config.diffusion.enabled {
4167 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4168 return;
4169 }
4170
4171 info!("Phase 12: Starting Diffusion Enhancement");
4172 let start = std::time::Instant::now();
4173
4174 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4175 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4178
4179 let diffusion_config = DiffusionConfig {
4180 n_steps: self.config.diffusion.n_steps,
4181 seed: self.seed,
4182 ..Default::default()
4183 };
4184
4185 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4186
4187 let n_samples = self.config.diffusion.sample_size;
4188 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4190
4191 samples.len()
4192 }));
4193
4194 match result {
4195 Ok(sample_count) => {
4196 stats.diffusion_samples_generated = sample_count;
4197 let elapsed = start.elapsed();
4198 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4199 info!(
4200 "Phase 12 complete: {} diffusion samples generated in {}ms",
4201 sample_count, stats.diffusion_enhancement_ms
4202 );
4203 }
4204 Err(_) => {
4205 let elapsed = start.elapsed();
4206 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4207 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4208 }
4209 }
4210 }
4211
4212 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4219 if !self.config.causal.enabled {
4220 debug!("Phase 13: Skipped (causal generation disabled)");
4221 return;
4222 }
4223
4224 info!("Phase 13: Starting Causal Overlay");
4225 let start = std::time::Instant::now();
4226
4227 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4228 let graph = match self.config.causal.template.as_str() {
4230 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4231 _ => CausalGraph::fraud_detection_template(),
4232 };
4233
4234 let scm = StructuralCausalModel::new(graph.clone())
4235 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4236
4237 let n_samples = self.config.causal.sample_size;
4238 let samples = scm
4239 .generate(n_samples, self.seed)
4240 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4241
4242 let validation_passed = if self.config.causal.validate {
4244 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4245 if report.valid {
4246 info!(
4247 "Causal validation passed: all {} checks OK",
4248 report.checks.len()
4249 );
4250 } else {
4251 warn!(
4252 "Causal validation: {} violations detected: {:?}",
4253 report.violations.len(),
4254 report.violations
4255 );
4256 }
4257 Some(report.valid)
4258 } else {
4259 None
4260 };
4261
4262 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4263 }));
4264
4265 match result {
4266 Ok(Ok((sample_count, validation_passed))) => {
4267 stats.causal_samples_generated = sample_count;
4268 stats.causal_validation_passed = validation_passed;
4269 let elapsed = start.elapsed();
4270 stats.causal_generation_ms = elapsed.as_millis() as u64;
4271 info!(
4272 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4273 sample_count, stats.causal_generation_ms, validation_passed,
4274 );
4275 }
4276 Ok(Err(e)) => {
4277 let elapsed = start.elapsed();
4278 stats.causal_generation_ms = elapsed.as_millis() as u64;
4279 warn!("Phase 13: Causal generation failed: {}", e);
4280 }
4281 Err(_) => {
4282 let elapsed = start.elapsed();
4283 stats.causal_generation_ms = elapsed.as_millis() as u64;
4284 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4285 }
4286 }
4287 }
4288
4289 fn phase_sourcing_data(
4291 &mut self,
4292 stats: &mut EnhancedGenerationStatistics,
4293 ) -> SynthResult<SourcingSnapshot> {
4294 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4295 debug!("Phase 14: Skipped (sourcing generation disabled)");
4296 return Ok(SourcingSnapshot::default());
4297 }
4298 let degradation = self.check_resources()?;
4299 if degradation >= DegradationLevel::Reduced {
4300 debug!(
4301 "Phase skipped due to resource pressure (degradation: {:?})",
4302 degradation
4303 );
4304 return Ok(SourcingSnapshot::default());
4305 }
4306
4307 info!("Phase 14: Generating S2C Sourcing Data");
4308 let seed = self.seed;
4309
4310 let vendor_ids: Vec<String> = self
4312 .master_data
4313 .vendors
4314 .iter()
4315 .map(|v| v.vendor_id.clone())
4316 .collect();
4317 if vendor_ids.is_empty() {
4318 debug!("Phase 14: Skipped (no vendors available)");
4319 return Ok(SourcingSnapshot::default());
4320 }
4321
4322 let categories: Vec<(String, String)> = vec![
4323 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4324 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4325 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4326 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4327 ("CAT-LOG".to_string(), "Logistics".to_string()),
4328 ];
4329 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4330 .iter()
4331 .map(|(id, name)| {
4332 (
4333 id.clone(),
4334 name.clone(),
4335 rust_decimal::Decimal::from(100_000),
4336 )
4337 })
4338 .collect();
4339
4340 let company_code = self
4341 .config
4342 .companies
4343 .first()
4344 .map(|c| c.code.as_str())
4345 .unwrap_or("1000");
4346 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4347 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4348 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4349 let fiscal_year = start_date.year() as u16;
4350 let owner_ids: Vec<String> = self
4351 .master_data
4352 .employees
4353 .iter()
4354 .take(5)
4355 .map(|e| e.employee_id.clone())
4356 .collect();
4357 let owner_id = owner_ids
4358 .first()
4359 .map(std::string::String::as_str)
4360 .unwrap_or("BUYER-001");
4361
4362 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4364 let spend_analyses =
4365 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4366
4367 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4369 let sourcing_projects = if owner_ids.is_empty() {
4370 Vec::new()
4371 } else {
4372 project_gen.generate(
4373 company_code,
4374 &categories_with_spend,
4375 &owner_ids,
4376 start_date,
4377 self.config.global.period_months,
4378 )
4379 };
4380 stats.sourcing_project_count = sourcing_projects.len();
4381
4382 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4384 let mut qual_gen = QualificationGenerator::new(seed + 2);
4385 let qualifications = qual_gen.generate(
4386 company_code,
4387 &qual_vendor_ids,
4388 sourcing_projects.first().map(|p| p.project_id.as_str()),
4389 owner_id,
4390 start_date,
4391 );
4392
4393 let mut rfx_gen = RfxGenerator::new(seed + 3);
4395 let rfx_events: Vec<RfxEvent> = sourcing_projects
4396 .iter()
4397 .map(|proj| {
4398 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4399 rfx_gen.generate(
4400 company_code,
4401 &proj.project_id,
4402 &proj.category_id,
4403 &qualified_vids,
4404 owner_id,
4405 start_date,
4406 50000.0,
4407 )
4408 })
4409 .collect();
4410 stats.rfx_event_count = rfx_events.len();
4411
4412 let mut bid_gen = BidGenerator::new(seed + 4);
4414 let mut all_bids = Vec::new();
4415 for rfx in &rfx_events {
4416 let bidder_count = vendor_ids.len().clamp(2, 5);
4417 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4418 let bids = bid_gen.generate(rfx, &responding, start_date);
4419 all_bids.extend(bids);
4420 }
4421 stats.bid_count = all_bids.len();
4422
4423 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4425 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4426 .iter()
4427 .map(|rfx| {
4428 let rfx_bids: Vec<SupplierBid> = all_bids
4429 .iter()
4430 .filter(|b| b.rfx_id == rfx.rfx_id)
4431 .cloned()
4432 .collect();
4433 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4434 })
4435 .collect();
4436
4437 let mut contract_gen = ContractGenerator::new(seed + 6);
4439 let contracts: Vec<ProcurementContract> = bid_evaluations
4440 .iter()
4441 .zip(rfx_events.iter())
4442 .filter_map(|(eval, rfx)| {
4443 eval.ranked_bids.first().and_then(|winner| {
4444 all_bids
4445 .iter()
4446 .find(|b| b.bid_id == winner.bid_id)
4447 .map(|winning_bid| {
4448 contract_gen.generate_from_bid(
4449 winning_bid,
4450 Some(&rfx.sourcing_project_id),
4451 &rfx.category_id,
4452 owner_id,
4453 start_date,
4454 )
4455 })
4456 })
4457 })
4458 .collect();
4459 stats.contract_count = contracts.len();
4460
4461 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4463 let catalog_items = catalog_gen.generate(&contracts);
4464 stats.catalog_item_count = catalog_items.len();
4465
4466 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4468 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4469 .iter()
4470 .fold(
4471 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4472 |mut acc, c| {
4473 acc.entry(c.vendor_id.clone()).or_default().push(c);
4474 acc
4475 },
4476 )
4477 .into_iter()
4478 .collect();
4479 let scorecards = scorecard_gen.generate(
4480 company_code,
4481 &vendor_contracts,
4482 start_date,
4483 end_date,
4484 owner_id,
4485 );
4486 stats.scorecard_count = scorecards.len();
4487
4488 let mut sourcing_projects = sourcing_projects;
4491 for project in &mut sourcing_projects {
4492 project.rfx_ids = rfx_events
4494 .iter()
4495 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4496 .map(|rfx| rfx.rfx_id.clone())
4497 .collect();
4498
4499 project.contract_id = contracts
4501 .iter()
4502 .find(|c| {
4503 c.sourcing_project_id
4504 .as_deref()
4505 .is_some_and(|sp| sp == project.project_id)
4506 })
4507 .map(|c| c.contract_id.clone());
4508
4509 project.spend_analysis_id = spend_analyses
4511 .iter()
4512 .find(|sa| sa.category_id == project.category_id)
4513 .map(|sa| sa.category_id.clone());
4514 }
4515
4516 info!(
4517 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4518 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4519 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4520 );
4521 self.check_resources_with_log("post-sourcing")?;
4522
4523 Ok(SourcingSnapshot {
4524 spend_analyses,
4525 sourcing_projects,
4526 qualifications,
4527 rfx_events,
4528 bids: all_bids,
4529 bid_evaluations,
4530 contracts,
4531 catalog_items,
4532 scorecards,
4533 })
4534 }
4535
4536 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4542 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4543
4544 let parent_code = self
4545 .config
4546 .companies
4547 .first()
4548 .map(|c| c.code.clone())
4549 .unwrap_or_else(|| "PARENT".to_string());
4550
4551 let mut group = GroupStructure::new(parent_code);
4552
4553 for company in self.config.companies.iter().skip(1) {
4554 let sub =
4555 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4556 group.add_subsidiary(sub);
4557 }
4558
4559 group
4560 }
4561
4562 fn phase_intercompany(
4564 &mut self,
4565 journal_entries: &[JournalEntry],
4566 stats: &mut EnhancedGenerationStatistics,
4567 ) -> SynthResult<IntercompanySnapshot> {
4568 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4570 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4571 return Ok(IntercompanySnapshot::default());
4572 }
4573
4574 if self.config.companies.len() < 2 {
4576 debug!(
4577 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4578 self.config.companies.len()
4579 );
4580 return Ok(IntercompanySnapshot::default());
4581 }
4582
4583 info!("Phase 14b: Generating Intercompany Transactions");
4584
4585 let group_structure = self.build_group_structure();
4588 debug!(
4589 "Group structure built: parent={}, subsidiaries={}",
4590 group_structure.parent_entity,
4591 group_structure.subsidiaries.len()
4592 );
4593
4594 let seed = self.seed;
4595 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4596 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4597 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4598
4599 let parent_code = self.config.companies[0].code.clone();
4602 let mut ownership_structure =
4603 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4604
4605 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4606 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4607 format!("REL{:03}", i + 1),
4608 parent_code.clone(),
4609 company.code.clone(),
4610 rust_decimal::Decimal::from(100), start_date,
4612 );
4613 ownership_structure.add_relationship(relationship);
4614 }
4615
4616 let tp_method = match self.config.intercompany.transfer_pricing_method {
4618 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4619 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4620 }
4621 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4622 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4623 }
4624 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4625 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4626 }
4627 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4628 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4629 }
4630 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4631 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4632 }
4633 };
4634
4635 let ic_currency = self
4637 .config
4638 .companies
4639 .first()
4640 .map(|c| c.currency.clone())
4641 .unwrap_or_else(|| "USD".to_string());
4642 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4643 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4644 transfer_pricing_method: tp_method,
4645 markup_percent: rust_decimal::Decimal::from_f64_retain(
4646 self.config.intercompany.markup_percent,
4647 )
4648 .unwrap_or(rust_decimal::Decimal::from(5)),
4649 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4650 default_currency: ic_currency,
4651 ..Default::default()
4652 };
4653
4654 let mut ic_generator = datasynth_generators::ICGenerator::new(
4656 ic_gen_config,
4657 ownership_structure.clone(),
4658 seed + 50,
4659 );
4660
4661 let transactions_per_day = 3;
4664 let matched_pairs = ic_generator.generate_transactions_for_period(
4665 start_date,
4666 end_date,
4667 transactions_per_day,
4668 );
4669
4670 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4672 debug!(
4673 "Generated {} IC seller invoices, {} IC buyer POs",
4674 ic_doc_chains.seller_invoices.len(),
4675 ic_doc_chains.buyer_orders.len()
4676 );
4677
4678 let mut seller_entries = Vec::new();
4680 let mut buyer_entries = Vec::new();
4681 let fiscal_year = start_date.year();
4682
4683 for pair in &matched_pairs {
4684 let fiscal_period = pair.posting_date.month();
4685 let (seller_je, buyer_je) =
4686 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4687 seller_entries.push(seller_je);
4688 buyer_entries.push(buyer_je);
4689 }
4690
4691 let matching_config = datasynth_generators::ICMatchingConfig {
4693 base_currency: self
4694 .config
4695 .companies
4696 .first()
4697 .map(|c| c.currency.clone())
4698 .unwrap_or_else(|| "USD".to_string()),
4699 ..Default::default()
4700 };
4701 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4702 matching_engine.load_matched_pairs(&matched_pairs);
4703 let matching_result = matching_engine.run_matching(end_date);
4704
4705 let mut elimination_entries = Vec::new();
4707 if self.config.intercompany.generate_eliminations {
4708 let elim_config = datasynth_generators::EliminationConfig {
4709 consolidation_entity: "GROUP".to_string(),
4710 base_currency: self
4711 .config
4712 .companies
4713 .first()
4714 .map(|c| c.currency.clone())
4715 .unwrap_or_else(|| "USD".to_string()),
4716 ..Default::default()
4717 };
4718
4719 let mut elim_generator =
4720 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4721
4722 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4723 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4724 matching_result
4725 .matched_balances
4726 .iter()
4727 .chain(matching_result.unmatched_balances.iter())
4728 .cloned()
4729 .collect();
4730
4731 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4743 std::collections::HashMap::new();
4744 let mut equity_amounts: std::collections::HashMap<
4745 String,
4746 std::collections::HashMap<String, rust_decimal::Decimal>,
4747 > = std::collections::HashMap::new();
4748 {
4749 use rust_decimal::Decimal;
4750 let hundred = Decimal::from(100u32);
4751 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4755 for sub in &group_structure.subsidiaries {
4756 let net_assets = {
4757 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4758 if na > Decimal::ZERO {
4759 na
4760 } else {
4761 Decimal::from(1_000_000u64)
4762 }
4763 };
4764 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4766 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4767
4768 let mut eq_map = std::collections::HashMap::new();
4771 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4772 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4773 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4774 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4775 }
4776 }
4777
4778 let journal = elim_generator.generate_eliminations(
4779 &fiscal_period,
4780 end_date,
4781 &all_balances,
4782 &matched_pairs,
4783 &investment_amounts,
4784 &equity_amounts,
4785 );
4786
4787 elimination_entries = journal.entries.clone();
4788 }
4789
4790 let matched_pair_count = matched_pairs.len();
4791 let elimination_entry_count = elimination_entries.len();
4792 let match_rate = matching_result.match_rate;
4793
4794 stats.ic_matched_pair_count = matched_pair_count;
4795 stats.ic_elimination_count = elimination_entry_count;
4796 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4797
4798 info!(
4799 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4800 matched_pair_count,
4801 stats.ic_transaction_count,
4802 seller_entries.len(),
4803 buyer_entries.len(),
4804 elimination_entry_count,
4805 match_rate * 100.0
4806 );
4807 self.check_resources_with_log("post-intercompany")?;
4808
4809 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4813 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4814 use rust_decimal::Decimal;
4815
4816 let eight_pct = Decimal::new(8, 2); group_structure
4819 .subsidiaries
4820 .iter()
4821 .filter(|sub| {
4822 sub.nci_percentage > Decimal::ZERO
4823 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4824 })
4825 .map(|sub| {
4826 let net_assets_from_jes =
4830 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4831
4832 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4833 net_assets_from_jes.round_dp(2)
4834 } else {
4835 Decimal::from(1_000_000u64)
4837 };
4838
4839 let net_income = (net_assets * eight_pct).round_dp(2);
4841
4842 NciMeasurement::compute(
4843 sub.entity_code.clone(),
4844 sub.nci_percentage,
4845 net_assets,
4846 net_income,
4847 )
4848 })
4849 .collect()
4850 };
4851
4852 if !nci_measurements.is_empty() {
4853 info!(
4854 "NCI measurements: {} subsidiaries with non-controlling interests",
4855 nci_measurements.len()
4856 );
4857 }
4858
4859 Ok(IntercompanySnapshot {
4860 group_structure: Some(group_structure),
4861 matched_pairs,
4862 seller_journal_entries: seller_entries,
4863 buyer_journal_entries: buyer_entries,
4864 elimination_entries,
4865 nci_measurements,
4866 ic_document_chains: Some(ic_doc_chains),
4867 matched_pair_count,
4868 elimination_entry_count,
4869 match_rate,
4870 })
4871 }
4872
4873 fn phase_financial_reporting(
4875 &mut self,
4876 document_flows: &DocumentFlowSnapshot,
4877 journal_entries: &[JournalEntry],
4878 coa: &Arc<ChartOfAccounts>,
4879 _hr: &HrSnapshot,
4880 _audit: &AuditSnapshot,
4881 stats: &mut EnhancedGenerationStatistics,
4882 ) -> SynthResult<FinancialReportingSnapshot> {
4883 let fs_enabled = self.phase_config.generate_financial_statements
4884 || self.config.financial_reporting.enabled;
4885 let br_enabled = self.phase_config.generate_bank_reconciliation;
4886
4887 if !fs_enabled && !br_enabled {
4888 debug!("Phase 15: Skipped (financial reporting disabled)");
4889 return Ok(FinancialReportingSnapshot::default());
4890 }
4891
4892 info!("Phase 15: Generating Financial Reporting Data");
4893
4894 let seed = self.seed;
4895 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4896 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4897
4898 let mut financial_statements = Vec::new();
4899 let mut bank_reconciliations = Vec::new();
4900 let mut trial_balances = Vec::new();
4901 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4902 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4903 Vec::new();
4904 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4906 std::collections::HashMap::new();
4907 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4909 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4911
4912 if fs_enabled {
4920 let has_journal_entries = !journal_entries.is_empty();
4921
4922 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4925 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4927
4928 let elimination_entries: Vec<&JournalEntry> = journal_entries
4930 .iter()
4931 .filter(|je| je.header.is_elimination)
4932 .collect();
4933
4934 for period in 0..self.config.global.period_months {
4936 let period_start = start_date + chrono::Months::new(period);
4937 let period_end =
4938 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4939 let fiscal_year = period_end.year() as u16;
4940 let fiscal_period = period_end.month() as u8;
4941 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4942
4943 let mut entity_tb_map: std::collections::HashMap<
4946 String,
4947 std::collections::HashMap<String, rust_decimal::Decimal>,
4948 > = std::collections::HashMap::new();
4949
4950 for (company_idx, company) in self.config.companies.iter().enumerate() {
4952 let company_code = company.code.as_str();
4953 let currency = company.currency.as_str();
4954 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4957 let mut company_fs_gen =
4958 FinancialStatementGenerator::new(seed + company_seed_offset);
4959
4960 if has_journal_entries {
4961 let tb_entries = Self::build_cumulative_trial_balance(
4962 journal_entries,
4963 coa,
4964 company_code,
4965 start_date,
4966 period_end,
4967 fiscal_year,
4968 fiscal_period,
4969 );
4970
4971 let entity_cat_map =
4973 entity_tb_map.entry(company_code.to_string()).or_default();
4974 for tb_entry in &tb_entries {
4975 let net = tb_entry.debit_balance - tb_entry.credit_balance;
4976 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4977 }
4978
4979 let stmts = company_fs_gen.generate(
4980 company_code,
4981 currency,
4982 &tb_entries,
4983 period_start,
4984 period_end,
4985 fiscal_year,
4986 fiscal_period,
4987 None,
4988 "SYS-AUTOCLOSE",
4989 );
4990
4991 let mut entity_stmts = Vec::new();
4992 for stmt in stmts {
4993 if stmt.statement_type == StatementType::CashFlowStatement {
4994 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4995 let cf_items = Self::build_cash_flow_from_trial_balances(
4996 &tb_entries,
4997 None,
4998 net_income,
4999 );
5000 entity_stmts.push(FinancialStatement {
5001 cash_flow_items: cf_items,
5002 ..stmt
5003 });
5004 } else {
5005 entity_stmts.push(stmt);
5006 }
5007 }
5008
5009 financial_statements.extend(entity_stmts.clone());
5011
5012 standalone_statements
5014 .entry(company_code.to_string())
5015 .or_default()
5016 .extend(entity_stmts);
5017
5018 if company_idx == 0 {
5021 trial_balances.push(PeriodTrialBalance {
5022 fiscal_year,
5023 fiscal_period,
5024 period_start,
5025 period_end,
5026 entries: tb_entries,
5027 });
5028 }
5029 } else {
5030 let tb_entries = Self::build_trial_balance_from_entries(
5032 journal_entries,
5033 coa,
5034 company_code,
5035 fiscal_year,
5036 fiscal_period,
5037 );
5038
5039 let stmts = company_fs_gen.generate(
5040 company_code,
5041 currency,
5042 &tb_entries,
5043 period_start,
5044 period_end,
5045 fiscal_year,
5046 fiscal_period,
5047 None,
5048 "SYS-AUTOCLOSE",
5049 );
5050 financial_statements.extend(stmts.clone());
5051 standalone_statements
5052 .entry(company_code.to_string())
5053 .or_default()
5054 .extend(stmts);
5055
5056 if company_idx == 0 && !tb_entries.is_empty() {
5057 trial_balances.push(PeriodTrialBalance {
5058 fiscal_year,
5059 fiscal_period,
5060 period_start,
5061 period_end,
5062 entries: tb_entries,
5063 });
5064 }
5065 }
5066 }
5067
5068 let group_currency = self
5071 .config
5072 .companies
5073 .first()
5074 .map(|c| c.currency.as_str())
5075 .unwrap_or("USD");
5076
5077 let period_eliminations: Vec<JournalEntry> = elimination_entries
5079 .iter()
5080 .filter(|je| {
5081 je.header.fiscal_year == fiscal_year
5082 && je.header.fiscal_period == fiscal_period
5083 })
5084 .map(|je| (*je).clone())
5085 .collect();
5086
5087 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5088 &entity_tb_map,
5089 &period_eliminations,
5090 &period_label,
5091 );
5092
5093 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5096 .line_items
5097 .iter()
5098 .map(|li| {
5099 let net = li.post_elimination_total;
5100 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5101 (net, rust_decimal::Decimal::ZERO)
5102 } else {
5103 (rust_decimal::Decimal::ZERO, -net)
5104 };
5105 datasynth_generators::TrialBalanceEntry {
5106 account_code: li.account_category.clone(),
5107 account_name: li.account_category.clone(),
5108 category: li.account_category.clone(),
5109 debit_balance: debit,
5110 credit_balance: credit,
5111 }
5112 })
5113 .collect();
5114
5115 let mut cons_stmts = cons_gen.generate(
5116 "GROUP",
5117 group_currency,
5118 &cons_tb,
5119 period_start,
5120 period_end,
5121 fiscal_year,
5122 fiscal_period,
5123 None,
5124 "SYS-AUTOCLOSE",
5125 );
5126
5127 let bs_categories: &[&str] = &[
5131 "CASH",
5132 "RECEIVABLES",
5133 "INVENTORY",
5134 "FIXEDASSETS",
5135 "PAYABLES",
5136 "ACCRUEDLIABILITIES",
5137 "LONGTERMDEBT",
5138 "EQUITY",
5139 ];
5140 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5141 cons_line_items.into_iter().partition(|li| {
5142 let upper = li.label.to_uppercase();
5143 bs_categories.iter().any(|c| upper == *c)
5144 });
5145
5146 for stmt in &mut cons_stmts {
5147 stmt.is_consolidated = true;
5148 match stmt.statement_type {
5149 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5150 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5151 _ => {} }
5153 }
5154
5155 consolidated_statements.extend(cons_stmts);
5156 consolidation_schedules.push(schedule);
5157 }
5158
5159 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5165 info!(
5166 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5167 stats.financial_statement_count,
5168 consolidated_statements.len(),
5169 has_journal_entries
5170 );
5171
5172 let entity_seeds: Vec<SegmentSeed> = self
5177 .config
5178 .companies
5179 .iter()
5180 .map(|c| SegmentSeed {
5181 code: c.code.clone(),
5182 name: c.name.clone(),
5183 currency: c.currency.clone(),
5184 })
5185 .collect();
5186
5187 let mut seg_gen = SegmentGenerator::new(seed + 30);
5188
5189 for period in 0..self.config.global.period_months {
5194 let period_end =
5195 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5196 let fiscal_year = period_end.year() as u16;
5197 let fiscal_period = period_end.month() as u8;
5198 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5199
5200 use datasynth_core::models::StatementType;
5201
5202 let cons_is = consolidated_statements.iter().find(|s| {
5204 s.fiscal_year == fiscal_year
5205 && s.fiscal_period == fiscal_period
5206 && s.statement_type == StatementType::IncomeStatement
5207 });
5208 let cons_bs = consolidated_statements.iter().find(|s| {
5209 s.fiscal_year == fiscal_year
5210 && s.fiscal_period == fiscal_period
5211 && s.statement_type == StatementType::BalanceSheet
5212 });
5213
5214 let is_stmt = cons_is.or_else(|| {
5216 financial_statements.iter().find(|s| {
5217 s.fiscal_year == fiscal_year
5218 && s.fiscal_period == fiscal_period
5219 && s.statement_type == StatementType::IncomeStatement
5220 })
5221 });
5222 let bs_stmt = cons_bs.or_else(|| {
5223 financial_statements.iter().find(|s| {
5224 s.fiscal_year == fiscal_year
5225 && s.fiscal_period == fiscal_period
5226 && s.statement_type == StatementType::BalanceSheet
5227 })
5228 });
5229
5230 let consolidated_revenue = is_stmt
5231 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5232 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5234
5235 let consolidated_profit = is_stmt
5236 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5237 .map(|li| li.amount)
5238 .unwrap_or(rust_decimal::Decimal::ZERO);
5239
5240 let consolidated_assets = bs_stmt
5241 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5242 .map(|li| li.amount)
5243 .unwrap_or(rust_decimal::Decimal::ZERO);
5244
5245 if consolidated_revenue == rust_decimal::Decimal::ZERO
5247 && consolidated_assets == rust_decimal::Decimal::ZERO
5248 {
5249 continue;
5250 }
5251
5252 let group_code = self
5253 .config
5254 .companies
5255 .first()
5256 .map(|c| c.code.as_str())
5257 .unwrap_or("GROUP");
5258
5259 let total_depr: rust_decimal::Decimal = journal_entries
5262 .iter()
5263 .filter(|je| je.header.document_type == "CL")
5264 .flat_map(|je| je.lines.iter())
5265 .filter(|l| l.gl_account.starts_with("6000"))
5266 .map(|l| l.debit_amount)
5267 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5268 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5269 Some(total_depr)
5270 } else {
5271 None
5272 };
5273
5274 let (segs, recon) = seg_gen.generate(
5275 group_code,
5276 &period_label,
5277 consolidated_revenue,
5278 consolidated_profit,
5279 consolidated_assets,
5280 &entity_seeds,
5281 depr_param,
5282 );
5283 segment_reports.extend(segs);
5284 segment_reconciliations.push(recon);
5285 }
5286
5287 info!(
5288 "Segment reports generated: {} segments, {} reconciliations",
5289 segment_reports.len(),
5290 segment_reconciliations.len()
5291 );
5292 }
5293
5294 if br_enabled && !document_flows.payments.is_empty() {
5296 let employee_ids: Vec<String> = self
5297 .master_data
5298 .employees
5299 .iter()
5300 .map(|e| e.employee_id.clone())
5301 .collect();
5302 let mut br_gen =
5303 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5304
5305 for company in &self.config.companies {
5307 let company_payments: Vec<PaymentReference> = document_flows
5308 .payments
5309 .iter()
5310 .filter(|p| p.header.company_code == company.code)
5311 .map(|p| PaymentReference {
5312 id: p.header.document_id.clone(),
5313 amount: if p.is_vendor { p.amount } else { -p.amount },
5314 date: p.header.document_date,
5315 reference: p
5316 .check_number
5317 .clone()
5318 .or_else(|| p.wire_reference.clone())
5319 .unwrap_or_else(|| p.header.document_id.clone()),
5320 })
5321 .collect();
5322
5323 if company_payments.is_empty() {
5324 continue;
5325 }
5326
5327 let bank_account_id = format!("{}-MAIN", company.code);
5328
5329 for period in 0..self.config.global.period_months {
5331 let period_start = start_date + chrono::Months::new(period);
5332 let period_end =
5333 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5334
5335 let period_payments: Vec<PaymentReference> = company_payments
5336 .iter()
5337 .filter(|p| p.date >= period_start && p.date <= period_end)
5338 .cloned()
5339 .collect();
5340
5341 let recon = br_gen.generate(
5342 &company.code,
5343 &bank_account_id,
5344 period_start,
5345 period_end,
5346 &company.currency,
5347 &period_payments,
5348 );
5349 bank_reconciliations.push(recon);
5350 }
5351 }
5352 info!(
5353 "Bank reconciliations generated: {} reconciliations",
5354 bank_reconciliations.len()
5355 );
5356 }
5357
5358 stats.bank_reconciliation_count = bank_reconciliations.len();
5359 self.check_resources_with_log("post-financial-reporting")?;
5360
5361 if !trial_balances.is_empty() {
5362 info!(
5363 "Period-close trial balances captured: {} periods",
5364 trial_balances.len()
5365 );
5366 }
5367
5368 let notes_to_financial_statements = Vec::new();
5372
5373 Ok(FinancialReportingSnapshot {
5374 financial_statements,
5375 standalone_statements,
5376 consolidated_statements,
5377 consolidation_schedules,
5378 bank_reconciliations,
5379 trial_balances,
5380 segment_reports,
5381 segment_reconciliations,
5382 notes_to_financial_statements,
5383 })
5384 }
5385
5386 fn generate_notes_to_financial_statements(
5393 &self,
5394 financial_reporting: &mut FinancialReportingSnapshot,
5395 accounting_standards: &AccountingStandardsSnapshot,
5396 tax: &TaxSnapshot,
5397 hr: &HrSnapshot,
5398 audit: &AuditSnapshot,
5399 treasury: &TreasurySnapshot,
5400 ) {
5401 use datasynth_config::schema::AccountingFrameworkConfig;
5402 use datasynth_core::models::StatementType;
5403 use datasynth_generators::period_close::notes_generator::{
5404 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5405 };
5406
5407 let seed = self.seed;
5408 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5409 {
5410 Ok(d) => d,
5411 Err(_) => return,
5412 };
5413
5414 let mut notes_gen = NotesGenerator::new(seed + 4235);
5415
5416 for company in &self.config.companies {
5417 let last_period_end = start_date
5418 + chrono::Months::new(self.config.global.period_months)
5419 - chrono::Days::new(1);
5420 let fiscal_year = last_period_end.year() as u16;
5421
5422 let entity_is = financial_reporting
5424 .standalone_statements
5425 .get(&company.code)
5426 .and_then(|stmts| {
5427 stmts.iter().find(|s| {
5428 s.fiscal_year == fiscal_year
5429 && s.statement_type == StatementType::IncomeStatement
5430 })
5431 });
5432 let entity_bs = financial_reporting
5433 .standalone_statements
5434 .get(&company.code)
5435 .and_then(|stmts| {
5436 stmts.iter().find(|s| {
5437 s.fiscal_year == fiscal_year
5438 && s.statement_type == StatementType::BalanceSheet
5439 })
5440 });
5441
5442 let revenue_amount = entity_is
5444 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5445 .map(|li| li.amount);
5446 let ppe_gross = entity_bs
5447 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5448 .map(|li| li.amount);
5449
5450 let framework = match self
5451 .config
5452 .accounting_standards
5453 .framework
5454 .unwrap_or_default()
5455 {
5456 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5457 "IFRS".to_string()
5458 }
5459 _ => "US GAAP".to_string(),
5460 };
5461
5462 let (entity_dta, entity_dtl) = {
5465 let mut dta = rust_decimal::Decimal::ZERO;
5466 let mut dtl = rust_decimal::Decimal::ZERO;
5467 for rf in &tax.deferred_tax.rollforwards {
5468 if rf.entity_code == company.code {
5469 dta += rf.closing_dta;
5470 dtl += rf.closing_dtl;
5471 }
5472 }
5473 (
5474 if dta > rust_decimal::Decimal::ZERO {
5475 Some(dta)
5476 } else {
5477 None
5478 },
5479 if dtl > rust_decimal::Decimal::ZERO {
5480 Some(dtl)
5481 } else {
5482 None
5483 },
5484 )
5485 };
5486
5487 let entity_provisions: Vec<_> = accounting_standards
5490 .provisions
5491 .iter()
5492 .filter(|p| p.entity_code == company.code)
5493 .collect();
5494 let provision_count = entity_provisions.len();
5495 let total_provisions = if provision_count > 0 {
5496 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5497 } else {
5498 None
5499 };
5500
5501 let entity_pension_plan_count = hr
5503 .pension_plans
5504 .iter()
5505 .filter(|p| p.entity_code == company.code)
5506 .count();
5507 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5508 let sum: rust_decimal::Decimal = hr
5509 .pension_disclosures
5510 .iter()
5511 .filter(|d| {
5512 hr.pension_plans
5513 .iter()
5514 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5515 })
5516 .map(|d| d.net_pension_liability)
5517 .sum();
5518 let plan_assets_sum: rust_decimal::Decimal = hr
5519 .pension_plan_assets
5520 .iter()
5521 .filter(|a| {
5522 hr.pension_plans
5523 .iter()
5524 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5525 })
5526 .map(|a| a.fair_value_closing)
5527 .sum();
5528 if entity_pension_plan_count > 0 {
5529 Some(sum + plan_assets_sum)
5530 } else {
5531 None
5532 }
5533 };
5534 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5535 let sum: rust_decimal::Decimal = hr
5536 .pension_plan_assets
5537 .iter()
5538 .filter(|a| {
5539 hr.pension_plans
5540 .iter()
5541 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5542 })
5543 .map(|a| a.fair_value_closing)
5544 .sum();
5545 if entity_pension_plan_count > 0 {
5546 Some(sum)
5547 } else {
5548 None
5549 }
5550 };
5551
5552 let rp_count = audit.related_party_transactions.len();
5555 let se_count = audit.subsequent_events.len();
5556 let adjusting_count = audit
5557 .subsequent_events
5558 .iter()
5559 .filter(|e| {
5560 matches!(
5561 e.classification,
5562 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5563 )
5564 })
5565 .count();
5566
5567 let ctx = NotesGeneratorContext {
5568 entity_code: company.code.clone(),
5569 framework,
5570 period: format!("FY{}", fiscal_year),
5571 period_end: last_period_end,
5572 currency: company.currency.clone(),
5573 revenue_amount,
5574 total_ppe_gross: ppe_gross,
5575 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5576 deferred_tax_asset: entity_dta,
5578 deferred_tax_liability: entity_dtl,
5579 provision_count,
5581 total_provisions,
5582 pension_plan_count: entity_pension_plan_count,
5584 total_dbo: entity_total_dbo,
5585 total_plan_assets: entity_total_plan_assets,
5586 related_party_transaction_count: rp_count,
5588 subsequent_event_count: se_count,
5589 adjusting_event_count: adjusting_count,
5590 ..NotesGeneratorContext::default()
5591 };
5592
5593 let entity_notes = notes_gen.generate(&ctx);
5594 let standard_note_count = entity_notes.len() as u32;
5595 info!(
5596 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5597 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5598 );
5599 financial_reporting
5600 .notes_to_financial_statements
5601 .extend(entity_notes);
5602
5603 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5605 .debt_instruments
5606 .iter()
5607 .filter(|d| d.entity_id == company.code)
5608 .map(|d| {
5609 (
5610 format!("{:?}", d.instrument_type),
5611 d.principal,
5612 d.maturity_date.to_string(),
5613 )
5614 })
5615 .collect();
5616
5617 let hedge_count = treasury.hedge_relationships.len();
5618 let effective_hedges = treasury
5619 .hedge_relationships
5620 .iter()
5621 .filter(|h| h.is_effective)
5622 .count();
5623 let total_notional: rust_decimal::Decimal = treasury
5624 .hedging_instruments
5625 .iter()
5626 .map(|h| h.notional_amount)
5627 .sum();
5628 let total_fair_value: rust_decimal::Decimal = treasury
5629 .hedging_instruments
5630 .iter()
5631 .map(|h| h.fair_value)
5632 .sum();
5633
5634 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5636 .provisions
5637 .iter()
5638 .filter(|p| p.entity_code == company.code)
5639 .map(|p| p.id.as_str())
5640 .collect();
5641 let provision_movements: Vec<(
5642 String,
5643 rust_decimal::Decimal,
5644 rust_decimal::Decimal,
5645 rust_decimal::Decimal,
5646 )> = accounting_standards
5647 .provision_movements
5648 .iter()
5649 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5650 .map(|m| {
5651 let prov_type = accounting_standards
5652 .provisions
5653 .iter()
5654 .find(|p| p.id == m.provision_id)
5655 .map(|p| format!("{:?}", p.provision_type))
5656 .unwrap_or_else(|| "Unknown".to_string());
5657 (prov_type, m.opening, m.additions, m.closing)
5658 })
5659 .collect();
5660
5661 let enhanced_ctx = EnhancedNotesContext {
5662 entity_code: company.code.clone(),
5663 period: format!("FY{}", fiscal_year),
5664 currency: company.currency.clone(),
5665 finished_goods_value: rust_decimal::Decimal::ZERO,
5667 wip_value: rust_decimal::Decimal::ZERO,
5668 raw_materials_value: rust_decimal::Decimal::ZERO,
5669 debt_instruments,
5670 hedge_count,
5671 effective_hedges,
5672 total_notional,
5673 total_fair_value,
5674 provision_movements,
5675 };
5676
5677 let enhanced_notes =
5678 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5679 if !enhanced_notes.is_empty() {
5680 info!(
5681 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5682 company.code,
5683 enhanced_notes.len(),
5684 enhanced_ctx.debt_instruments.len(),
5685 hedge_count,
5686 enhanced_ctx.provision_movements.len(),
5687 );
5688 financial_reporting
5689 .notes_to_financial_statements
5690 .extend(enhanced_notes);
5691 }
5692 }
5693 }
5694
5695 fn build_trial_balance_from_entries(
5701 journal_entries: &[JournalEntry],
5702 coa: &ChartOfAccounts,
5703 company_code: &str,
5704 fiscal_year: u16,
5705 fiscal_period: u8,
5706 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5707 use rust_decimal::Decimal;
5708
5709 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5711 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5712
5713 for je in journal_entries {
5714 if je.header.company_code != company_code
5716 || je.header.fiscal_year != fiscal_year
5717 || je.header.fiscal_period != fiscal_period
5718 {
5719 continue;
5720 }
5721
5722 for line in &je.lines {
5723 let acct = &line.gl_account;
5724 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5725 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5726 }
5727 }
5728
5729 let mut all_accounts: Vec<&String> = account_debits
5731 .keys()
5732 .chain(account_credits.keys())
5733 .collect::<std::collections::HashSet<_>>()
5734 .into_iter()
5735 .collect();
5736 all_accounts.sort();
5737
5738 let mut entries = Vec::new();
5739
5740 for acct_number in all_accounts {
5741 let debit = account_debits
5742 .get(acct_number)
5743 .copied()
5744 .unwrap_or(Decimal::ZERO);
5745 let credit = account_credits
5746 .get(acct_number)
5747 .copied()
5748 .unwrap_or(Decimal::ZERO);
5749
5750 if debit.is_zero() && credit.is_zero() {
5751 continue;
5752 }
5753
5754 let account_name = coa
5756 .get_account(acct_number)
5757 .map(|gl| gl.short_description.clone())
5758 .unwrap_or_else(|| format!("Account {acct_number}"));
5759
5760 let category = Self::category_from_account_code(acct_number);
5765
5766 entries.push(datasynth_generators::TrialBalanceEntry {
5767 account_code: acct_number.clone(),
5768 account_name,
5769 category,
5770 debit_balance: debit,
5771 credit_balance: credit,
5772 });
5773 }
5774
5775 entries
5776 }
5777
5778 fn build_cumulative_trial_balance(
5785 journal_entries: &[JournalEntry],
5786 coa: &ChartOfAccounts,
5787 company_code: &str,
5788 start_date: NaiveDate,
5789 period_end: NaiveDate,
5790 fiscal_year: u16,
5791 fiscal_period: u8,
5792 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5793 use rust_decimal::Decimal;
5794
5795 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5797 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5798
5799 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5801 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5802
5803 for je in journal_entries {
5804 if je.header.company_code != company_code {
5805 continue;
5806 }
5807
5808 for line in &je.lines {
5809 let acct = &line.gl_account;
5810 let category = Self::category_from_account_code(acct);
5811 let is_bs_account = matches!(
5812 category.as_str(),
5813 "Cash"
5814 | "Receivables"
5815 | "Inventory"
5816 | "FixedAssets"
5817 | "Payables"
5818 | "AccruedLiabilities"
5819 | "LongTermDebt"
5820 | "Equity"
5821 );
5822
5823 if is_bs_account {
5824 if je.header.document_date <= period_end
5826 && je.header.document_date >= start_date
5827 {
5828 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5829 line.debit_amount;
5830 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5831 line.credit_amount;
5832 }
5833 } else {
5834 if je.header.fiscal_year == fiscal_year
5836 && je.header.fiscal_period == fiscal_period
5837 {
5838 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5839 line.debit_amount;
5840 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5841 line.credit_amount;
5842 }
5843 }
5844 }
5845 }
5846
5847 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5849 all_accounts.extend(bs_debits.keys().cloned());
5850 all_accounts.extend(bs_credits.keys().cloned());
5851 all_accounts.extend(is_debits.keys().cloned());
5852 all_accounts.extend(is_credits.keys().cloned());
5853
5854 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5855 sorted_accounts.sort();
5856
5857 let mut entries = Vec::new();
5858
5859 for acct_number in &sorted_accounts {
5860 let category = Self::category_from_account_code(acct_number);
5861 let is_bs_account = matches!(
5862 category.as_str(),
5863 "Cash"
5864 | "Receivables"
5865 | "Inventory"
5866 | "FixedAssets"
5867 | "Payables"
5868 | "AccruedLiabilities"
5869 | "LongTermDebt"
5870 | "Equity"
5871 );
5872
5873 let (debit, credit) = if is_bs_account {
5874 (
5875 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5876 bs_credits
5877 .get(acct_number)
5878 .copied()
5879 .unwrap_or(Decimal::ZERO),
5880 )
5881 } else {
5882 (
5883 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5884 is_credits
5885 .get(acct_number)
5886 .copied()
5887 .unwrap_or(Decimal::ZERO),
5888 )
5889 };
5890
5891 if debit.is_zero() && credit.is_zero() {
5892 continue;
5893 }
5894
5895 let account_name = coa
5896 .get_account(acct_number)
5897 .map(|gl| gl.short_description.clone())
5898 .unwrap_or_else(|| format!("Account {acct_number}"));
5899
5900 entries.push(datasynth_generators::TrialBalanceEntry {
5901 account_code: acct_number.clone(),
5902 account_name,
5903 category,
5904 debit_balance: debit,
5905 credit_balance: credit,
5906 });
5907 }
5908
5909 entries
5910 }
5911
5912 fn build_cash_flow_from_trial_balances(
5917 current_tb: &[datasynth_generators::TrialBalanceEntry],
5918 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5919 net_income: rust_decimal::Decimal,
5920 ) -> Vec<CashFlowItem> {
5921 use rust_decimal::Decimal;
5922
5923 let aggregate =
5925 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5926 let mut map: HashMap<String, Decimal> = HashMap::new();
5927 for entry in tb {
5928 let net = entry.debit_balance - entry.credit_balance;
5929 *map.entry(entry.category.clone()).or_default() += net;
5930 }
5931 map
5932 };
5933
5934 let current = aggregate(current_tb);
5935 let prior = prior_tb.map(aggregate);
5936
5937 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5939 *map.get(key).unwrap_or(&Decimal::ZERO)
5940 };
5941
5942 let change = |key: &str| -> Decimal {
5944 let curr = get(¤t, key);
5945 match &prior {
5946 Some(p) => curr - get(p, key),
5947 None => curr,
5948 }
5949 };
5950
5951 let fixed_asset_change = change("FixedAssets");
5954 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5955 -fixed_asset_change
5956 } else {
5957 Decimal::ZERO
5958 };
5959
5960 let ar_change = change("Receivables");
5962 let inventory_change = change("Inventory");
5963 let ap_change = change("Payables");
5965 let accrued_change = change("AccruedLiabilities");
5966
5967 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5968 + (-ap_change)
5969 + (-accrued_change);
5970
5971 let capex = if fixed_asset_change > Decimal::ZERO {
5973 -fixed_asset_change
5974 } else {
5975 Decimal::ZERO
5976 };
5977 let investing_cf = capex;
5978
5979 let debt_change = -change("LongTermDebt");
5981 let equity_change = -change("Equity");
5982 let financing_cf = debt_change + equity_change;
5983
5984 let net_change = operating_cf + investing_cf + financing_cf;
5985
5986 vec![
5987 CashFlowItem {
5988 item_code: "CF-NI".to_string(),
5989 label: "Net Income".to_string(),
5990 category: CashFlowCategory::Operating,
5991 amount: net_income,
5992 amount_prior: None,
5993 sort_order: 1,
5994 is_total: false,
5995 },
5996 CashFlowItem {
5997 item_code: "CF-DEP".to_string(),
5998 label: "Depreciation & Amortization".to_string(),
5999 category: CashFlowCategory::Operating,
6000 amount: depreciation_addback,
6001 amount_prior: None,
6002 sort_order: 2,
6003 is_total: false,
6004 },
6005 CashFlowItem {
6006 item_code: "CF-AR".to_string(),
6007 label: "Change in Accounts Receivable".to_string(),
6008 category: CashFlowCategory::Operating,
6009 amount: -ar_change,
6010 amount_prior: None,
6011 sort_order: 3,
6012 is_total: false,
6013 },
6014 CashFlowItem {
6015 item_code: "CF-AP".to_string(),
6016 label: "Change in Accounts Payable".to_string(),
6017 category: CashFlowCategory::Operating,
6018 amount: -ap_change,
6019 amount_prior: None,
6020 sort_order: 4,
6021 is_total: false,
6022 },
6023 CashFlowItem {
6024 item_code: "CF-INV".to_string(),
6025 label: "Change in Inventory".to_string(),
6026 category: CashFlowCategory::Operating,
6027 amount: -inventory_change,
6028 amount_prior: None,
6029 sort_order: 5,
6030 is_total: false,
6031 },
6032 CashFlowItem {
6033 item_code: "CF-OP".to_string(),
6034 label: "Net Cash from Operating Activities".to_string(),
6035 category: CashFlowCategory::Operating,
6036 amount: operating_cf,
6037 amount_prior: None,
6038 sort_order: 6,
6039 is_total: true,
6040 },
6041 CashFlowItem {
6042 item_code: "CF-CAPEX".to_string(),
6043 label: "Capital Expenditures".to_string(),
6044 category: CashFlowCategory::Investing,
6045 amount: capex,
6046 amount_prior: None,
6047 sort_order: 7,
6048 is_total: false,
6049 },
6050 CashFlowItem {
6051 item_code: "CF-INV-T".to_string(),
6052 label: "Net Cash from Investing Activities".to_string(),
6053 category: CashFlowCategory::Investing,
6054 amount: investing_cf,
6055 amount_prior: None,
6056 sort_order: 8,
6057 is_total: true,
6058 },
6059 CashFlowItem {
6060 item_code: "CF-DEBT".to_string(),
6061 label: "Net Borrowings / (Repayments)".to_string(),
6062 category: CashFlowCategory::Financing,
6063 amount: debt_change,
6064 amount_prior: None,
6065 sort_order: 9,
6066 is_total: false,
6067 },
6068 CashFlowItem {
6069 item_code: "CF-EQ".to_string(),
6070 label: "Equity Changes".to_string(),
6071 category: CashFlowCategory::Financing,
6072 amount: equity_change,
6073 amount_prior: None,
6074 sort_order: 10,
6075 is_total: false,
6076 },
6077 CashFlowItem {
6078 item_code: "CF-FIN-T".to_string(),
6079 label: "Net Cash from Financing Activities".to_string(),
6080 category: CashFlowCategory::Financing,
6081 amount: financing_cf,
6082 amount_prior: None,
6083 sort_order: 11,
6084 is_total: true,
6085 },
6086 CashFlowItem {
6087 item_code: "CF-NET".to_string(),
6088 label: "Net Change in Cash".to_string(),
6089 category: CashFlowCategory::Operating,
6090 amount: net_change,
6091 amount_prior: None,
6092 sort_order: 12,
6093 is_total: true,
6094 },
6095 ]
6096 }
6097
6098 fn calculate_net_income_from_tb(
6102 tb: &[datasynth_generators::TrialBalanceEntry],
6103 ) -> rust_decimal::Decimal {
6104 use rust_decimal::Decimal;
6105
6106 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6107 for entry in tb {
6108 let net = entry.debit_balance - entry.credit_balance;
6109 *aggregated.entry(entry.category.clone()).or_default() += net;
6110 }
6111
6112 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6113 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6114 let opex = *aggregated
6115 .get("OperatingExpenses")
6116 .unwrap_or(&Decimal::ZERO);
6117 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6118 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6119
6120 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6123 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6125 operating_income - tax
6126 }
6127
6128 fn category_from_account_code(code: &str) -> String {
6135 let prefix: String = code.chars().take(2).collect();
6136 match prefix.as_str() {
6137 "10" => "Cash",
6138 "11" => "Receivables",
6139 "12" | "13" | "14" => "Inventory",
6140 "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6141 "20" => "Payables",
6142 "21" | "22" | "23" | "24" => "AccruedLiabilities",
6143 "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6144 "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6145 "40" | "41" | "42" | "43" | "44" => "Revenue",
6146 "50" | "51" | "52" => "CostOfSales",
6147 "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6148 "OperatingExpenses"
6149 }
6150 "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6151 "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6152 _ => "OperatingExpenses",
6153 }
6154 .to_string()
6155 }
6156
6157 fn phase_hr_data(
6159 &mut self,
6160 stats: &mut EnhancedGenerationStatistics,
6161 ) -> SynthResult<HrSnapshot> {
6162 if !self.phase_config.generate_hr {
6163 debug!("Phase 16: Skipped (HR generation disabled)");
6164 return Ok(HrSnapshot::default());
6165 }
6166
6167 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6168
6169 let seed = self.seed;
6170 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6171 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6172 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6173 let company_code = self
6174 .config
6175 .companies
6176 .first()
6177 .map(|c| c.code.as_str())
6178 .unwrap_or("1000");
6179 let currency = self
6180 .config
6181 .companies
6182 .first()
6183 .map(|c| c.currency.as_str())
6184 .unwrap_or("USD");
6185
6186 let employee_ids: Vec<String> = self
6187 .master_data
6188 .employees
6189 .iter()
6190 .map(|e| e.employee_id.clone())
6191 .collect();
6192
6193 if employee_ids.is_empty() {
6194 debug!("Phase 16: Skipped (no employees available)");
6195 return Ok(HrSnapshot::default());
6196 }
6197
6198 let cost_center_ids: Vec<String> = self
6201 .master_data
6202 .employees
6203 .iter()
6204 .filter_map(|e| e.cost_center.clone())
6205 .collect::<std::collections::HashSet<_>>()
6206 .into_iter()
6207 .collect();
6208
6209 let mut snapshot = HrSnapshot::default();
6210
6211 if self.config.hr.payroll.enabled {
6213 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6214 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6215
6216 let payroll_pack = self.primary_pack();
6218
6219 payroll_gen.set_country_pack(payroll_pack.clone());
6222
6223 let employees_with_salary: Vec<(
6224 String,
6225 rust_decimal::Decimal,
6226 Option<String>,
6227 Option<String>,
6228 )> = self
6229 .master_data
6230 .employees
6231 .iter()
6232 .map(|e| {
6233 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6236 e.base_salary
6237 } else {
6238 rust_decimal::Decimal::from(60_000)
6239 };
6240 (
6241 e.employee_id.clone(),
6242 annual, e.cost_center.clone(),
6244 e.department_id.clone(),
6245 )
6246 })
6247 .collect();
6248
6249 let change_history = &self.master_data.employee_change_history;
6252 let has_changes = !change_history.is_empty();
6253 if has_changes {
6254 debug!(
6255 "Payroll will incorporate {} employee change events",
6256 change_history.len()
6257 );
6258 }
6259
6260 for month in 0..self.config.global.period_months {
6261 let period_start = start_date + chrono::Months::new(month);
6262 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6263 let (run, items) = if has_changes {
6264 payroll_gen.generate_with_changes(
6265 company_code,
6266 &employees_with_salary,
6267 period_start,
6268 period_end,
6269 currency,
6270 change_history,
6271 )
6272 } else {
6273 payroll_gen.generate(
6274 company_code,
6275 &employees_with_salary,
6276 period_start,
6277 period_end,
6278 currency,
6279 )
6280 };
6281 snapshot.payroll_runs.push(run);
6282 snapshot.payroll_run_count += 1;
6283 snapshot.payroll_line_item_count += items.len();
6284 snapshot.payroll_line_items.extend(items);
6285 }
6286 }
6287
6288 if self.config.hr.time_attendance.enabled {
6290 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6291 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6292 let entries = time_gen.generate(
6293 &employee_ids,
6294 start_date,
6295 end_date,
6296 &self.config.hr.time_attendance,
6297 );
6298 snapshot.time_entry_count = entries.len();
6299 snapshot.time_entries = entries;
6300 }
6301
6302 if self.config.hr.expenses.enabled {
6304 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6305 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6306 expense_gen.set_country_pack(self.primary_pack().clone());
6307 let company_currency = self
6308 .config
6309 .companies
6310 .first()
6311 .map(|c| c.currency.as_str())
6312 .unwrap_or("USD");
6313 let reports = expense_gen.generate_with_currency(
6314 &employee_ids,
6315 start_date,
6316 end_date,
6317 &self.config.hr.expenses,
6318 company_currency,
6319 );
6320 snapshot.expense_report_count = reports.len();
6321 snapshot.expense_reports = reports;
6322 }
6323
6324 if self.config.hr.payroll.enabled {
6326 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6327 let employee_pairs: Vec<(String, String)> = self
6328 .master_data
6329 .employees
6330 .iter()
6331 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6332 .collect();
6333 let enrollments =
6334 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6335 snapshot.benefit_enrollment_count = enrollments.len();
6336 snapshot.benefit_enrollments = enrollments;
6337 }
6338
6339 if self.phase_config.generate_hr {
6341 let entity_name = self
6342 .config
6343 .companies
6344 .first()
6345 .map(|c| c.name.as_str())
6346 .unwrap_or("Entity");
6347 let period_months = self.config.global.period_months;
6348 let period_label = {
6349 let y = start_date.year();
6350 let m = start_date.month();
6351 if period_months >= 12 {
6352 format!("FY{y}")
6353 } else {
6354 format!("{y}-{m:02}")
6355 }
6356 };
6357 let reporting_date =
6358 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6359
6360 let avg_salary: Option<rust_decimal::Decimal> = {
6365 let employee_count = employee_ids.len();
6366 if self.config.hr.payroll.enabled
6367 && employee_count > 0
6368 && !snapshot.payroll_runs.is_empty()
6369 {
6370 let total_gross: rust_decimal::Decimal = snapshot
6372 .payroll_runs
6373 .iter()
6374 .filter(|r| r.company_code == company_code)
6375 .map(|r| r.total_gross)
6376 .sum();
6377 if total_gross > rust_decimal::Decimal::ZERO {
6378 let annual_total = if period_months > 0 && period_months < 12 {
6380 total_gross * rust_decimal::Decimal::from(12u32)
6381 / rust_decimal::Decimal::from(period_months)
6382 } else {
6383 total_gross
6384 };
6385 Some(
6386 (annual_total / rust_decimal::Decimal::from(employee_count))
6387 .round_dp(2),
6388 )
6389 } else {
6390 None
6391 }
6392 } else {
6393 None
6394 }
6395 };
6396
6397 let mut pension_gen =
6398 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6399 let pension_snap = pension_gen.generate(
6400 company_code,
6401 entity_name,
6402 &period_label,
6403 reporting_date,
6404 employee_ids.len(),
6405 currency,
6406 avg_salary,
6407 period_months,
6408 );
6409 snapshot.pension_plan_count = pension_snap.plans.len();
6410 snapshot.pension_plans = pension_snap.plans;
6411 snapshot.pension_obligations = pension_snap.obligations;
6412 snapshot.pension_plan_assets = pension_snap.plan_assets;
6413 snapshot.pension_disclosures = pension_snap.disclosures;
6414 snapshot.pension_journal_entries = pension_snap.journal_entries;
6419 }
6420
6421 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6423 let period_months = self.config.global.period_months;
6424 let period_label = {
6425 let y = start_date.year();
6426 let m = start_date.month();
6427 if period_months >= 12 {
6428 format!("FY{y}")
6429 } else {
6430 format!("{y}-{m:02}")
6431 }
6432 };
6433 let reporting_date =
6434 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6435
6436 let mut stock_comp_gen =
6437 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6438 let stock_snap = stock_comp_gen.generate(
6439 company_code,
6440 &employee_ids,
6441 start_date,
6442 &period_label,
6443 reporting_date,
6444 currency,
6445 );
6446 snapshot.stock_grant_count = stock_snap.grants.len();
6447 snapshot.stock_grants = stock_snap.grants;
6448 snapshot.stock_comp_expenses = stock_snap.expenses;
6449 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6450 }
6451
6452 stats.payroll_run_count = snapshot.payroll_run_count;
6453 stats.time_entry_count = snapshot.time_entry_count;
6454 stats.expense_report_count = snapshot.expense_report_count;
6455 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6456 stats.pension_plan_count = snapshot.pension_plan_count;
6457 stats.stock_grant_count = snapshot.stock_grant_count;
6458
6459 info!(
6460 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6461 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6462 snapshot.time_entry_count, snapshot.expense_report_count,
6463 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6464 snapshot.stock_grant_count
6465 );
6466 self.check_resources_with_log("post-hr")?;
6467
6468 Ok(snapshot)
6469 }
6470
6471 fn phase_accounting_standards(
6473 &mut self,
6474 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6475 journal_entries: &[JournalEntry],
6476 stats: &mut EnhancedGenerationStatistics,
6477 ) -> SynthResult<AccountingStandardsSnapshot> {
6478 if !self.phase_config.generate_accounting_standards {
6479 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6480 return Ok(AccountingStandardsSnapshot::default());
6481 }
6482 info!("Phase 17: Generating Accounting Standards Data");
6483
6484 let seed = self.seed;
6485 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6486 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6487 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6488 let company_code = self
6489 .config
6490 .companies
6491 .first()
6492 .map(|c| c.code.as_str())
6493 .unwrap_or("1000");
6494 let currency = self
6495 .config
6496 .companies
6497 .first()
6498 .map(|c| c.currency.as_str())
6499 .unwrap_or("USD");
6500
6501 let framework = match self.config.accounting_standards.framework {
6506 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6507 datasynth_standards::framework::AccountingFramework::UsGaap
6508 }
6509 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6510 datasynth_standards::framework::AccountingFramework::Ifrs
6511 }
6512 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6513 datasynth_standards::framework::AccountingFramework::DualReporting
6514 }
6515 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6516 datasynth_standards::framework::AccountingFramework::FrenchGaap
6517 }
6518 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6519 datasynth_standards::framework::AccountingFramework::GermanGaap
6520 }
6521 None => {
6522 let pack = self.primary_pack();
6524 let pack_fw = pack.accounting.framework.as_str();
6525 match pack_fw {
6526 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6527 "dual_reporting" => {
6528 datasynth_standards::framework::AccountingFramework::DualReporting
6529 }
6530 "french_gaap" => {
6531 datasynth_standards::framework::AccountingFramework::FrenchGaap
6532 }
6533 "german_gaap" | "hgb" => {
6534 datasynth_standards::framework::AccountingFramework::GermanGaap
6535 }
6536 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6538 }
6539 }
6540 };
6541
6542 let mut snapshot = AccountingStandardsSnapshot::default();
6543
6544 if self.config.accounting_standards.revenue_recognition.enabled {
6546 let customer_ids: Vec<String> = self
6547 .master_data
6548 .customers
6549 .iter()
6550 .map(|c| c.customer_id.clone())
6551 .collect();
6552
6553 if !customer_ids.is_empty() {
6554 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6555 let contracts = rev_gen.generate(
6556 company_code,
6557 &customer_ids,
6558 start_date,
6559 end_date,
6560 currency,
6561 &self.config.accounting_standards.revenue_recognition,
6562 framework,
6563 );
6564 snapshot.revenue_contract_count = contracts.len();
6565 snapshot.contracts = contracts;
6566 }
6567 }
6568
6569 if self.config.accounting_standards.impairment.enabled {
6571 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6572 .master_data
6573 .assets
6574 .iter()
6575 .map(|a| {
6576 (
6577 a.asset_id.clone(),
6578 a.description.clone(),
6579 a.acquisition_cost,
6580 )
6581 })
6582 .collect();
6583
6584 if !asset_data.is_empty() {
6585 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6586 let tests = imp_gen.generate(
6587 company_code,
6588 &asset_data,
6589 end_date,
6590 &self.config.accounting_standards.impairment,
6591 framework,
6592 );
6593 snapshot.impairment_test_count = tests.len();
6594 snapshot.impairment_tests = tests;
6595 }
6596 }
6597
6598 if self
6600 .config
6601 .accounting_standards
6602 .business_combinations
6603 .enabled
6604 {
6605 let bc_config = &self.config.accounting_standards.business_combinations;
6606 let framework_str = match framework {
6607 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6608 _ => "US_GAAP",
6609 };
6610 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6611 let bc_snap = bc_gen.generate(
6612 company_code,
6613 currency,
6614 start_date,
6615 end_date,
6616 bc_config.acquisition_count,
6617 framework_str,
6618 );
6619 snapshot.business_combination_count = bc_snap.combinations.len();
6620 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6621 snapshot.business_combinations = bc_snap.combinations;
6622 }
6623
6624 if self
6626 .config
6627 .accounting_standards
6628 .expected_credit_loss
6629 .enabled
6630 {
6631 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6632 let framework_str = match framework {
6633 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6634 _ => "ASC_326",
6635 };
6636
6637 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6640
6641 let mut ecl_gen = EclGenerator::new(seed + 43);
6642
6643 let bucket_exposures: Vec<(
6645 datasynth_core::models::subledger::ar::AgingBucket,
6646 rust_decimal::Decimal,
6647 )> = if ar_aging_reports.is_empty() {
6648 use datasynth_core::models::subledger::ar::AgingBucket;
6650 vec![
6651 (
6652 AgingBucket::Current,
6653 rust_decimal::Decimal::from(500_000_u32),
6654 ),
6655 (
6656 AgingBucket::Days1To30,
6657 rust_decimal::Decimal::from(120_000_u32),
6658 ),
6659 (
6660 AgingBucket::Days31To60,
6661 rust_decimal::Decimal::from(45_000_u32),
6662 ),
6663 (
6664 AgingBucket::Days61To90,
6665 rust_decimal::Decimal::from(15_000_u32),
6666 ),
6667 (
6668 AgingBucket::Over90Days,
6669 rust_decimal::Decimal::from(8_000_u32),
6670 ),
6671 ]
6672 } else {
6673 use datasynth_core::models::subledger::ar::AgingBucket;
6674 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6676 std::collections::HashMap::new();
6677 for report in ar_aging_reports {
6678 for (bucket, amount) in &report.bucket_totals {
6679 *totals.entry(*bucket).or_default() += amount;
6680 }
6681 }
6682 AgingBucket::all()
6683 .into_iter()
6684 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6685 .collect()
6686 };
6687
6688 let ecl_snap = ecl_gen.generate(
6689 company_code,
6690 end_date,
6691 &bucket_exposures,
6692 ecl_config,
6693 &period_label,
6694 framework_str,
6695 );
6696
6697 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6698 snapshot.ecl_models = ecl_snap.ecl_models;
6699 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6700 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6701 }
6702
6703 {
6705 let framework_str = match framework {
6706 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6707 _ => "US_GAAP",
6708 };
6709
6710 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6715 .max(rust_decimal::Decimal::from(100_000_u32));
6716
6717 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6718
6719 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6720 let prov_snap = prov_gen.generate(
6721 company_code,
6722 currency,
6723 revenue_proxy,
6724 end_date,
6725 &period_label,
6726 framework_str,
6727 None, );
6729
6730 snapshot.provision_count = prov_snap.provisions.len();
6731 snapshot.provisions = prov_snap.provisions;
6732 snapshot.provision_movements = prov_snap.movements;
6733 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6734 snapshot.provision_journal_entries = prov_snap.journal_entries;
6735 }
6736
6737 {
6741 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6742
6743 let presentation_currency = self
6744 .config
6745 .global
6746 .presentation_currency
6747 .clone()
6748 .unwrap_or_else(|| self.config.global.group_currency.clone());
6749
6750 let mut rate_table = FxRateTable::new(&presentation_currency);
6753
6754 let base_rates = base_rates_usd();
6758 for (ccy, rate) in &base_rates {
6759 rate_table.add_rate(FxRate::new(
6760 ccy,
6761 "USD",
6762 RateType::Closing,
6763 end_date,
6764 *rate,
6765 "SYNTHETIC",
6766 ));
6767 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6770 rate_table.add_rate(FxRate::new(
6771 ccy,
6772 "USD",
6773 RateType::Average,
6774 end_date,
6775 avg,
6776 "SYNTHETIC",
6777 ));
6778 }
6779
6780 let mut translation_results = Vec::new();
6781 for company in &self.config.companies {
6782 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6785 .max(rust_decimal::Decimal::from(100_000_u32));
6786
6787 let func_ccy = company
6788 .functional_currency
6789 .clone()
6790 .unwrap_or_else(|| company.currency.clone());
6791
6792 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6793 &company.code,
6794 &func_ccy,
6795 &presentation_currency,
6796 &ias21_period_label,
6797 end_date,
6798 company_revenue,
6799 &rate_table,
6800 );
6801 translation_results.push(result);
6802 }
6803
6804 snapshot.currency_translation_count = translation_results.len();
6805 snapshot.currency_translation_results = translation_results;
6806 }
6807
6808 stats.revenue_contract_count = snapshot.revenue_contract_count;
6809 stats.impairment_test_count = snapshot.impairment_test_count;
6810 stats.business_combination_count = snapshot.business_combination_count;
6811 stats.ecl_model_count = snapshot.ecl_model_count;
6812 stats.provision_count = snapshot.provision_count;
6813
6814 info!(
6815 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6816 snapshot.revenue_contract_count,
6817 snapshot.impairment_test_count,
6818 snapshot.business_combination_count,
6819 snapshot.ecl_model_count,
6820 snapshot.provision_count,
6821 snapshot.currency_translation_count
6822 );
6823 self.check_resources_with_log("post-accounting-standards")?;
6824
6825 Ok(snapshot)
6826 }
6827
6828 fn phase_manufacturing(
6830 &mut self,
6831 stats: &mut EnhancedGenerationStatistics,
6832 ) -> SynthResult<ManufacturingSnapshot> {
6833 if !self.phase_config.generate_manufacturing {
6834 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6835 return Ok(ManufacturingSnapshot::default());
6836 }
6837 info!("Phase 18: Generating Manufacturing Data");
6838
6839 let seed = self.seed;
6840 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6841 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6842 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6843 let company_code = self
6844 .config
6845 .companies
6846 .first()
6847 .map(|c| c.code.as_str())
6848 .unwrap_or("1000");
6849
6850 let material_data: Vec<(String, String)> = self
6851 .master_data
6852 .materials
6853 .iter()
6854 .map(|m| (m.material_id.clone(), m.description.clone()))
6855 .collect();
6856
6857 if material_data.is_empty() {
6858 debug!("Phase 18: Skipped (no materials available)");
6859 return Ok(ManufacturingSnapshot::default());
6860 }
6861
6862 let mut snapshot = ManufacturingSnapshot::default();
6863
6864 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6866 let production_orders = prod_gen.generate(
6867 company_code,
6868 &material_data,
6869 start_date,
6870 end_date,
6871 &self.config.manufacturing.production_orders,
6872 &self.config.manufacturing.costing,
6873 &self.config.manufacturing.routing,
6874 );
6875 snapshot.production_order_count = production_orders.len();
6876
6877 let inspection_data: Vec<(String, String, String)> = production_orders
6879 .iter()
6880 .map(|po| {
6881 (
6882 po.order_id.clone(),
6883 po.material_id.clone(),
6884 po.material_description.clone(),
6885 )
6886 })
6887 .collect();
6888
6889 snapshot.production_orders = production_orders;
6890
6891 if !inspection_data.is_empty() {
6892 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6893 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6894 snapshot.quality_inspection_count = inspections.len();
6895 snapshot.quality_inspections = inspections;
6896 }
6897
6898 let storage_locations: Vec<(String, String)> = material_data
6900 .iter()
6901 .enumerate()
6902 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6903 .collect();
6904
6905 let employee_ids: Vec<String> = self
6906 .master_data
6907 .employees
6908 .iter()
6909 .map(|e| e.employee_id.clone())
6910 .collect();
6911 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6912 .with_employee_pool(employee_ids);
6913 let mut cycle_count_total = 0usize;
6914 for month in 0..self.config.global.period_months {
6915 let count_date = start_date + chrono::Months::new(month);
6916 let items_per_count = storage_locations.len().clamp(10, 50);
6917 let cc = cc_gen.generate(
6918 company_code,
6919 &storage_locations,
6920 count_date,
6921 items_per_count,
6922 );
6923 snapshot.cycle_counts.push(cc);
6924 cycle_count_total += 1;
6925 }
6926 snapshot.cycle_count_count = cycle_count_total;
6927
6928 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6930 let bom_components = bom_gen.generate(company_code, &material_data);
6931 snapshot.bom_component_count = bom_components.len();
6932 snapshot.bom_components = bom_components;
6933
6934 let currency = self
6936 .config
6937 .companies
6938 .first()
6939 .map(|c| c.currency.as_str())
6940 .unwrap_or("USD");
6941 let production_order_ids: Vec<String> = snapshot
6942 .production_orders
6943 .iter()
6944 .map(|po| po.order_id.clone())
6945 .collect();
6946 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6947 let inventory_movements = inv_mov_gen.generate_with_production_orders(
6948 company_code,
6949 &material_data,
6950 start_date,
6951 end_date,
6952 2,
6953 currency,
6954 &production_order_ids,
6955 );
6956 snapshot.inventory_movement_count = inventory_movements.len();
6957 snapshot.inventory_movements = inventory_movements;
6958
6959 stats.production_order_count = snapshot.production_order_count;
6960 stats.quality_inspection_count = snapshot.quality_inspection_count;
6961 stats.cycle_count_count = snapshot.cycle_count_count;
6962 stats.bom_component_count = snapshot.bom_component_count;
6963 stats.inventory_movement_count = snapshot.inventory_movement_count;
6964
6965 info!(
6966 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6967 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6968 snapshot.bom_component_count, snapshot.inventory_movement_count
6969 );
6970 self.check_resources_with_log("post-manufacturing")?;
6971
6972 Ok(snapshot)
6973 }
6974
6975 fn phase_sales_kpi_budgets(
6977 &mut self,
6978 coa: &Arc<ChartOfAccounts>,
6979 financial_reporting: &FinancialReportingSnapshot,
6980 stats: &mut EnhancedGenerationStatistics,
6981 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6982 if !self.phase_config.generate_sales_kpi_budgets {
6983 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6984 return Ok(SalesKpiBudgetsSnapshot::default());
6985 }
6986 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6987
6988 let seed = self.seed;
6989 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6990 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6991 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6992 let company_code = self
6993 .config
6994 .companies
6995 .first()
6996 .map(|c| c.code.as_str())
6997 .unwrap_or("1000");
6998
6999 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7000
7001 if self.config.sales_quotes.enabled {
7003 let customer_data: Vec<(String, String)> = self
7004 .master_data
7005 .customers
7006 .iter()
7007 .map(|c| (c.customer_id.clone(), c.name.clone()))
7008 .collect();
7009 let material_data: Vec<(String, String)> = self
7010 .master_data
7011 .materials
7012 .iter()
7013 .map(|m| (m.material_id.clone(), m.description.clone()))
7014 .collect();
7015
7016 if !customer_data.is_empty() && !material_data.is_empty() {
7017 let employee_ids: Vec<String> = self
7018 .master_data
7019 .employees
7020 .iter()
7021 .map(|e| e.employee_id.clone())
7022 .collect();
7023 let customer_ids: Vec<String> = self
7024 .master_data
7025 .customers
7026 .iter()
7027 .map(|c| c.customer_id.clone())
7028 .collect();
7029 let company_currency = self
7030 .config
7031 .companies
7032 .first()
7033 .map(|c| c.currency.as_str())
7034 .unwrap_or("USD");
7035
7036 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7037 .with_pools(employee_ids, customer_ids);
7038 let quotes = quote_gen.generate_with_currency(
7039 company_code,
7040 &customer_data,
7041 &material_data,
7042 start_date,
7043 end_date,
7044 &self.config.sales_quotes,
7045 company_currency,
7046 );
7047 snapshot.sales_quote_count = quotes.len();
7048 snapshot.sales_quotes = quotes;
7049 }
7050 }
7051
7052 if self.config.financial_reporting.management_kpis.enabled {
7054 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7055 let mut kpis = kpi_gen.generate(
7056 company_code,
7057 start_date,
7058 end_date,
7059 &self.config.financial_reporting.management_kpis,
7060 );
7061
7062 {
7064 use rust_decimal::Decimal;
7065
7066 if let Some(income_stmt) =
7067 financial_reporting.financial_statements.iter().find(|fs| {
7068 fs.statement_type == StatementType::IncomeStatement
7069 && fs.company_code == company_code
7070 })
7071 {
7072 let total_revenue: Decimal = income_stmt
7074 .line_items
7075 .iter()
7076 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7077 .map(|li| li.amount)
7078 .sum();
7079 let total_cogs: Decimal = income_stmt
7080 .line_items
7081 .iter()
7082 .filter(|li| {
7083 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7084 && !li.is_total
7085 })
7086 .map(|li| li.amount.abs())
7087 .sum();
7088 let total_opex: Decimal = income_stmt
7089 .line_items
7090 .iter()
7091 .filter(|li| {
7092 li.section.contains("Expense")
7093 && !li.is_total
7094 && !li.section.contains("Cost")
7095 })
7096 .map(|li| li.amount.abs())
7097 .sum();
7098
7099 if total_revenue > Decimal::ZERO {
7100 let hundred = Decimal::from(100);
7101 let gross_margin_pct =
7102 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7103 let operating_income = total_revenue - total_cogs - total_opex;
7104 let op_margin_pct =
7105 (operating_income * hundred / total_revenue).round_dp(2);
7106
7107 for kpi in &mut kpis {
7109 if kpi.name == "Gross Margin" {
7110 kpi.value = gross_margin_pct;
7111 } else if kpi.name == "Operating Margin" {
7112 kpi.value = op_margin_pct;
7113 }
7114 }
7115 }
7116 }
7117
7118 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7120 fs.statement_type == StatementType::BalanceSheet
7121 && fs.company_code == company_code
7122 }) {
7123 let current_assets: Decimal = bs
7124 .line_items
7125 .iter()
7126 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7127 .map(|li| li.amount)
7128 .sum();
7129 let current_liabilities: Decimal = bs
7130 .line_items
7131 .iter()
7132 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7133 .map(|li| li.amount.abs())
7134 .sum();
7135
7136 if current_liabilities > Decimal::ZERO {
7137 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7138 for kpi in &mut kpis {
7139 if kpi.name == "Current Ratio" {
7140 kpi.value = current_ratio;
7141 }
7142 }
7143 }
7144 }
7145 }
7146
7147 snapshot.kpi_count = kpis.len();
7148 snapshot.kpis = kpis;
7149 }
7150
7151 if self.config.financial_reporting.budgets.enabled {
7153 let account_data: Vec<(String, String)> = coa
7154 .accounts
7155 .iter()
7156 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7157 .collect();
7158
7159 if !account_data.is_empty() {
7160 let fiscal_year = start_date.year() as u32;
7161 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7162 let budget = budget_gen.generate(
7163 company_code,
7164 fiscal_year,
7165 &account_data,
7166 &self.config.financial_reporting.budgets,
7167 );
7168 snapshot.budget_line_count = budget.line_items.len();
7169 snapshot.budgets.push(budget);
7170 }
7171 }
7172
7173 stats.sales_quote_count = snapshot.sales_quote_count;
7174 stats.kpi_count = snapshot.kpi_count;
7175 stats.budget_line_count = snapshot.budget_line_count;
7176
7177 info!(
7178 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7179 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7180 );
7181 self.check_resources_with_log("post-sales-kpi-budgets")?;
7182
7183 Ok(snapshot)
7184 }
7185
7186 fn compute_pre_tax_income(
7193 company_code: &str,
7194 journal_entries: &[JournalEntry],
7195 ) -> rust_decimal::Decimal {
7196 use datasynth_core::accounts::AccountCategory;
7197 use rust_decimal::Decimal;
7198
7199 let mut total_revenue = Decimal::ZERO;
7200 let mut total_expenses = Decimal::ZERO;
7201
7202 for je in journal_entries {
7203 if je.header.company_code != company_code {
7204 continue;
7205 }
7206 for line in &je.lines {
7207 let cat = AccountCategory::from_account(&line.gl_account);
7208 match cat {
7209 AccountCategory::Revenue => {
7210 total_revenue += line.credit_amount - line.debit_amount;
7211 }
7212 AccountCategory::Cogs
7213 | AccountCategory::OperatingExpense
7214 | AccountCategory::OtherIncomeExpense => {
7215 total_expenses += line.debit_amount - line.credit_amount;
7216 }
7217 _ => {}
7218 }
7219 }
7220 }
7221
7222 let pti = (total_revenue - total_expenses).round_dp(2);
7223 if pti == rust_decimal::Decimal::ZERO {
7224 rust_decimal::Decimal::from(1_000_000u32)
7227 } else {
7228 pti
7229 }
7230 }
7231
7232 fn phase_tax_generation(
7234 &mut self,
7235 document_flows: &DocumentFlowSnapshot,
7236 journal_entries: &[JournalEntry],
7237 stats: &mut EnhancedGenerationStatistics,
7238 ) -> SynthResult<TaxSnapshot> {
7239 if !self.phase_config.generate_tax {
7240 debug!("Phase 20: Skipped (tax generation disabled)");
7241 return Ok(TaxSnapshot::default());
7242 }
7243 info!("Phase 20: Generating Tax Data");
7244
7245 let seed = self.seed;
7246 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7247 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7248 let fiscal_year = start_date.year();
7249 let company_code = self
7250 .config
7251 .companies
7252 .first()
7253 .map(|c| c.code.as_str())
7254 .unwrap_or("1000");
7255
7256 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7257 seed + 370,
7258 self.config.tax.clone(),
7259 );
7260
7261 let pack = self.primary_pack().clone();
7262 let (jurisdictions, codes) =
7263 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7264
7265 let mut provisions = Vec::new();
7267 if self.config.tax.provisions.enabled {
7268 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7269 for company in &self.config.companies {
7270 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7271 let statutory_rate = rust_decimal::Decimal::new(
7272 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7273 2,
7274 );
7275 let provision = provision_gen.generate(
7276 &company.code,
7277 start_date,
7278 pre_tax_income,
7279 statutory_rate,
7280 );
7281 provisions.push(provision);
7282 }
7283 }
7284
7285 let mut tax_lines = Vec::new();
7287 if !codes.is_empty() {
7288 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7289 datasynth_generators::TaxLineGeneratorConfig::default(),
7290 codes.clone(),
7291 seed + 372,
7292 );
7293
7294 let buyer_country = self
7297 .config
7298 .companies
7299 .first()
7300 .map(|c| c.country.as_str())
7301 .unwrap_or("US");
7302 for vi in &document_flows.vendor_invoices {
7303 let lines = tax_line_gen.generate_for_document(
7304 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7305 &vi.header.document_id,
7306 buyer_country, buyer_country,
7308 vi.payable_amount,
7309 vi.header.document_date,
7310 None,
7311 );
7312 tax_lines.extend(lines);
7313 }
7314
7315 for ci in &document_flows.customer_invoices {
7317 let lines = tax_line_gen.generate_for_document(
7318 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7319 &ci.header.document_id,
7320 buyer_country, buyer_country,
7322 ci.total_gross_amount,
7323 ci.header.document_date,
7324 None,
7325 );
7326 tax_lines.extend(lines);
7327 }
7328 }
7329
7330 let deferred_tax = {
7332 let companies: Vec<(&str, &str)> = self
7333 .config
7334 .companies
7335 .iter()
7336 .map(|c| (c.code.as_str(), c.country.as_str()))
7337 .collect();
7338 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7339 deferred_gen.generate(&companies, start_date, journal_entries)
7340 };
7341
7342 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7345 std::collections::HashMap::new();
7346 for vi in &document_flows.vendor_invoices {
7347 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7348 }
7349 for ci in &document_flows.customer_invoices {
7350 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7351 }
7352
7353 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7355 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7356 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7357 &tax_lines,
7358 company_code,
7359 &doc_dates,
7360 end_date,
7361 );
7362 debug!("Generated {} tax posting JEs", jes.len());
7363 jes
7364 } else {
7365 Vec::new()
7366 };
7367
7368 let snapshot = TaxSnapshot {
7369 jurisdiction_count: jurisdictions.len(),
7370 code_count: codes.len(),
7371 jurisdictions,
7372 codes,
7373 tax_provisions: provisions,
7374 tax_lines,
7375 tax_returns: Vec::new(),
7376 withholding_records: Vec::new(),
7377 tax_anomaly_labels: Vec::new(),
7378 deferred_tax,
7379 tax_posting_journal_entries,
7380 };
7381
7382 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7383 stats.tax_code_count = snapshot.code_count;
7384 stats.tax_provision_count = snapshot.tax_provisions.len();
7385 stats.tax_line_count = snapshot.tax_lines.len();
7386
7387 info!(
7388 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7389 snapshot.jurisdiction_count,
7390 snapshot.code_count,
7391 snapshot.tax_provisions.len(),
7392 snapshot.deferred_tax.temporary_differences.len(),
7393 snapshot.deferred_tax.journal_entries.len(),
7394 snapshot.tax_posting_journal_entries.len(),
7395 );
7396 self.check_resources_with_log("post-tax")?;
7397
7398 Ok(snapshot)
7399 }
7400
7401 fn phase_esg_generation(
7403 &mut self,
7404 document_flows: &DocumentFlowSnapshot,
7405 manufacturing: &ManufacturingSnapshot,
7406 stats: &mut EnhancedGenerationStatistics,
7407 ) -> SynthResult<EsgSnapshot> {
7408 if !self.phase_config.generate_esg {
7409 debug!("Phase 21: Skipped (ESG generation disabled)");
7410 return Ok(EsgSnapshot::default());
7411 }
7412 let degradation = self.check_resources()?;
7413 if degradation >= DegradationLevel::Reduced {
7414 debug!(
7415 "Phase skipped due to resource pressure (degradation: {:?})",
7416 degradation
7417 );
7418 return Ok(EsgSnapshot::default());
7419 }
7420 info!("Phase 21: Generating ESG Data");
7421
7422 let seed = self.seed;
7423 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7424 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7425 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7426 let entity_id = self
7427 .config
7428 .companies
7429 .first()
7430 .map(|c| c.code.as_str())
7431 .unwrap_or("1000");
7432
7433 let esg_cfg = &self.config.esg;
7434 let mut snapshot = EsgSnapshot::default();
7435
7436 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7438 esg_cfg.environmental.energy.clone(),
7439 seed + 80,
7440 );
7441 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7442
7443 let facility_count = esg_cfg.environmental.energy.facility_count;
7445 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7446 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7447
7448 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7450 seed + 82,
7451 esg_cfg.environmental.waste.diversion_target,
7452 facility_count,
7453 );
7454 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7455
7456 let mut emission_gen =
7458 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7459
7460 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7462 .iter()
7463 .map(|e| datasynth_generators::EnergyInput {
7464 facility_id: e.facility_id.clone(),
7465 energy_type: match e.energy_source {
7466 EnergySourceType::NaturalGas => {
7467 datasynth_generators::EnergyInputType::NaturalGas
7468 }
7469 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7470 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7471 _ => datasynth_generators::EnergyInputType::Electricity,
7472 },
7473 consumption_kwh: e.consumption_kwh,
7474 period: e.period,
7475 })
7476 .collect();
7477
7478 if !manufacturing.production_orders.is_empty() {
7480 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7481 &manufacturing.production_orders,
7482 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7485 if !mfg_energy.is_empty() {
7486 info!(
7487 "ESG: {} energy inputs derived from {} production orders",
7488 mfg_energy.len(),
7489 manufacturing.production_orders.len(),
7490 );
7491 energy_inputs.extend(mfg_energy);
7492 }
7493 }
7494
7495 let mut emissions = Vec::new();
7496 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7497 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7498
7499 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7501 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7502 for payment in &document_flows.payments {
7503 if payment.is_vendor {
7504 *totals
7505 .entry(payment.business_partner_id.clone())
7506 .or_default() += payment.amount;
7507 }
7508 }
7509 totals
7510 };
7511 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7512 .master_data
7513 .vendors
7514 .iter()
7515 .map(|v| {
7516 let spend = vendor_payment_totals
7517 .get(&v.vendor_id)
7518 .copied()
7519 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7520 datasynth_generators::VendorSpendInput {
7521 vendor_id: v.vendor_id.clone(),
7522 category: format!("{:?}", v.vendor_type).to_lowercase(),
7523 spend,
7524 country: v.country.clone(),
7525 }
7526 })
7527 .collect();
7528 if !vendor_spend.is_empty() {
7529 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7530 entity_id,
7531 &vendor_spend,
7532 start_date,
7533 end_date,
7534 ));
7535 }
7536
7537 let headcount = self.master_data.employees.len() as u32;
7539 if headcount > 0 {
7540 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7541 emissions.extend(emission_gen.generate_scope3_business_travel(
7542 entity_id,
7543 travel_spend,
7544 start_date,
7545 ));
7546 emissions
7547 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7548 }
7549
7550 snapshot.emission_count = emissions.len();
7551 snapshot.emissions = emissions;
7552 snapshot.energy = energy_records;
7553
7554 let mut workforce_gen =
7556 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7557 let total_headcount = headcount.max(100);
7558 snapshot.diversity =
7559 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7560 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7561
7562 if !self.master_data.employees.is_empty() {
7564 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7565 entity_id,
7566 &self.master_data.employees,
7567 end_date,
7568 );
7569 if !hr_diversity.is_empty() {
7570 info!(
7571 "ESG: {} diversity metrics derived from {} actual employees",
7572 hr_diversity.len(),
7573 self.master_data.employees.len(),
7574 );
7575 snapshot.diversity.extend(hr_diversity);
7576 }
7577 }
7578
7579 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7580 entity_id,
7581 facility_count,
7582 start_date,
7583 end_date,
7584 );
7585
7586 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7589 entity_id,
7590 &snapshot.safety_incidents,
7591 total_hours,
7592 start_date,
7593 );
7594 snapshot.safety_metrics = vec![safety_metric];
7595
7596 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7598 seed + 85,
7599 esg_cfg.governance.board_size,
7600 esg_cfg.governance.independence_target,
7601 );
7602 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7603
7604 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7606 esg_cfg.supply_chain_esg.clone(),
7607 seed + 86,
7608 );
7609 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7610 .master_data
7611 .vendors
7612 .iter()
7613 .map(|v| datasynth_generators::VendorInput {
7614 vendor_id: v.vendor_id.clone(),
7615 country: v.country.clone(),
7616 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7617 quality_score: None,
7618 })
7619 .collect();
7620 snapshot.supplier_assessments =
7621 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7622
7623 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7625 seed + 87,
7626 esg_cfg.reporting.clone(),
7627 esg_cfg.climate_scenarios.clone(),
7628 );
7629 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7630 snapshot.disclosures = disclosure_gen.generate_disclosures(
7631 entity_id,
7632 &snapshot.materiality,
7633 start_date,
7634 end_date,
7635 );
7636 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7637 snapshot.disclosure_count = snapshot.disclosures.len();
7638
7639 if esg_cfg.anomaly_rate > 0.0 {
7641 let mut anomaly_injector =
7642 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7643 let mut labels = Vec::new();
7644 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7645 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7646 labels.extend(
7647 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7648 );
7649 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7650 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7651 snapshot.anomaly_labels = labels;
7652 }
7653
7654 stats.esg_emission_count = snapshot.emission_count;
7655 stats.esg_disclosure_count = snapshot.disclosure_count;
7656
7657 info!(
7658 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7659 snapshot.emission_count,
7660 snapshot.disclosure_count,
7661 snapshot.supplier_assessments.len()
7662 );
7663 self.check_resources_with_log("post-esg")?;
7664
7665 Ok(snapshot)
7666 }
7667
7668 fn phase_treasury_data(
7670 &mut self,
7671 document_flows: &DocumentFlowSnapshot,
7672 subledger: &SubledgerSnapshot,
7673 intercompany: &IntercompanySnapshot,
7674 stats: &mut EnhancedGenerationStatistics,
7675 ) -> SynthResult<TreasurySnapshot> {
7676 if !self.phase_config.generate_treasury {
7677 debug!("Phase 22: Skipped (treasury generation disabled)");
7678 return Ok(TreasurySnapshot::default());
7679 }
7680 let degradation = self.check_resources()?;
7681 if degradation >= DegradationLevel::Reduced {
7682 debug!(
7683 "Phase skipped due to resource pressure (degradation: {:?})",
7684 degradation
7685 );
7686 return Ok(TreasurySnapshot::default());
7687 }
7688 info!("Phase 22: Generating Treasury Data");
7689
7690 let seed = self.seed;
7691 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7692 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7693 let currency = self
7694 .config
7695 .companies
7696 .first()
7697 .map(|c| c.currency.as_str())
7698 .unwrap_or("USD");
7699 let entity_id = self
7700 .config
7701 .companies
7702 .first()
7703 .map(|c| c.code.as_str())
7704 .unwrap_or("1000");
7705
7706 let mut snapshot = TreasurySnapshot::default();
7707
7708 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7710 self.config.treasury.debt.clone(),
7711 seed + 90,
7712 );
7713 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7714
7715 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7717 self.config.treasury.hedging.clone(),
7718 seed + 91,
7719 );
7720 for debt in &snapshot.debt_instruments {
7721 if debt.rate_type == InterestRateType::Variable {
7722 let swap = hedge_gen.generate_ir_swap(
7723 currency,
7724 debt.principal,
7725 debt.origination_date,
7726 debt.maturity_date,
7727 );
7728 snapshot.hedging_instruments.push(swap);
7729 }
7730 }
7731
7732 {
7735 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7736 for payment in &document_flows.payments {
7737 if payment.currency != currency {
7738 let entry = fx_map
7739 .entry(payment.currency.clone())
7740 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7741 entry.0 += payment.amount;
7742 if payment.header.document_date > entry.1 {
7744 entry.1 = payment.header.document_date;
7745 }
7746 }
7747 }
7748 if !fx_map.is_empty() {
7749 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7750 .into_iter()
7751 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7752 datasynth_generators::treasury::FxExposure {
7753 currency_pair: format!("{foreign_ccy}/{currency}"),
7754 foreign_currency: foreign_ccy,
7755 net_amount,
7756 settlement_date,
7757 description: "AP payment FX exposure".to_string(),
7758 }
7759 })
7760 .collect();
7761 let (fx_instruments, fx_relationships) =
7762 hedge_gen.generate(start_date, &fx_exposures);
7763 snapshot.hedging_instruments.extend(fx_instruments);
7764 snapshot.hedge_relationships.extend(fx_relationships);
7765 }
7766 }
7767
7768 if self.config.treasury.anomaly_rate > 0.0 {
7770 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7771 seed + 92,
7772 self.config.treasury.anomaly_rate,
7773 );
7774 let mut labels = Vec::new();
7775 labels.extend(
7776 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7777 );
7778 snapshot.treasury_anomaly_labels = labels;
7779 }
7780
7781 if self.config.treasury.cash_positioning.enabled {
7783 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7784
7785 for payment in &document_flows.payments {
7787 cash_flows.push(datasynth_generators::treasury::CashFlow {
7788 date: payment.header.document_date,
7789 account_id: format!("{entity_id}-MAIN"),
7790 amount: payment.amount,
7791 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7792 });
7793 }
7794
7795 for chain in &document_flows.o2c_chains {
7797 if let Some(ref receipt) = chain.customer_receipt {
7798 cash_flows.push(datasynth_generators::treasury::CashFlow {
7799 date: receipt.header.document_date,
7800 account_id: format!("{entity_id}-MAIN"),
7801 amount: receipt.amount,
7802 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7803 });
7804 }
7805 for receipt in &chain.remainder_receipts {
7807 cash_flows.push(datasynth_generators::treasury::CashFlow {
7808 date: receipt.header.document_date,
7809 account_id: format!("{entity_id}-MAIN"),
7810 amount: receipt.amount,
7811 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7812 });
7813 }
7814 }
7815
7816 if !cash_flows.is_empty() {
7817 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7818 self.config.treasury.cash_positioning.clone(),
7819 seed + 93,
7820 );
7821 let account_id = format!("{entity_id}-MAIN");
7822 snapshot.cash_positions = cash_gen.generate(
7823 entity_id,
7824 &account_id,
7825 currency,
7826 &cash_flows,
7827 start_date,
7828 start_date + chrono::Months::new(self.config.global.period_months),
7829 rust_decimal::Decimal::new(1_000_000, 0), );
7831 }
7832 }
7833
7834 if self.config.treasury.cash_forecasting.enabled {
7836 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7837
7838 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7840 .ar_invoices
7841 .iter()
7842 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7843 .map(|inv| {
7844 let days_past_due = if inv.due_date < end_date {
7845 (end_date - inv.due_date).num_days().max(0) as u32
7846 } else {
7847 0
7848 };
7849 datasynth_generators::treasury::ArAgingItem {
7850 expected_date: inv.due_date,
7851 amount: inv.amount_remaining,
7852 days_past_due,
7853 document_id: inv.invoice_number.clone(),
7854 }
7855 })
7856 .collect();
7857
7858 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7860 .ap_invoices
7861 .iter()
7862 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7863 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7864 payment_date: inv.due_date,
7865 amount: inv.amount_remaining,
7866 document_id: inv.invoice_number.clone(),
7867 })
7868 .collect();
7869
7870 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7871 self.config.treasury.cash_forecasting.clone(),
7872 seed + 94,
7873 );
7874 let forecast = forecast_gen.generate(
7875 entity_id,
7876 currency,
7877 end_date,
7878 &ar_items,
7879 &ap_items,
7880 &[], );
7882 snapshot.cash_forecasts.push(forecast);
7883 }
7884
7885 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7887 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7888 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7889 self.config.treasury.cash_pooling.clone(),
7890 seed + 95,
7891 );
7892
7893 let account_ids: Vec<String> = snapshot
7895 .cash_positions
7896 .iter()
7897 .map(|cp| cp.bank_account_id.clone())
7898 .collect::<std::collections::HashSet<_>>()
7899 .into_iter()
7900 .collect();
7901
7902 if let Some(pool) =
7903 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7904 {
7905 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7907 for cp in &snapshot.cash_positions {
7908 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7909 }
7910
7911 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7912 latest_balances
7913 .into_iter()
7914 .filter(|(id, _)| pool.participant_accounts.contains(id))
7915 .map(
7916 |(id, balance)| datasynth_generators::treasury::AccountBalance {
7917 account_id: id,
7918 balance,
7919 },
7920 )
7921 .collect();
7922
7923 let sweeps =
7924 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7925 snapshot.cash_pool_sweeps = sweeps;
7926 snapshot.cash_pools.push(pool);
7927 }
7928 }
7929
7930 if self.config.treasury.bank_guarantees.enabled {
7932 let vendor_names: Vec<String> = self
7933 .master_data
7934 .vendors
7935 .iter()
7936 .map(|v| v.name.clone())
7937 .collect();
7938 if !vendor_names.is_empty() {
7939 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7940 self.config.treasury.bank_guarantees.clone(),
7941 seed + 96,
7942 );
7943 snapshot.bank_guarantees =
7944 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7945 }
7946 }
7947
7948 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7950 let entity_ids: Vec<String> = self
7951 .config
7952 .companies
7953 .iter()
7954 .map(|c| c.code.clone())
7955 .collect();
7956 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7957 .matched_pairs
7958 .iter()
7959 .map(|mp| {
7960 (
7961 mp.seller_company.clone(),
7962 mp.buyer_company.clone(),
7963 mp.amount,
7964 )
7965 })
7966 .collect();
7967 if entity_ids.len() >= 2 {
7968 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7969 self.config.treasury.netting.clone(),
7970 seed + 97,
7971 );
7972 snapshot.netting_runs = netting_gen.generate(
7973 &entity_ids,
7974 currency,
7975 start_date,
7976 self.config.global.period_months,
7977 &ic_amounts,
7978 );
7979 }
7980 }
7981
7982 {
7984 use datasynth_generators::treasury::TreasuryAccounting;
7985
7986 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7987 let mut treasury_jes = Vec::new();
7988
7989 if !snapshot.debt_instruments.is_empty() {
7991 let debt_jes =
7992 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7993 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7994 treasury_jes.extend(debt_jes);
7995 }
7996
7997 if !snapshot.hedging_instruments.is_empty() {
7999 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8000 &snapshot.hedging_instruments,
8001 &snapshot.hedge_relationships,
8002 end_date,
8003 entity_id,
8004 );
8005 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8006 treasury_jes.extend(hedge_jes);
8007 }
8008
8009 if !snapshot.cash_pool_sweeps.is_empty() {
8011 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8012 &snapshot.cash_pool_sweeps,
8013 entity_id,
8014 );
8015 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8016 treasury_jes.extend(sweep_jes);
8017 }
8018
8019 if !treasury_jes.is_empty() {
8020 debug!("Total treasury journal entries: {}", treasury_jes.len());
8021 }
8022 snapshot.journal_entries = treasury_jes;
8023 }
8024
8025 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8026 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8027 stats.cash_position_count = snapshot.cash_positions.len();
8028 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8029 stats.cash_pool_count = snapshot.cash_pools.len();
8030
8031 info!(
8032 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8033 snapshot.debt_instruments.len(),
8034 snapshot.hedging_instruments.len(),
8035 snapshot.cash_positions.len(),
8036 snapshot.cash_forecasts.len(),
8037 snapshot.cash_pools.len(),
8038 snapshot.bank_guarantees.len(),
8039 snapshot.netting_runs.len(),
8040 snapshot.journal_entries.len(),
8041 );
8042 self.check_resources_with_log("post-treasury")?;
8043
8044 Ok(snapshot)
8045 }
8046
8047 fn phase_project_accounting(
8049 &mut self,
8050 document_flows: &DocumentFlowSnapshot,
8051 hr: &HrSnapshot,
8052 stats: &mut EnhancedGenerationStatistics,
8053 ) -> SynthResult<ProjectAccountingSnapshot> {
8054 if !self.phase_config.generate_project_accounting {
8055 debug!("Phase 23: Skipped (project accounting disabled)");
8056 return Ok(ProjectAccountingSnapshot::default());
8057 }
8058 let degradation = self.check_resources()?;
8059 if degradation >= DegradationLevel::Reduced {
8060 debug!(
8061 "Phase skipped due to resource pressure (degradation: {:?})",
8062 degradation
8063 );
8064 return Ok(ProjectAccountingSnapshot::default());
8065 }
8066 info!("Phase 23: Generating Project Accounting Data");
8067
8068 let seed = self.seed;
8069 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8070 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8071 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8072 let company_code = self
8073 .config
8074 .companies
8075 .first()
8076 .map(|c| c.code.as_str())
8077 .unwrap_or("1000");
8078
8079 let mut snapshot = ProjectAccountingSnapshot::default();
8080
8081 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8083 self.config.project_accounting.clone(),
8084 seed + 95,
8085 );
8086 let pool = project_gen.generate(company_code, start_date, end_date);
8087 snapshot.projects = pool.projects.clone();
8088
8089 {
8091 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8092 Vec::new();
8093
8094 for te in &hr.time_entries {
8096 let total_hours = te.hours_regular + te.hours_overtime;
8097 if total_hours > 0.0 {
8098 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8099 id: te.entry_id.clone(),
8100 entity_id: company_code.to_string(),
8101 date: te.date,
8102 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8103 .unwrap_or(rust_decimal::Decimal::ZERO),
8104 source_type: CostSourceType::TimeEntry,
8105 hours: Some(
8106 rust_decimal::Decimal::from_f64_retain(total_hours)
8107 .unwrap_or(rust_decimal::Decimal::ZERO),
8108 ),
8109 });
8110 }
8111 }
8112
8113 for er in &hr.expense_reports {
8115 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8116 id: er.report_id.clone(),
8117 entity_id: company_code.to_string(),
8118 date: er.submission_date,
8119 amount: er.total_amount,
8120 source_type: CostSourceType::ExpenseReport,
8121 hours: None,
8122 });
8123 }
8124
8125 for po in &document_flows.purchase_orders {
8127 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8128 id: po.header.document_id.clone(),
8129 entity_id: company_code.to_string(),
8130 date: po.header.document_date,
8131 amount: po.total_net_amount,
8132 source_type: CostSourceType::PurchaseOrder,
8133 hours: None,
8134 });
8135 }
8136
8137 for vi in &document_flows.vendor_invoices {
8139 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8140 id: vi.header.document_id.clone(),
8141 entity_id: company_code.to_string(),
8142 date: vi.header.document_date,
8143 amount: vi.payable_amount,
8144 source_type: CostSourceType::VendorInvoice,
8145 hours: None,
8146 });
8147 }
8148
8149 if !source_docs.is_empty() && !pool.projects.is_empty() {
8150 let mut cost_gen =
8151 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8152 self.config.project_accounting.cost_allocation.clone(),
8153 seed + 99,
8154 );
8155 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8156 }
8157 }
8158
8159 if self.config.project_accounting.change_orders.enabled {
8161 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8162 self.config.project_accounting.change_orders.clone(),
8163 seed + 96,
8164 );
8165 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8166 }
8167
8168 if self.config.project_accounting.milestones.enabled {
8170 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8171 self.config.project_accounting.milestones.clone(),
8172 seed + 97,
8173 );
8174 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8175 }
8176
8177 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8179 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8180 self.config.project_accounting.earned_value.clone(),
8181 seed + 98,
8182 );
8183 snapshot.earned_value_metrics =
8184 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8185 }
8186
8187 if self.config.project_accounting.revenue_recognition.enabled
8189 && !snapshot.projects.is_empty()
8190 && !snapshot.cost_lines.is_empty()
8191 {
8192 use datasynth_generators::project_accounting::RevenueGenerator;
8193 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8194 let avg_contract_value =
8195 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8196 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8197
8198 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8201 snapshot
8202 .projects
8203 .iter()
8204 .filter(|p| {
8205 matches!(
8206 p.project_type,
8207 datasynth_core::models::ProjectType::Customer
8208 )
8209 })
8210 .map(|p| {
8211 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8212 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8213 } else {
8215 avg_contract_value
8216 };
8217 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8219 })
8220 .collect();
8221
8222 if !contract_values.is_empty() {
8223 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8224 snapshot.revenue_records = rev_gen.generate(
8225 &snapshot.projects,
8226 &snapshot.cost_lines,
8227 &contract_values,
8228 start_date,
8229 end_date,
8230 );
8231 debug!(
8232 "Generated {} revenue recognition records for {} customer projects",
8233 snapshot.revenue_records.len(),
8234 contract_values.len()
8235 );
8236 }
8237 }
8238
8239 stats.project_count = snapshot.projects.len();
8240 stats.project_change_order_count = snapshot.change_orders.len();
8241 stats.project_cost_line_count = snapshot.cost_lines.len();
8242
8243 info!(
8244 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8245 snapshot.projects.len(),
8246 snapshot.change_orders.len(),
8247 snapshot.milestones.len(),
8248 snapshot.earned_value_metrics.len()
8249 );
8250 self.check_resources_with_log("post-project-accounting")?;
8251
8252 Ok(snapshot)
8253 }
8254
8255 fn phase_evolution_events(
8257 &mut self,
8258 stats: &mut EnhancedGenerationStatistics,
8259 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8260 if !self.phase_config.generate_evolution_events {
8261 debug!("Phase 24: Skipped (evolution events disabled)");
8262 return Ok((Vec::new(), Vec::new()));
8263 }
8264 info!("Phase 24: Generating Process Evolution + Organizational Events");
8265
8266 let seed = self.seed;
8267 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8268 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8269 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8270
8271 let mut proc_gen =
8273 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8274 seed + 100,
8275 );
8276 let process_events = proc_gen.generate_events(start_date, end_date);
8277
8278 let company_codes: Vec<String> = self
8280 .config
8281 .companies
8282 .iter()
8283 .map(|c| c.code.clone())
8284 .collect();
8285 let mut org_gen =
8286 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8287 seed + 101,
8288 );
8289 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8290
8291 stats.process_evolution_event_count = process_events.len();
8292 stats.organizational_event_count = org_events.len();
8293
8294 info!(
8295 "Evolution events generated: {} process evolution, {} organizational",
8296 process_events.len(),
8297 org_events.len()
8298 );
8299 self.check_resources_with_log("post-evolution-events")?;
8300
8301 Ok((process_events, org_events))
8302 }
8303
8304 fn phase_disruption_events(
8307 &self,
8308 stats: &mut EnhancedGenerationStatistics,
8309 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8310 if !self.config.organizational_events.enabled {
8311 debug!("Phase 24b: Skipped (organizational events disabled)");
8312 return Ok(Vec::new());
8313 }
8314 info!("Phase 24b: Generating Disruption Events");
8315
8316 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8317 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8318 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8319
8320 let company_codes: Vec<String> = self
8321 .config
8322 .companies
8323 .iter()
8324 .map(|c| c.code.clone())
8325 .collect();
8326
8327 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8328 let events = gen.generate(start_date, end_date, &company_codes);
8329
8330 stats.disruption_event_count = events.len();
8331 info!("Disruption events generated: {} events", events.len());
8332 self.check_resources_with_log("post-disruption-events")?;
8333
8334 Ok(events)
8335 }
8336
8337 fn phase_counterfactuals(
8344 &self,
8345 journal_entries: &[JournalEntry],
8346 stats: &mut EnhancedGenerationStatistics,
8347 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8348 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8349 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8350 return Ok(Vec::new());
8351 }
8352 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8353
8354 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8355
8356 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8357
8358 let specs = [
8360 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8361 CounterfactualSpec::ShiftDate { days: -14 },
8362 CounterfactualSpec::SelfApprove,
8363 CounterfactualSpec::SplitTransaction { split_count: 3 },
8364 ];
8365
8366 let pairs: Vec<_> = journal_entries
8367 .iter()
8368 .enumerate()
8369 .map(|(i, je)| {
8370 let spec = &specs[i % specs.len()];
8371 gen.generate(je, spec)
8372 })
8373 .collect();
8374
8375 stats.counterfactual_pair_count = pairs.len();
8376 info!(
8377 "Counterfactual pairs generated: {} pairs from {} journal entries",
8378 pairs.len(),
8379 journal_entries.len()
8380 );
8381 self.check_resources_with_log("post-counterfactuals")?;
8382
8383 Ok(pairs)
8384 }
8385
8386 fn phase_red_flags(
8393 &self,
8394 anomaly_labels: &AnomalyLabels,
8395 document_flows: &DocumentFlowSnapshot,
8396 stats: &mut EnhancedGenerationStatistics,
8397 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8398 if !self.config.fraud.enabled {
8399 debug!("Phase 26: Skipped (fraud generation disabled)");
8400 return Ok(Vec::new());
8401 }
8402 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8403
8404 use datasynth_generators::fraud::RedFlagGenerator;
8405
8406 let generator = RedFlagGenerator::new();
8407 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8408
8409 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8411 .labels
8412 .iter()
8413 .filter(|label| label.anomaly_type.is_intentional())
8414 .map(|label| label.document_id.as_str())
8415 .collect();
8416
8417 let mut flags = Vec::new();
8418
8419 for chain in &document_flows.p2p_chains {
8421 let doc_id = &chain.purchase_order.header.document_id;
8422 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8423 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8424 }
8425
8426 for chain in &document_flows.o2c_chains {
8428 let doc_id = &chain.sales_order.header.document_id;
8429 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8430 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8431 }
8432
8433 stats.red_flag_count = flags.len();
8434 info!(
8435 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8436 flags.len(),
8437 document_flows.p2p_chains.len(),
8438 document_flows.o2c_chains.len(),
8439 fraud_doc_ids.len()
8440 );
8441 self.check_resources_with_log("post-red-flags")?;
8442
8443 Ok(flags)
8444 }
8445
8446 fn phase_collusion_rings(
8452 &mut self,
8453 stats: &mut EnhancedGenerationStatistics,
8454 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8455 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8456 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8457 return Ok(Vec::new());
8458 }
8459 info!("Phase 26b: Generating Collusion Rings");
8460
8461 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8462 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8463 let months = self.config.global.period_months;
8464
8465 let employee_ids: Vec<String> = self
8466 .master_data
8467 .employees
8468 .iter()
8469 .map(|e| e.employee_id.clone())
8470 .collect();
8471 let vendor_ids: Vec<String> = self
8472 .master_data
8473 .vendors
8474 .iter()
8475 .map(|v| v.vendor_id.clone())
8476 .collect();
8477
8478 let mut generator =
8479 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8480 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8481
8482 stats.collusion_ring_count = rings.len();
8483 info!(
8484 "Collusion rings generated: {} rings, total members: {}",
8485 rings.len(),
8486 rings
8487 .iter()
8488 .map(datasynth_generators::fraud::CollusionRing::size)
8489 .sum::<usize>()
8490 );
8491 self.check_resources_with_log("post-collusion-rings")?;
8492
8493 Ok(rings)
8494 }
8495
8496 fn phase_temporal_attributes(
8501 &mut self,
8502 stats: &mut EnhancedGenerationStatistics,
8503 ) -> SynthResult<
8504 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8505 > {
8506 if !self.config.temporal_attributes.enabled {
8507 debug!("Phase 27: Skipped (temporal attributes disabled)");
8508 return Ok(Vec::new());
8509 }
8510 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8511
8512 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8513 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8514
8515 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8519 || self.config.temporal_attributes.enabled;
8520 let temporal_config = {
8521 let ta = &self.config.temporal_attributes;
8522 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8523 .enabled(ta.enabled)
8524 .closed_probability(ta.valid_time.closed_probability)
8525 .avg_validity_days(ta.valid_time.avg_validity_days)
8526 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8527 .with_version_chains(if generate_version_chains {
8528 ta.avg_versions_per_entity
8529 } else {
8530 1.0
8531 })
8532 .build()
8533 };
8534 let temporal_config = if self
8536 .config
8537 .temporal_attributes
8538 .transaction_time
8539 .allow_backdating
8540 {
8541 let mut c = temporal_config;
8542 c.transaction_time.allow_backdating = true;
8543 c.transaction_time.backdating_probability = self
8544 .config
8545 .temporal_attributes
8546 .transaction_time
8547 .backdating_probability;
8548 c.transaction_time.max_backdate_days = self
8549 .config
8550 .temporal_attributes
8551 .transaction_time
8552 .max_backdate_days;
8553 c
8554 } else {
8555 temporal_config
8556 };
8557 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8558 temporal_config,
8559 self.seed + 130,
8560 start_date,
8561 );
8562
8563 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8564 self.seed + 130,
8565 datasynth_core::GeneratorType::Vendor,
8566 );
8567
8568 let chains: Vec<_> = self
8569 .master_data
8570 .vendors
8571 .iter()
8572 .map(|vendor| {
8573 let id = uuid_factory.next();
8574 gen.generate_version_chain(vendor.clone(), id)
8575 })
8576 .collect();
8577
8578 stats.temporal_version_chain_count = chains.len();
8579 info!("Temporal version chains generated: {} chains", chains.len());
8580 self.check_resources_with_log("post-temporal-attributes")?;
8581
8582 Ok(chains)
8583 }
8584
8585 fn phase_entity_relationships(
8595 &self,
8596 journal_entries: &[JournalEntry],
8597 document_flows: &DocumentFlowSnapshot,
8598 stats: &mut EnhancedGenerationStatistics,
8599 ) -> SynthResult<(
8600 Option<datasynth_core::models::EntityGraph>,
8601 Vec<datasynth_core::models::CrossProcessLink>,
8602 )> {
8603 use datasynth_generators::relationships::{
8604 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8605 TransactionSummary,
8606 };
8607
8608 let rs_enabled = self.config.relationship_strength.enabled;
8609 let cpl_enabled = self.config.cross_process_links.enabled
8610 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8611
8612 if !rs_enabled && !cpl_enabled {
8613 debug!(
8614 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8615 );
8616 return Ok((None, Vec::new()));
8617 }
8618
8619 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8620
8621 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8622 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8623
8624 let company_code = self
8625 .config
8626 .companies
8627 .first()
8628 .map(|c| c.code.as_str())
8629 .unwrap_or("1000");
8630
8631 let gen_config = EntityGraphConfig {
8633 enabled: rs_enabled,
8634 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8635 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8636 enable_return_flows: false,
8637 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8638 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8639 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8641 1.0
8642 } else {
8643 0.30
8644 },
8645 ..Default::default()
8646 },
8647 strength_config: datasynth_generators::relationships::StrengthConfig {
8648 transaction_volume_weight: self
8649 .config
8650 .relationship_strength
8651 .calculation
8652 .transaction_volume_weight,
8653 transaction_count_weight: self
8654 .config
8655 .relationship_strength
8656 .calculation
8657 .transaction_count_weight,
8658 duration_weight: self
8659 .config
8660 .relationship_strength
8661 .calculation
8662 .relationship_duration_weight,
8663 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8664 mutual_connections_weight: self
8665 .config
8666 .relationship_strength
8667 .calculation
8668 .mutual_connections_weight,
8669 recency_half_life_days: self
8670 .config
8671 .relationship_strength
8672 .calculation
8673 .recency_half_life_days,
8674 },
8675 ..Default::default()
8676 };
8677
8678 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8679
8680 let entity_graph = if rs_enabled {
8682 let vendor_summaries: Vec<EntitySummary> = self
8684 .master_data
8685 .vendors
8686 .iter()
8687 .map(|v| {
8688 EntitySummary::new(
8689 &v.vendor_id,
8690 &v.name,
8691 datasynth_core::models::GraphEntityType::Vendor,
8692 start_date,
8693 )
8694 })
8695 .collect();
8696
8697 let customer_summaries: Vec<EntitySummary> = self
8698 .master_data
8699 .customers
8700 .iter()
8701 .map(|c| {
8702 EntitySummary::new(
8703 &c.customer_id,
8704 &c.name,
8705 datasynth_core::models::GraphEntityType::Customer,
8706 start_date,
8707 )
8708 })
8709 .collect();
8710
8711 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8716 std::collections::HashMap::new();
8717
8718 for je in journal_entries {
8719 let cc = je.header.company_code.clone();
8720 let posting_date = je.header.posting_date;
8721 for line in &je.lines {
8722 if let Some(ref tp) = line.trading_partner {
8723 let amount = if line.debit_amount > line.credit_amount {
8724 line.debit_amount
8725 } else {
8726 line.credit_amount
8727 };
8728 let entry = txn_summaries
8729 .entry((cc.clone(), tp.clone()))
8730 .or_insert_with(|| TransactionSummary {
8731 total_volume: rust_decimal::Decimal::ZERO,
8732 transaction_count: 0,
8733 first_transaction_date: posting_date,
8734 last_transaction_date: posting_date,
8735 related_entities: std::collections::HashSet::new(),
8736 });
8737 entry.total_volume += amount;
8738 entry.transaction_count += 1;
8739 if posting_date < entry.first_transaction_date {
8740 entry.first_transaction_date = posting_date;
8741 }
8742 if posting_date > entry.last_transaction_date {
8743 entry.last_transaction_date = posting_date;
8744 }
8745 entry.related_entities.insert(cc.clone());
8746 }
8747 }
8748 }
8749
8750 for chain in &document_flows.p2p_chains {
8753 let cc = chain.purchase_order.header.company_code.clone();
8754 let vendor_id = chain.purchase_order.vendor_id.clone();
8755 let po_date = chain.purchase_order.header.document_date;
8756 let amount = chain.purchase_order.total_net_amount;
8757
8758 let entry = txn_summaries
8759 .entry((cc.clone(), vendor_id))
8760 .or_insert_with(|| TransactionSummary {
8761 total_volume: rust_decimal::Decimal::ZERO,
8762 transaction_count: 0,
8763 first_transaction_date: po_date,
8764 last_transaction_date: po_date,
8765 related_entities: std::collections::HashSet::new(),
8766 });
8767 entry.total_volume += amount;
8768 entry.transaction_count += 1;
8769 if po_date < entry.first_transaction_date {
8770 entry.first_transaction_date = po_date;
8771 }
8772 if po_date > entry.last_transaction_date {
8773 entry.last_transaction_date = po_date;
8774 }
8775 entry.related_entities.insert(cc);
8776 }
8777
8778 for chain in &document_flows.o2c_chains {
8780 let cc = chain.sales_order.header.company_code.clone();
8781 let customer_id = chain.sales_order.customer_id.clone();
8782 let so_date = chain.sales_order.header.document_date;
8783 let amount = chain.sales_order.total_net_amount;
8784
8785 let entry = txn_summaries
8786 .entry((cc.clone(), customer_id))
8787 .or_insert_with(|| TransactionSummary {
8788 total_volume: rust_decimal::Decimal::ZERO,
8789 transaction_count: 0,
8790 first_transaction_date: so_date,
8791 last_transaction_date: so_date,
8792 related_entities: std::collections::HashSet::new(),
8793 });
8794 entry.total_volume += amount;
8795 entry.transaction_count += 1;
8796 if so_date < entry.first_transaction_date {
8797 entry.first_transaction_date = so_date;
8798 }
8799 if so_date > entry.last_transaction_date {
8800 entry.last_transaction_date = so_date;
8801 }
8802 entry.related_entities.insert(cc);
8803 }
8804
8805 let as_of_date = journal_entries
8806 .last()
8807 .map(|je| je.header.posting_date)
8808 .unwrap_or(start_date);
8809
8810 let graph = gen.generate_entity_graph(
8811 company_code,
8812 as_of_date,
8813 &vendor_summaries,
8814 &customer_summaries,
8815 &txn_summaries,
8816 );
8817
8818 info!(
8819 "Entity relationship graph: {} nodes, {} edges",
8820 graph.nodes.len(),
8821 graph.edges.len()
8822 );
8823 stats.entity_relationship_node_count = graph.nodes.len();
8824 stats.entity_relationship_edge_count = graph.edges.len();
8825 Some(graph)
8826 } else {
8827 None
8828 };
8829
8830 let cross_process_links = if cpl_enabled {
8832 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8834 .p2p_chains
8835 .iter()
8836 .flat_map(|chain| {
8837 let vendor_id = chain.purchase_order.vendor_id.clone();
8838 let cc = chain.purchase_order.header.company_code.clone();
8839 chain.goods_receipts.iter().flat_map(move |gr| {
8840 gr.items.iter().filter_map({
8841 let doc_id = gr.header.document_id.clone();
8842 let v_id = vendor_id.clone();
8843 let company = cc.clone();
8844 let receipt_date = gr.header.document_date;
8845 move |item| {
8846 item.base
8847 .material_id
8848 .as_ref()
8849 .map(|mat_id| GoodsReceiptRef {
8850 document_id: doc_id.clone(),
8851 material_id: mat_id.clone(),
8852 quantity: item.base.quantity,
8853 receipt_date,
8854 vendor_id: v_id.clone(),
8855 company_code: company.clone(),
8856 })
8857 }
8858 })
8859 })
8860 })
8861 .collect();
8862
8863 let del_refs: Vec<DeliveryRef> = document_flows
8865 .o2c_chains
8866 .iter()
8867 .flat_map(|chain| {
8868 let customer_id = chain.sales_order.customer_id.clone();
8869 let cc = chain.sales_order.header.company_code.clone();
8870 chain.deliveries.iter().flat_map(move |del| {
8871 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8872 del.items.iter().filter_map({
8873 let doc_id = del.header.document_id.clone();
8874 let c_id = customer_id.clone();
8875 let company = cc.clone();
8876 move |item| {
8877 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8878 document_id: doc_id.clone(),
8879 material_id: mat_id.clone(),
8880 quantity: item.base.quantity,
8881 delivery_date,
8882 customer_id: c_id.clone(),
8883 company_code: company.clone(),
8884 })
8885 }
8886 })
8887 })
8888 })
8889 .collect();
8890
8891 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8892 info!("Cross-process links generated: {} links", links.len());
8893 stats.cross_process_link_count = links.len();
8894 links
8895 } else {
8896 Vec::new()
8897 };
8898
8899 self.check_resources_with_log("post-entity-relationships")?;
8900 Ok((entity_graph, cross_process_links))
8901 }
8902
8903 fn phase_industry_data(
8905 &self,
8906 stats: &mut EnhancedGenerationStatistics,
8907 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8908 if !self.config.industry_specific.enabled {
8909 return None;
8910 }
8911 info!("Phase 29: Generating industry-specific data");
8912 let output = datasynth_generators::industry::factory::generate_industry_output(
8913 self.config.global.industry,
8914 );
8915 stats.industry_gl_account_count = output.gl_accounts.len();
8916 info!(
8917 "Industry data generated: {} GL accounts for {:?}",
8918 output.gl_accounts.len(),
8919 self.config.global.industry
8920 );
8921 Some(output)
8922 }
8923
8924 fn phase_opening_balances(
8926 &mut self,
8927 coa: &Arc<ChartOfAccounts>,
8928 stats: &mut EnhancedGenerationStatistics,
8929 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8930 if !self.config.balance.generate_opening_balances {
8931 debug!("Phase 3b: Skipped (opening balance generation disabled)");
8932 return Ok(Vec::new());
8933 }
8934 info!("Phase 3b: Generating Opening Balances");
8935
8936 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8937 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8938 let fiscal_year = start_date.year();
8939
8940 let industry = match self.config.global.industry {
8941 IndustrySector::Manufacturing => IndustryType::Manufacturing,
8942 IndustrySector::Retail => IndustryType::Retail,
8943 IndustrySector::FinancialServices => IndustryType::Financial,
8944 IndustrySector::Healthcare => IndustryType::Healthcare,
8945 IndustrySector::Technology => IndustryType::Technology,
8946 _ => IndustryType::Manufacturing,
8947 };
8948
8949 let config = datasynth_generators::OpeningBalanceConfig {
8950 industry,
8951 ..Default::default()
8952 };
8953 let mut gen =
8954 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8955
8956 let mut results = Vec::new();
8957 for company in &self.config.companies {
8958 let spec = OpeningBalanceSpec::new(
8959 company.code.clone(),
8960 start_date,
8961 fiscal_year,
8962 company.currency.clone(),
8963 rust_decimal::Decimal::new(10_000_000, 0),
8964 industry,
8965 );
8966 let ob = gen.generate(&spec, coa, start_date, &company.code);
8967 results.push(ob);
8968 }
8969
8970 stats.opening_balance_count = results.len();
8971 info!("Opening balances generated: {} companies", results.len());
8972 self.check_resources_with_log("post-opening-balances")?;
8973
8974 Ok(results)
8975 }
8976
8977 fn phase_subledger_reconciliation(
8979 &mut self,
8980 subledger: &SubledgerSnapshot,
8981 entries: &[JournalEntry],
8982 stats: &mut EnhancedGenerationStatistics,
8983 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8984 if !self.config.balance.reconcile_subledgers {
8985 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8986 return Ok(Vec::new());
8987 }
8988 info!("Phase 9b: Reconciling GL to subledger balances");
8989
8990 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8991 .map(|d| d + chrono::Months::new(self.config.global.period_months))
8992 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8993
8994 let tracker_config = BalanceTrackerConfig {
8996 validate_on_each_entry: false,
8997 track_history: false,
8998 fail_on_validation_error: false,
8999 ..Default::default()
9000 };
9001 let recon_currency = self
9002 .config
9003 .companies
9004 .first()
9005 .map(|c| c.currency.clone())
9006 .unwrap_or_else(|| "USD".to_string());
9007 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9008 let validation_errors = tracker.apply_entries(entries);
9009 if !validation_errors.is_empty() {
9010 warn!(
9011 error_count = validation_errors.len(),
9012 "Balance tracker encountered validation errors during subledger reconciliation"
9013 );
9014 for err in &validation_errors {
9015 debug!("Balance validation error: {:?}", err);
9016 }
9017 }
9018
9019 let mut engine = datasynth_generators::ReconciliationEngine::new(
9020 datasynth_generators::ReconciliationConfig::default(),
9021 );
9022
9023 let mut results = Vec::new();
9024 let company_code = self
9025 .config
9026 .companies
9027 .first()
9028 .map(|c| c.code.as_str())
9029 .unwrap_or("1000");
9030
9031 if !subledger.ar_invoices.is_empty() {
9033 let gl_balance = tracker
9034 .get_account_balance(
9035 company_code,
9036 datasynth_core::accounts::control_accounts::AR_CONTROL,
9037 )
9038 .map(|b| b.closing_balance)
9039 .unwrap_or_default();
9040 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9041 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9042 }
9043
9044 if !subledger.ap_invoices.is_empty() {
9046 let gl_balance = tracker
9047 .get_account_balance(
9048 company_code,
9049 datasynth_core::accounts::control_accounts::AP_CONTROL,
9050 )
9051 .map(|b| b.closing_balance)
9052 .unwrap_or_default();
9053 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9054 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9055 }
9056
9057 if !subledger.fa_records.is_empty() {
9059 let gl_asset_balance = tracker
9060 .get_account_balance(
9061 company_code,
9062 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9063 )
9064 .map(|b| b.closing_balance)
9065 .unwrap_or_default();
9066 let gl_accum_depr_balance = tracker
9067 .get_account_balance(
9068 company_code,
9069 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9070 )
9071 .map(|b| b.closing_balance)
9072 .unwrap_or_default();
9073 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9074 subledger.fa_records.iter().collect();
9075 let (asset_recon, depr_recon) = engine.reconcile_fa(
9076 company_code,
9077 end_date,
9078 gl_asset_balance,
9079 gl_accum_depr_balance,
9080 &fa_refs,
9081 );
9082 results.push(asset_recon);
9083 results.push(depr_recon);
9084 }
9085
9086 if !subledger.inventory_positions.is_empty() {
9088 let gl_balance = tracker
9089 .get_account_balance(
9090 company_code,
9091 datasynth_core::accounts::control_accounts::INVENTORY,
9092 )
9093 .map(|b| b.closing_balance)
9094 .unwrap_or_default();
9095 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9096 subledger.inventory_positions.iter().collect();
9097 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9098 }
9099
9100 stats.subledger_reconciliation_count = results.len();
9101 let passed = results.iter().filter(|r| r.is_balanced()).count();
9102 let failed = results.len() - passed;
9103 info!(
9104 "Subledger reconciliation: {} checks, {} passed, {} failed",
9105 results.len(),
9106 passed,
9107 failed
9108 );
9109 self.check_resources_with_log("post-subledger-reconciliation")?;
9110
9111 Ok(results)
9112 }
9113
9114 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9116 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9117
9118 let coa_framework = self.resolve_coa_framework();
9119
9120 let mut gen = ChartOfAccountsGenerator::new(
9121 self.config.chart_of_accounts.complexity,
9122 self.config.global.industry,
9123 self.seed,
9124 )
9125 .with_coa_framework(coa_framework);
9126
9127 let coa = Arc::new(gen.generate());
9128 self.coa = Some(Arc::clone(&coa));
9129
9130 if let Some(pb) = pb {
9131 pb.finish_with_message("Chart of Accounts complete");
9132 }
9133
9134 Ok(coa)
9135 }
9136
9137 fn generate_master_data(&mut self) -> SynthResult<()> {
9139 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9140 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9141 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9142
9143 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9145
9146 let pack = self.primary_pack().clone();
9148
9149 let vendors_per_company = self.phase_config.vendors_per_company;
9151 let customers_per_company = self.phase_config.customers_per_company;
9152 let materials_per_company = self.phase_config.materials_per_company;
9153 let assets_per_company = self.phase_config.assets_per_company;
9154 let coa_framework = self.resolve_coa_framework();
9155
9156 let per_company_results: Vec<_> = self
9159 .config
9160 .companies
9161 .par_iter()
9162 .enumerate()
9163 .map(|(i, company)| {
9164 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9165 let pack = pack.clone();
9166
9167 let mut vendor_gen = VendorGenerator::new(company_seed);
9169 vendor_gen.set_country_pack(pack.clone());
9170 vendor_gen.set_coa_framework(coa_framework);
9171 vendor_gen.set_counter_offset(i * vendors_per_company);
9172 if self.config.vendor_network.enabled {
9174 let vn = &self.config.vendor_network;
9175 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9176 enabled: true,
9177 depth: vn.depth,
9178 tier1_count: datasynth_generators::TierCountConfig::new(
9179 vn.tier1.min,
9180 vn.tier1.max,
9181 ),
9182 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9183 vn.tier2_per_parent.min,
9184 vn.tier2_per_parent.max,
9185 ),
9186 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9187 vn.tier3_per_parent.min,
9188 vn.tier3_per_parent.max,
9189 ),
9190 cluster_distribution: datasynth_generators::ClusterDistribution {
9191 reliable_strategic: vn.clusters.reliable_strategic,
9192 standard_operational: vn.clusters.standard_operational,
9193 transactional: vn.clusters.transactional,
9194 problematic: vn.clusters.problematic,
9195 },
9196 concentration_limits: datasynth_generators::ConcentrationLimits {
9197 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9198 max_top5: vn.dependencies.top_5_concentration,
9199 },
9200 ..datasynth_generators::VendorNetworkConfig::default()
9201 });
9202 }
9203 let vendor_pool =
9204 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9205
9206 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9208 customer_gen.set_country_pack(pack.clone());
9209 customer_gen.set_coa_framework(coa_framework);
9210 customer_gen.set_counter_offset(i * customers_per_company);
9211 if self.config.customer_segmentation.enabled {
9213 let cs = &self.config.customer_segmentation;
9214 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9215 enabled: true,
9216 segment_distribution: datasynth_generators::SegmentDistribution {
9217 enterprise: cs.value_segments.enterprise.customer_share,
9218 mid_market: cs.value_segments.mid_market.customer_share,
9219 smb: cs.value_segments.smb.customer_share,
9220 consumer: cs.value_segments.consumer.customer_share,
9221 },
9222 referral_config: datasynth_generators::ReferralConfig {
9223 enabled: cs.networks.referrals.enabled,
9224 referral_rate: cs.networks.referrals.referral_rate,
9225 ..Default::default()
9226 },
9227 hierarchy_config: datasynth_generators::HierarchyConfig {
9228 enabled: cs.networks.corporate_hierarchies.enabled,
9229 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9230 ..Default::default()
9231 },
9232 ..Default::default()
9233 };
9234 customer_gen.set_segmentation_config(seg_cfg);
9235 }
9236 let customer_pool = customer_gen.generate_customer_pool(
9237 customers_per_company,
9238 &company.code,
9239 start_date,
9240 );
9241
9242 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9244 material_gen.set_country_pack(pack.clone());
9245 material_gen.set_counter_offset(i * materials_per_company);
9246 let material_pool = material_gen.generate_material_pool(
9247 materials_per_company,
9248 &company.code,
9249 start_date,
9250 );
9251
9252 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9254 let asset_pool = asset_gen.generate_asset_pool(
9255 assets_per_company,
9256 &company.code,
9257 (start_date, end_date),
9258 );
9259
9260 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9262 employee_gen.set_country_pack(pack);
9263 let employee_pool =
9264 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9265
9266 let employee_change_history =
9268 employee_gen.generate_all_change_history(&employee_pool, end_date);
9269
9270 let employee_ids: Vec<String> = employee_pool
9272 .employees
9273 .iter()
9274 .map(|e| e.employee_id.clone())
9275 .collect();
9276 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9277 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9278
9279 (
9280 vendor_pool.vendors,
9281 customer_pool.customers,
9282 material_pool.materials,
9283 asset_pool.assets,
9284 employee_pool.employees,
9285 employee_change_history,
9286 cost_centers,
9287 )
9288 })
9289 .collect();
9290
9291 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9293 per_company_results
9294 {
9295 self.master_data.vendors.extend(vendors);
9296 self.master_data.customers.extend(customers);
9297 self.master_data.materials.extend(materials);
9298 self.master_data.assets.extend(assets);
9299 self.master_data.employees.extend(employees);
9300 self.master_data.cost_centers.extend(cost_centers);
9301 self.master_data
9302 .employee_change_history
9303 .extend(change_history);
9304 }
9305
9306 if let Some(pb) = &pb {
9307 pb.inc(total);
9308 }
9309 if let Some(pb) = pb {
9310 pb.finish_with_message("Master data generation complete");
9311 }
9312
9313 Ok(())
9314 }
9315
9316 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9318 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9319 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9320
9321 let months = (self.config.global.period_months as usize).max(1);
9324 let p2p_count = self
9325 .phase_config
9326 .p2p_chains
9327 .min(self.master_data.vendors.len() * 2 * months);
9328 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9329
9330 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9332 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9333 p2p_gen.set_country_pack(self.primary_pack().clone());
9334
9335 for i in 0..p2p_count {
9336 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9337 let materials: Vec<&Material> = self
9338 .master_data
9339 .materials
9340 .iter()
9341 .skip(i % self.master_data.materials.len().max(1))
9342 .take(2.min(self.master_data.materials.len()))
9343 .collect();
9344
9345 if materials.is_empty() {
9346 continue;
9347 }
9348
9349 let company = &self.config.companies[i % self.config.companies.len()];
9350 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9351 let fiscal_period = po_date.month() as u8;
9352 let created_by = if self.master_data.employees.is_empty() {
9353 "SYSTEM"
9354 } else {
9355 self.master_data.employees[i % self.master_data.employees.len()]
9356 .user_id
9357 .as_str()
9358 };
9359
9360 let chain = p2p_gen.generate_chain(
9361 &company.code,
9362 vendor,
9363 &materials,
9364 po_date,
9365 start_date.year() as u16,
9366 fiscal_period,
9367 created_by,
9368 );
9369
9370 flows.purchase_orders.push(chain.purchase_order.clone());
9372 flows.goods_receipts.extend(chain.goods_receipts.clone());
9373 if let Some(vi) = &chain.vendor_invoice {
9374 flows.vendor_invoices.push(vi.clone());
9375 }
9376 if let Some(payment) = &chain.payment {
9377 flows.payments.push(payment.clone());
9378 }
9379 for remainder in &chain.remainder_payments {
9380 flows.payments.push(remainder.clone());
9381 }
9382 flows.p2p_chains.push(chain);
9383
9384 if let Some(pb) = &pb {
9385 pb.inc(1);
9386 }
9387 }
9388
9389 if let Some(pb) = pb {
9390 pb.finish_with_message("P2P document flows complete");
9391 }
9392
9393 let o2c_count = self
9396 .phase_config
9397 .o2c_chains
9398 .min(self.master_data.customers.len() * 2 * months);
9399 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9400
9401 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9403 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9404 o2c_gen.set_country_pack(self.primary_pack().clone());
9405
9406 for i in 0..o2c_count {
9407 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9408 let materials: Vec<&Material> = self
9409 .master_data
9410 .materials
9411 .iter()
9412 .skip(i % self.master_data.materials.len().max(1))
9413 .take(2.min(self.master_data.materials.len()))
9414 .collect();
9415
9416 if materials.is_empty() {
9417 continue;
9418 }
9419
9420 let company = &self.config.companies[i % self.config.companies.len()];
9421 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9422 let fiscal_period = so_date.month() as u8;
9423 let created_by = if self.master_data.employees.is_empty() {
9424 "SYSTEM"
9425 } else {
9426 self.master_data.employees[i % self.master_data.employees.len()]
9427 .user_id
9428 .as_str()
9429 };
9430
9431 let chain = o2c_gen.generate_chain(
9432 &company.code,
9433 customer,
9434 &materials,
9435 so_date,
9436 start_date.year() as u16,
9437 fiscal_period,
9438 created_by,
9439 );
9440
9441 flows.sales_orders.push(chain.sales_order.clone());
9443 flows.deliveries.extend(chain.deliveries.clone());
9444 if let Some(ci) = &chain.customer_invoice {
9445 flows.customer_invoices.push(ci.clone());
9446 }
9447 if let Some(receipt) = &chain.customer_receipt {
9448 flows.payments.push(receipt.clone());
9449 }
9450 for receipt in &chain.remainder_receipts {
9452 flows.payments.push(receipt.clone());
9453 }
9454 flows.o2c_chains.push(chain);
9455
9456 if let Some(pb) = &pb {
9457 pb.inc(1);
9458 }
9459 }
9460
9461 if let Some(pb) = pb {
9462 pb.finish_with_message("O2C document flows complete");
9463 }
9464
9465 {
9469 let mut refs = Vec::new();
9470 for doc in &flows.purchase_orders {
9471 refs.extend(doc.header.document_references.iter().cloned());
9472 }
9473 for doc in &flows.goods_receipts {
9474 refs.extend(doc.header.document_references.iter().cloned());
9475 }
9476 for doc in &flows.vendor_invoices {
9477 refs.extend(doc.header.document_references.iter().cloned());
9478 }
9479 for doc in &flows.sales_orders {
9480 refs.extend(doc.header.document_references.iter().cloned());
9481 }
9482 for doc in &flows.deliveries {
9483 refs.extend(doc.header.document_references.iter().cloned());
9484 }
9485 for doc in &flows.customer_invoices {
9486 refs.extend(doc.header.document_references.iter().cloned());
9487 }
9488 for doc in &flows.payments {
9489 refs.extend(doc.header.document_references.iter().cloned());
9490 }
9491 debug!(
9492 "Collected {} document cross-references from document headers",
9493 refs.len()
9494 );
9495 flows.document_references = refs;
9496 }
9497
9498 Ok(())
9499 }
9500
9501 fn generate_journal_entries(
9503 &mut self,
9504 coa: &Arc<ChartOfAccounts>,
9505 ) -> SynthResult<Vec<JournalEntry>> {
9506 use datasynth_core::traits::ParallelGenerator;
9507
9508 let total = self.calculate_total_transactions();
9509 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9510
9511 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9512 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9513 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9514
9515 let company_codes: Vec<String> = self
9516 .config
9517 .companies
9518 .iter()
9519 .map(|c| c.code.clone())
9520 .collect();
9521
9522 let generator = JournalEntryGenerator::new_with_params(
9523 self.config.transactions.clone(),
9524 Arc::clone(coa),
9525 company_codes,
9526 start_date,
9527 end_date,
9528 self.seed,
9529 );
9530
9531 let je_pack = self.primary_pack();
9535
9536 let mut generator = generator
9537 .with_master_data(
9538 &self.master_data.vendors,
9539 &self.master_data.customers,
9540 &self.master_data.materials,
9541 )
9542 .with_country_pack_names(je_pack)
9543 .with_country_pack_temporal(
9544 self.config.temporal_patterns.clone(),
9545 self.seed + 200,
9546 je_pack,
9547 )
9548 .with_persona_errors(true)
9549 .with_fraud_config(self.config.fraud.clone());
9550
9551 if self.config.temporal.enabled {
9553 let drift_config = self.config.temporal.to_core_config();
9554 generator = generator.with_drift_config(drift_config, self.seed + 100);
9555 }
9556
9557 self.check_memory_limit()?;
9559
9560 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9562
9563 let entries = if total >= 10_000 && num_threads > 1 {
9567 let sub_generators = generator.split(num_threads);
9570 let entries_per_thread = total as usize / num_threads;
9571 let remainder = total as usize % num_threads;
9572
9573 let batches: Vec<Vec<JournalEntry>> = sub_generators
9574 .into_par_iter()
9575 .enumerate()
9576 .map(|(i, mut gen)| {
9577 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9578 gen.generate_batch(count)
9579 })
9580 .collect();
9581
9582 let entries = JournalEntryGenerator::merge_results(batches);
9584
9585 if let Some(pb) = &pb {
9586 pb.inc(total);
9587 }
9588 entries
9589 } else {
9590 let mut entries = Vec::with_capacity(total as usize);
9592 for _ in 0..total {
9593 let entry = generator.generate();
9594 entries.push(entry);
9595 if let Some(pb) = &pb {
9596 pb.inc(1);
9597 }
9598 }
9599 entries
9600 };
9601
9602 if let Some(pb) = pb {
9603 pb.finish_with_message("Journal entries complete");
9604 }
9605
9606 Ok(entries)
9607 }
9608
9609 fn generate_jes_from_document_flows(
9614 &mut self,
9615 flows: &DocumentFlowSnapshot,
9616 ) -> SynthResult<Vec<JournalEntry>> {
9617 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9618 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9619
9620 let je_config = match self.resolve_coa_framework() {
9621 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9622 CoAFramework::GermanSkr04 => {
9623 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9624 DocumentFlowJeConfig::from(&fa)
9625 }
9626 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9627 };
9628
9629 let populate_fec = je_config.populate_fec_fields;
9630 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9631
9632 if populate_fec {
9636 let mut aux_lookup = std::collections::HashMap::new();
9637 for vendor in &self.master_data.vendors {
9638 if let Some(ref aux) = vendor.auxiliary_gl_account {
9639 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9640 }
9641 }
9642 for customer in &self.master_data.customers {
9643 if let Some(ref aux) = customer.auxiliary_gl_account {
9644 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9645 }
9646 }
9647 if !aux_lookup.is_empty() {
9648 generator.set_auxiliary_account_lookup(aux_lookup);
9649 }
9650 }
9651
9652 let mut entries = Vec::new();
9653
9654 for chain in &flows.p2p_chains {
9656 let chain_entries = generator.generate_from_p2p_chain(chain);
9657 entries.extend(chain_entries);
9658 if let Some(pb) = &pb {
9659 pb.inc(1);
9660 }
9661 }
9662
9663 for chain in &flows.o2c_chains {
9665 let chain_entries = generator.generate_from_o2c_chain(chain);
9666 entries.extend(chain_entries);
9667 if let Some(pb) = &pb {
9668 pb.inc(1);
9669 }
9670 }
9671
9672 if let Some(pb) = pb {
9673 pb.finish_with_message(format!(
9674 "Generated {} JEs from document flows",
9675 entries.len()
9676 ));
9677 }
9678
9679 Ok(entries)
9680 }
9681
9682 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9688 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9689
9690 let mut jes = Vec::with_capacity(payroll_runs.len());
9691
9692 for run in payroll_runs {
9693 let mut je = JournalEntry::new_simple(
9694 format!("JE-PAYROLL-{}", run.payroll_id),
9695 run.company_code.clone(),
9696 run.run_date,
9697 format!("Payroll {}", run.payroll_id),
9698 );
9699
9700 je.add_line(JournalEntryLine {
9702 line_number: 1,
9703 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9704 debit_amount: run.total_gross,
9705 reference: Some(run.payroll_id.clone()),
9706 text: Some(format!(
9707 "Payroll {} ({} employees)",
9708 run.payroll_id, run.employee_count
9709 )),
9710 ..Default::default()
9711 });
9712
9713 je.add_line(JournalEntryLine {
9715 line_number: 2,
9716 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9717 credit_amount: run.total_gross,
9718 reference: Some(run.payroll_id.clone()),
9719 ..Default::default()
9720 });
9721
9722 jes.push(je);
9723 }
9724
9725 jes
9726 }
9727
9728 fn link_document_flows_to_subledgers(
9733 &mut self,
9734 flows: &DocumentFlowSnapshot,
9735 ) -> SynthResult<SubledgerSnapshot> {
9736 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9737 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9738
9739 let vendor_names: std::collections::HashMap<String, String> = self
9741 .master_data
9742 .vendors
9743 .iter()
9744 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9745 .collect();
9746 let customer_names: std::collections::HashMap<String, String> = self
9747 .master_data
9748 .customers
9749 .iter()
9750 .map(|c| (c.customer_id.clone(), c.name.clone()))
9751 .collect();
9752
9753 let mut linker = DocumentFlowLinker::new()
9754 .with_vendor_names(vendor_names)
9755 .with_customer_names(customer_names);
9756
9757 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9759 if let Some(pb) = &pb {
9760 pb.inc(flows.vendor_invoices.len() as u64);
9761 }
9762
9763 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9765 if let Some(pb) = &pb {
9766 pb.inc(flows.customer_invoices.len() as u64);
9767 }
9768
9769 if let Some(pb) = pb {
9770 pb.finish_with_message(format!(
9771 "Linked {} AP and {} AR invoices",
9772 ap_invoices.len(),
9773 ar_invoices.len()
9774 ));
9775 }
9776
9777 Ok(SubledgerSnapshot {
9778 ap_invoices,
9779 ar_invoices,
9780 fa_records: Vec::new(),
9781 inventory_positions: Vec::new(),
9782 inventory_movements: Vec::new(),
9783 ar_aging_reports: Vec::new(),
9785 ap_aging_reports: Vec::new(),
9786 depreciation_runs: Vec::new(),
9788 inventory_valuations: Vec::new(),
9789 dunning_runs: Vec::new(),
9791 dunning_letters: Vec::new(),
9792 })
9793 }
9794
9795 #[allow(clippy::too_many_arguments)]
9800 fn generate_ocpm_events(
9801 &mut self,
9802 flows: &DocumentFlowSnapshot,
9803 sourcing: &SourcingSnapshot,
9804 hr: &HrSnapshot,
9805 manufacturing: &ManufacturingSnapshot,
9806 banking: &BankingSnapshot,
9807 audit: &AuditSnapshot,
9808 financial_reporting: &FinancialReportingSnapshot,
9809 ) -> SynthResult<OcpmSnapshot> {
9810 let total_chains = flows.p2p_chains.len()
9811 + flows.o2c_chains.len()
9812 + sourcing.sourcing_projects.len()
9813 + hr.payroll_runs.len()
9814 + manufacturing.production_orders.len()
9815 + banking.customers.len()
9816 + audit.engagements.len()
9817 + financial_reporting.bank_reconciliations.len();
9818 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9819
9820 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9822 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9823
9824 let ocpm_config = OcpmGeneratorConfig {
9826 generate_p2p: true,
9827 generate_o2c: true,
9828 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9829 generate_h2r: !hr.payroll_runs.is_empty(),
9830 generate_mfg: !manufacturing.production_orders.is_empty(),
9831 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9832 generate_bank: !banking.customers.is_empty(),
9833 generate_audit: !audit.engagements.is_empty(),
9834 happy_path_rate: 0.75,
9835 exception_path_rate: 0.20,
9836 error_path_rate: 0.05,
9837 add_duration_variability: true,
9838 duration_std_dev_factor: 0.3,
9839 };
9840 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9841 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9842
9843 let available_users: Vec<String> = self
9845 .master_data
9846 .employees
9847 .iter()
9848 .take(20)
9849 .map(|e| e.user_id.clone())
9850 .collect();
9851
9852 let fallback_date =
9854 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9855 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9856 .unwrap_or(fallback_date);
9857 let base_midnight = base_date
9858 .and_hms_opt(0, 0, 0)
9859 .expect("midnight is always valid");
9860 let base_datetime =
9861 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9862
9863 let add_result = |event_log: &mut OcpmEventLog,
9865 result: datasynth_ocpm::CaseGenerationResult| {
9866 for event in result.events {
9867 event_log.add_event(event);
9868 }
9869 for object in result.objects {
9870 event_log.add_object(object);
9871 }
9872 for relationship in result.relationships {
9873 event_log.add_relationship(relationship);
9874 }
9875 for corr in result.correlation_events {
9876 event_log.add_correlation_event(corr);
9877 }
9878 event_log.add_case(result.case_trace);
9879 };
9880
9881 for chain in &flows.p2p_chains {
9883 let po = &chain.purchase_order;
9884 let documents = P2pDocuments::new(
9885 &po.header.document_id,
9886 &po.vendor_id,
9887 &po.header.company_code,
9888 po.total_net_amount,
9889 &po.header.currency,
9890 &ocpm_uuid_factory,
9891 )
9892 .with_goods_receipt(
9893 chain
9894 .goods_receipts
9895 .first()
9896 .map(|gr| gr.header.document_id.as_str())
9897 .unwrap_or(""),
9898 &ocpm_uuid_factory,
9899 )
9900 .with_invoice(
9901 chain
9902 .vendor_invoice
9903 .as_ref()
9904 .map(|vi| vi.header.document_id.as_str())
9905 .unwrap_or(""),
9906 &ocpm_uuid_factory,
9907 )
9908 .with_payment(
9909 chain
9910 .payment
9911 .as_ref()
9912 .map(|p| p.header.document_id.as_str())
9913 .unwrap_or(""),
9914 &ocpm_uuid_factory,
9915 );
9916
9917 let start_time =
9918 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9919 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9920 add_result(&mut event_log, result);
9921
9922 if let Some(pb) = &pb {
9923 pb.inc(1);
9924 }
9925 }
9926
9927 for chain in &flows.o2c_chains {
9929 let so = &chain.sales_order;
9930 let documents = O2cDocuments::new(
9931 &so.header.document_id,
9932 &so.customer_id,
9933 &so.header.company_code,
9934 so.total_net_amount,
9935 &so.header.currency,
9936 &ocpm_uuid_factory,
9937 )
9938 .with_delivery(
9939 chain
9940 .deliveries
9941 .first()
9942 .map(|d| d.header.document_id.as_str())
9943 .unwrap_or(""),
9944 &ocpm_uuid_factory,
9945 )
9946 .with_invoice(
9947 chain
9948 .customer_invoice
9949 .as_ref()
9950 .map(|ci| ci.header.document_id.as_str())
9951 .unwrap_or(""),
9952 &ocpm_uuid_factory,
9953 )
9954 .with_receipt(
9955 chain
9956 .customer_receipt
9957 .as_ref()
9958 .map(|r| r.header.document_id.as_str())
9959 .unwrap_or(""),
9960 &ocpm_uuid_factory,
9961 );
9962
9963 let start_time =
9964 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9965 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9966 add_result(&mut event_log, result);
9967
9968 if let Some(pb) = &pb {
9969 pb.inc(1);
9970 }
9971 }
9972
9973 for project in &sourcing.sourcing_projects {
9975 let vendor_id = sourcing
9977 .contracts
9978 .iter()
9979 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9980 .map(|c| c.vendor_id.clone())
9981 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9982 .or_else(|| {
9983 self.master_data
9984 .vendors
9985 .first()
9986 .map(|v| v.vendor_id.clone())
9987 })
9988 .unwrap_or_else(|| "V000".to_string());
9989 let mut docs = S2cDocuments::new(
9990 &project.project_id,
9991 &vendor_id,
9992 &project.company_code,
9993 project.estimated_annual_spend,
9994 &ocpm_uuid_factory,
9995 );
9996 if let Some(rfx) = sourcing
9998 .rfx_events
9999 .iter()
10000 .find(|r| r.sourcing_project_id == project.project_id)
10001 {
10002 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10003 if let Some(bid) = sourcing.bids.iter().find(|b| {
10005 b.rfx_id == rfx.rfx_id
10006 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10007 }) {
10008 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10009 }
10010 }
10011 if let Some(contract) = sourcing
10013 .contracts
10014 .iter()
10015 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10016 {
10017 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10018 }
10019 let start_time = base_datetime - chrono::Duration::days(90);
10020 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10021 add_result(&mut event_log, result);
10022
10023 if let Some(pb) = &pb {
10024 pb.inc(1);
10025 }
10026 }
10027
10028 for run in &hr.payroll_runs {
10030 let employee_id = hr
10032 .payroll_line_items
10033 .iter()
10034 .find(|li| li.payroll_id == run.payroll_id)
10035 .map(|li| li.employee_id.as_str())
10036 .unwrap_or("EMP000");
10037 let docs = H2rDocuments::new(
10038 &run.payroll_id,
10039 employee_id,
10040 &run.company_code,
10041 run.total_gross,
10042 &ocpm_uuid_factory,
10043 )
10044 .with_time_entries(
10045 hr.time_entries
10046 .iter()
10047 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10048 .take(5)
10049 .map(|t| t.entry_id.as_str())
10050 .collect(),
10051 );
10052 let start_time = base_datetime - chrono::Duration::days(30);
10053 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10054 add_result(&mut event_log, result);
10055
10056 if let Some(pb) = &pb {
10057 pb.inc(1);
10058 }
10059 }
10060
10061 for order in &manufacturing.production_orders {
10063 let mut docs = MfgDocuments::new(
10064 &order.order_id,
10065 &order.material_id,
10066 &order.company_code,
10067 order.planned_quantity,
10068 &ocpm_uuid_factory,
10069 )
10070 .with_operations(
10071 order
10072 .operations
10073 .iter()
10074 .map(|o| format!("OP-{:04}", o.operation_number))
10075 .collect::<Vec<_>>()
10076 .iter()
10077 .map(std::string::String::as_str)
10078 .collect(),
10079 );
10080 if let Some(insp) = manufacturing
10082 .quality_inspections
10083 .iter()
10084 .find(|i| i.reference_id == order.order_id)
10085 {
10086 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10087 }
10088 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10090 cc.items
10091 .iter()
10092 .any(|item| item.material_id == order.material_id)
10093 }) {
10094 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10095 }
10096 let start_time = base_datetime - chrono::Duration::days(60);
10097 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10098 add_result(&mut event_log, result);
10099
10100 if let Some(pb) = &pb {
10101 pb.inc(1);
10102 }
10103 }
10104
10105 for customer in &banking.customers {
10107 let customer_id_str = customer.customer_id.to_string();
10108 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10109 if let Some(account) = banking
10111 .accounts
10112 .iter()
10113 .find(|a| a.primary_owner_id == customer.customer_id)
10114 {
10115 let account_id_str = account.account_id.to_string();
10116 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10117 let txn_strs: Vec<String> = banking
10119 .transactions
10120 .iter()
10121 .filter(|t| t.account_id == account.account_id)
10122 .take(10)
10123 .map(|t| t.transaction_id.to_string())
10124 .collect();
10125 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10126 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10127 .transactions
10128 .iter()
10129 .filter(|t| t.account_id == account.account_id)
10130 .take(10)
10131 .map(|t| t.amount)
10132 .collect();
10133 if !txn_ids.is_empty() {
10134 docs = docs.with_transactions(txn_ids, txn_amounts);
10135 }
10136 }
10137 let start_time = base_datetime - chrono::Duration::days(180);
10138 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10139 add_result(&mut event_log, result);
10140
10141 if let Some(pb) = &pb {
10142 pb.inc(1);
10143 }
10144 }
10145
10146 for engagement in &audit.engagements {
10148 let engagement_id_str = engagement.engagement_id.to_string();
10149 let docs = AuditDocuments::new(
10150 &engagement_id_str,
10151 &engagement.client_entity_id,
10152 &ocpm_uuid_factory,
10153 )
10154 .with_workpapers(
10155 audit
10156 .workpapers
10157 .iter()
10158 .filter(|w| w.engagement_id == engagement.engagement_id)
10159 .take(10)
10160 .map(|w| w.workpaper_id.to_string())
10161 .collect::<Vec<_>>()
10162 .iter()
10163 .map(std::string::String::as_str)
10164 .collect(),
10165 )
10166 .with_evidence(
10167 audit
10168 .evidence
10169 .iter()
10170 .filter(|e| e.engagement_id == engagement.engagement_id)
10171 .take(10)
10172 .map(|e| e.evidence_id.to_string())
10173 .collect::<Vec<_>>()
10174 .iter()
10175 .map(std::string::String::as_str)
10176 .collect(),
10177 )
10178 .with_risks(
10179 audit
10180 .risk_assessments
10181 .iter()
10182 .filter(|r| r.engagement_id == engagement.engagement_id)
10183 .take(5)
10184 .map(|r| r.risk_id.to_string())
10185 .collect::<Vec<_>>()
10186 .iter()
10187 .map(std::string::String::as_str)
10188 .collect(),
10189 )
10190 .with_findings(
10191 audit
10192 .findings
10193 .iter()
10194 .filter(|f| f.engagement_id == engagement.engagement_id)
10195 .take(5)
10196 .map(|f| f.finding_id.to_string())
10197 .collect::<Vec<_>>()
10198 .iter()
10199 .map(std::string::String::as_str)
10200 .collect(),
10201 )
10202 .with_judgments(
10203 audit
10204 .judgments
10205 .iter()
10206 .filter(|j| j.engagement_id == engagement.engagement_id)
10207 .take(5)
10208 .map(|j| j.judgment_id.to_string())
10209 .collect::<Vec<_>>()
10210 .iter()
10211 .map(std::string::String::as_str)
10212 .collect(),
10213 );
10214 let start_time = base_datetime - chrono::Duration::days(120);
10215 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10216 add_result(&mut event_log, result);
10217
10218 if let Some(pb) = &pb {
10219 pb.inc(1);
10220 }
10221 }
10222
10223 for recon in &financial_reporting.bank_reconciliations {
10225 let docs = BankReconDocuments::new(
10226 &recon.reconciliation_id,
10227 &recon.bank_account_id,
10228 &recon.company_code,
10229 recon.bank_ending_balance,
10230 &ocpm_uuid_factory,
10231 )
10232 .with_statement_lines(
10233 recon
10234 .statement_lines
10235 .iter()
10236 .take(20)
10237 .map(|l| l.line_id.as_str())
10238 .collect(),
10239 )
10240 .with_reconciling_items(
10241 recon
10242 .reconciling_items
10243 .iter()
10244 .take(10)
10245 .map(|i| i.item_id.as_str())
10246 .collect(),
10247 );
10248 let start_time = base_datetime - chrono::Duration::days(30);
10249 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10250 add_result(&mut event_log, result);
10251
10252 if let Some(pb) = &pb {
10253 pb.inc(1);
10254 }
10255 }
10256
10257 event_log.compute_variants();
10259
10260 let summary = event_log.summary();
10261
10262 if let Some(pb) = pb {
10263 pb.finish_with_message(format!(
10264 "Generated {} OCPM events, {} objects",
10265 summary.event_count, summary.object_count
10266 ));
10267 }
10268
10269 Ok(OcpmSnapshot {
10270 event_count: summary.event_count,
10271 object_count: summary.object_count,
10272 case_count: summary.case_count,
10273 event_log: Some(event_log),
10274 })
10275 }
10276
10277 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10279 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10280
10281 let total_rate = if self.config.anomaly_injection.enabled {
10284 self.config.anomaly_injection.rates.total_rate
10285 } else if self.config.fraud.enabled {
10286 self.config.fraud.fraud_rate
10287 } else {
10288 0.02
10289 };
10290
10291 let fraud_rate = if self.config.anomaly_injection.enabled {
10292 self.config.anomaly_injection.rates.fraud_rate
10293 } else {
10294 AnomalyRateConfig::default().fraud_rate
10295 };
10296
10297 let error_rate = if self.config.anomaly_injection.enabled {
10298 self.config.anomaly_injection.rates.error_rate
10299 } else {
10300 AnomalyRateConfig::default().error_rate
10301 };
10302
10303 let process_issue_rate = if self.config.anomaly_injection.enabled {
10304 self.config.anomaly_injection.rates.process_rate
10305 } else {
10306 AnomalyRateConfig::default().process_issue_rate
10307 };
10308
10309 let anomaly_config = AnomalyInjectorConfig {
10310 rates: AnomalyRateConfig {
10311 total_rate,
10312 fraud_rate,
10313 error_rate,
10314 process_issue_rate,
10315 ..Default::default()
10316 },
10317 seed: self.seed + 5000,
10318 ..Default::default()
10319 };
10320
10321 let mut injector = AnomalyInjector::new(anomaly_config);
10322 let result = injector.process_entries(entries);
10323
10324 if let Some(pb) = &pb {
10325 pb.inc(entries.len() as u64);
10326 pb.finish_with_message("Anomaly injection complete");
10327 }
10328
10329 let mut by_type = HashMap::new();
10330 for label in &result.labels {
10331 *by_type
10332 .entry(format!("{:?}", label.anomaly_type))
10333 .or_insert(0) += 1;
10334 }
10335
10336 Ok(AnomalyLabels {
10337 labels: result.labels,
10338 summary: Some(result.summary),
10339 by_type,
10340 })
10341 }
10342
10343 fn validate_journal_entries(
10352 &mut self,
10353 entries: &[JournalEntry],
10354 ) -> SynthResult<BalanceValidationResult> {
10355 let clean_entries: Vec<&JournalEntry> = entries
10357 .iter()
10358 .filter(|e| {
10359 e.header
10360 .header_text
10361 .as_ref()
10362 .map(|t| !t.contains("[HUMAN_ERROR:"))
10363 .unwrap_or(true)
10364 })
10365 .collect();
10366
10367 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10368
10369 let config = BalanceTrackerConfig {
10371 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10375 };
10376 let validation_currency = self
10377 .config
10378 .companies
10379 .first()
10380 .map(|c| c.currency.clone())
10381 .unwrap_or_else(|| "USD".to_string());
10382
10383 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10384
10385 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10387 let errors = tracker.apply_entries(&clean_refs);
10388
10389 if let Some(pb) = &pb {
10390 pb.inc(entries.len() as u64);
10391 }
10392
10393 let has_unbalanced = tracker
10396 .get_validation_errors()
10397 .iter()
10398 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10399
10400 let mut all_errors = errors;
10403 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10404 let company_codes: Vec<String> = self
10405 .config
10406 .companies
10407 .iter()
10408 .map(|c| c.code.clone())
10409 .collect();
10410
10411 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10412 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10413 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10414
10415 for company_code in &company_codes {
10416 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10417 all_errors.push(e);
10418 }
10419 }
10420
10421 let stats = tracker.get_statistics();
10423
10424 let is_balanced = all_errors.is_empty();
10426
10427 if let Some(pb) = pb {
10428 let msg = if is_balanced {
10429 "Balance validation passed"
10430 } else {
10431 "Balance validation completed with errors"
10432 };
10433 pb.finish_with_message(msg);
10434 }
10435
10436 Ok(BalanceValidationResult {
10437 validated: true,
10438 is_balanced,
10439 entries_processed: stats.entries_processed,
10440 total_debits: stats.total_debits,
10441 total_credits: stats.total_credits,
10442 accounts_tracked: stats.accounts_tracked,
10443 companies_tracked: stats.companies_tracked,
10444 validation_errors: all_errors,
10445 has_unbalanced_entries: has_unbalanced,
10446 })
10447 }
10448
10449 fn inject_data_quality(
10454 &mut self,
10455 entries: &mut [JournalEntry],
10456 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10457 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10458
10459 let config = if self.config.data_quality.enabled {
10462 let dq = &self.config.data_quality;
10463 DataQualityConfig {
10464 enable_missing_values: dq.missing_values.enabled,
10465 missing_values: datasynth_generators::MissingValueConfig {
10466 global_rate: dq.effective_missing_rate(),
10467 ..Default::default()
10468 },
10469 enable_format_variations: dq.format_variations.enabled,
10470 format_variations: datasynth_generators::FormatVariationConfig {
10471 date_variation_rate: dq.format_variations.dates.rate,
10472 amount_variation_rate: dq.format_variations.amounts.rate,
10473 identifier_variation_rate: dq.format_variations.identifiers.rate,
10474 ..Default::default()
10475 },
10476 enable_duplicates: dq.duplicates.enabled,
10477 duplicates: datasynth_generators::DuplicateConfig {
10478 duplicate_rate: dq.effective_duplicate_rate(),
10479 ..Default::default()
10480 },
10481 enable_typos: dq.typos.enabled,
10482 typos: datasynth_generators::TypoConfig {
10483 char_error_rate: dq.effective_typo_rate(),
10484 ..Default::default()
10485 },
10486 enable_encoding_issues: dq.encoding_issues.enabled,
10487 encoding_issue_rate: dq.encoding_issues.rate,
10488 seed: self.seed.wrapping_add(77), track_statistics: true,
10490 }
10491 } else {
10492 DataQualityConfig::minimal()
10493 };
10494 let mut injector = DataQualityInjector::new(config);
10495
10496 injector.set_country_pack(self.primary_pack().clone());
10498
10499 let context = HashMap::new();
10501
10502 for entry in entries.iter_mut() {
10503 if let Some(text) = &entry.header.header_text {
10505 let processed = injector.process_text_field(
10506 "header_text",
10507 text,
10508 &entry.header.document_id.to_string(),
10509 &context,
10510 );
10511 match processed {
10512 Some(new_text) if new_text != *text => {
10513 entry.header.header_text = Some(new_text);
10514 }
10515 None => {
10516 entry.header.header_text = None; }
10518 _ => {}
10519 }
10520 }
10521
10522 if let Some(ref_text) = &entry.header.reference {
10524 let processed = injector.process_text_field(
10525 "reference",
10526 ref_text,
10527 &entry.header.document_id.to_string(),
10528 &context,
10529 );
10530 match processed {
10531 Some(new_text) if new_text != *ref_text => {
10532 entry.header.reference = Some(new_text);
10533 }
10534 None => {
10535 entry.header.reference = None;
10536 }
10537 _ => {}
10538 }
10539 }
10540
10541 let user_persona = entry.header.user_persona.clone();
10543 if let Some(processed) = injector.process_text_field(
10544 "user_persona",
10545 &user_persona,
10546 &entry.header.document_id.to_string(),
10547 &context,
10548 ) {
10549 if processed != user_persona {
10550 entry.header.user_persona = processed;
10551 }
10552 }
10553
10554 for line in &mut entry.lines {
10556 if let Some(ref text) = line.line_text {
10558 let processed = injector.process_text_field(
10559 "line_text",
10560 text,
10561 &entry.header.document_id.to_string(),
10562 &context,
10563 );
10564 match processed {
10565 Some(new_text) if new_text != *text => {
10566 line.line_text = Some(new_text);
10567 }
10568 None => {
10569 line.line_text = None;
10570 }
10571 _ => {}
10572 }
10573 }
10574
10575 if let Some(cc) = &line.cost_center {
10577 let processed = injector.process_text_field(
10578 "cost_center",
10579 cc,
10580 &entry.header.document_id.to_string(),
10581 &context,
10582 );
10583 match processed {
10584 Some(new_cc) if new_cc != *cc => {
10585 line.cost_center = Some(new_cc);
10586 }
10587 None => {
10588 line.cost_center = None;
10589 }
10590 _ => {}
10591 }
10592 }
10593 }
10594
10595 if let Some(pb) = &pb {
10596 pb.inc(1);
10597 }
10598 }
10599
10600 if let Some(pb) = pb {
10601 pb.finish_with_message("Data quality injection complete");
10602 }
10603
10604 let quality_issues = injector.issues().to_vec();
10605 Ok((injector.stats().clone(), quality_issues))
10606 }
10607
10608 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10619 let use_fsm = self
10621 .config
10622 .audit
10623 .fsm
10624 .as_ref()
10625 .map(|f| f.enabled)
10626 .unwrap_or(false);
10627
10628 if use_fsm {
10629 return self.generate_audit_data_with_fsm(entries);
10630 }
10631
10632 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10634 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10635 let fiscal_year = start_date.year() as u16;
10636 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10637
10638 let total_revenue: rust_decimal::Decimal = entries
10640 .iter()
10641 .flat_map(|e| e.lines.iter())
10642 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10643 .map(|l| l.credit_amount)
10644 .sum();
10645
10646 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10648
10649 let mut snapshot = AuditSnapshot::default();
10650
10651 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10653 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10654 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10655 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10656 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10657 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10658 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10659 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10660 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10661 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10662 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10663 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10664
10665 let accounts: Vec<String> = self
10667 .coa
10668 .as_ref()
10669 .map(|coa| {
10670 coa.get_postable_accounts()
10671 .iter()
10672 .map(|acc| acc.account_code().to_string())
10673 .collect()
10674 })
10675 .unwrap_or_default();
10676
10677 for (i, company) in self.config.companies.iter().enumerate() {
10679 let company_revenue = total_revenue
10681 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10682
10683 let engagements_for_company =
10685 self.phase_config.audit_engagements / self.config.companies.len().max(1);
10686 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10687 1
10688 } else {
10689 0
10690 };
10691
10692 for _eng_idx in 0..(engagements_for_company + extra) {
10693 let mut engagement = engagement_gen.generate_engagement(
10695 &company.code,
10696 &company.name,
10697 fiscal_year,
10698 period_end,
10699 company_revenue,
10700 None, );
10702
10703 if !self.master_data.employees.is_empty() {
10705 let emp_count = self.master_data.employees.len();
10706 let base = (i * 10 + _eng_idx) % emp_count;
10708 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10709 .employee_id
10710 .clone();
10711 engagement.engagement_manager_id = self.master_data.employees
10712 [(base + 1) % emp_count]
10713 .employee_id
10714 .clone();
10715 let real_team: Vec<String> = engagement
10716 .team_member_ids
10717 .iter()
10718 .enumerate()
10719 .map(|(j, _)| {
10720 self.master_data.employees[(base + 2 + j) % emp_count]
10721 .employee_id
10722 .clone()
10723 })
10724 .collect();
10725 engagement.team_member_ids = real_team;
10726 }
10727
10728 if let Some(pb) = &pb {
10729 pb.inc(1);
10730 }
10731
10732 let team_members: Vec<String> = engagement.team_member_ids.clone();
10734
10735 let workpapers =
10737 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10738
10739 for wp in &workpapers {
10740 if let Some(pb) = &pb {
10741 pb.inc(1);
10742 }
10743
10744 let evidence = evidence_gen.generate_evidence_for_workpaper(
10746 wp,
10747 &team_members,
10748 wp.preparer_date,
10749 );
10750
10751 for _ in &evidence {
10752 if let Some(pb) = &pb {
10753 pb.inc(1);
10754 }
10755 }
10756
10757 snapshot.evidence.extend(evidence);
10758 }
10759
10760 let risks =
10762 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10763
10764 for _ in &risks {
10765 if let Some(pb) = &pb {
10766 pb.inc(1);
10767 }
10768 }
10769 snapshot.risk_assessments.extend(risks);
10770
10771 let findings = finding_gen.generate_findings_for_engagement(
10773 &engagement,
10774 &workpapers,
10775 &team_members,
10776 );
10777
10778 for _ in &findings {
10779 if let Some(pb) = &pb {
10780 pb.inc(1);
10781 }
10782 }
10783 snapshot.findings.extend(findings);
10784
10785 let judgments =
10787 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10788
10789 for _ in &judgments {
10790 if let Some(pb) = &pb {
10791 pb.inc(1);
10792 }
10793 }
10794 snapshot.judgments.extend(judgments);
10795
10796 let (confs, resps) =
10798 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10799 snapshot.confirmations.extend(confs);
10800 snapshot.confirmation_responses.extend(resps);
10801
10802 let team_pairs: Vec<(String, String)> = team_members
10804 .iter()
10805 .map(|id| {
10806 let name = self
10807 .master_data
10808 .employees
10809 .iter()
10810 .find(|e| e.employee_id == *id)
10811 .map(|e| e.display_name.clone())
10812 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10813 (id.clone(), name)
10814 })
10815 .collect();
10816 for wp in &workpapers {
10817 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10818 snapshot.procedure_steps.extend(steps);
10819 }
10820
10821 for wp in &workpapers {
10823 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10824 snapshot.samples.push(sample);
10825 }
10826 }
10827
10828 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10830 snapshot.analytical_results.extend(analytical);
10831
10832 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10834 snapshot.ia_functions.push(ia_func);
10835 snapshot.ia_reports.extend(ia_reports);
10836
10837 let vendor_names: Vec<String> = self
10839 .master_data
10840 .vendors
10841 .iter()
10842 .map(|v| v.name.clone())
10843 .collect();
10844 let customer_names: Vec<String> = self
10845 .master_data
10846 .customers
10847 .iter()
10848 .map(|c| c.name.clone())
10849 .collect();
10850 let (parties, rp_txns) =
10851 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10852 snapshot.related_parties.extend(parties);
10853 snapshot.related_party_transactions.extend(rp_txns);
10854
10855 snapshot.workpapers.extend(workpapers);
10857
10858 {
10860 let scope_id = format!(
10861 "SCOPE-{}-{}",
10862 engagement.engagement_id.simple(),
10863 &engagement.client_entity_id
10864 );
10865 let scope = datasynth_core::models::audit::AuditScope::new(
10866 scope_id.clone(),
10867 engagement.engagement_id.to_string(),
10868 engagement.client_entity_id.clone(),
10869 engagement.materiality,
10870 );
10871 let mut eng = engagement;
10873 eng.scope_id = Some(scope_id);
10874 snapshot.audit_scopes.push(scope);
10875 snapshot.engagements.push(eng);
10876 }
10877 }
10878 }
10879
10880 if self.config.companies.len() > 1 {
10884 let group_materiality = snapshot
10887 .engagements
10888 .first()
10889 .map(|e| e.materiality)
10890 .unwrap_or_else(|| {
10891 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10892 total_revenue * pct
10893 });
10894
10895 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10896 let group_engagement_id = snapshot
10897 .engagements
10898 .first()
10899 .map(|e| e.engagement_id.to_string())
10900 .unwrap_or_else(|| "GROUP-ENG".to_string());
10901
10902 let component_snapshot = component_gen.generate(
10903 &self.config.companies,
10904 group_materiality,
10905 &group_engagement_id,
10906 period_end,
10907 );
10908
10909 snapshot.component_auditors = component_snapshot.component_auditors;
10910 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10911 snapshot.component_instructions = component_snapshot.component_instructions;
10912 snapshot.component_reports = component_snapshot.component_reports;
10913
10914 info!(
10915 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10916 snapshot.component_auditors.len(),
10917 snapshot.component_instructions.len(),
10918 snapshot.component_reports.len(),
10919 );
10920 }
10921
10922 {
10926 let applicable_framework = self
10927 .config
10928 .accounting_standards
10929 .framework
10930 .as_ref()
10931 .map(|f| format!("{f:?}"))
10932 .unwrap_or_else(|| "IFRS".to_string());
10933
10934 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10935 let entity_count = self.config.companies.len();
10936
10937 for engagement in &snapshot.engagements {
10938 let company = self
10939 .config
10940 .companies
10941 .iter()
10942 .find(|c| c.code == engagement.client_entity_id);
10943 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10944 let letter_date = engagement.planning_start;
10945 let letter = letter_gen.generate(
10946 &engagement.engagement_id.to_string(),
10947 &engagement.client_name,
10948 entity_count,
10949 engagement.period_end_date,
10950 currency,
10951 &applicable_framework,
10952 letter_date,
10953 );
10954 snapshot.engagement_letters.push(letter);
10955 }
10956
10957 info!(
10958 "ISA 210 engagement letters: {} generated",
10959 snapshot.engagement_letters.len()
10960 );
10961 }
10962
10963 {
10967 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10968 let entity_codes: Vec<String> = self
10969 .config
10970 .companies
10971 .iter()
10972 .map(|c| c.code.clone())
10973 .collect();
10974 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10975 info!(
10976 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10977 subsequent.len(),
10978 subsequent
10979 .iter()
10980 .filter(|e| matches!(
10981 e.classification,
10982 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10983 ))
10984 .count(),
10985 subsequent
10986 .iter()
10987 .filter(|e| matches!(
10988 e.classification,
10989 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10990 ))
10991 .count(),
10992 );
10993 snapshot.subsequent_events = subsequent;
10994 }
10995
10996 {
11000 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11001 let entity_codes: Vec<String> = self
11002 .config
11003 .companies
11004 .iter()
11005 .map(|c| c.code.clone())
11006 .collect();
11007 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11008 info!(
11009 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11010 soc_snapshot.service_organizations.len(),
11011 soc_snapshot.soc_reports.len(),
11012 soc_snapshot.user_entity_controls.len(),
11013 );
11014 snapshot.service_organizations = soc_snapshot.service_organizations;
11015 snapshot.soc_reports = soc_snapshot.soc_reports;
11016 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11017 }
11018
11019 {
11023 use datasynth_generators::audit::going_concern_generator::{
11024 GoingConcernGenerator, GoingConcernInput,
11025 };
11026 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11027 let entity_codes: Vec<String> = self
11028 .config
11029 .companies
11030 .iter()
11031 .map(|c| c.code.clone())
11032 .collect();
11033 let assessment_date = period_end + chrono::Duration::days(75);
11035 let period_label = format!("FY{}", period_end.year());
11036
11037 let gc_inputs: Vec<GoingConcernInput> = self
11048 .config
11049 .companies
11050 .iter()
11051 .map(|company| {
11052 let code = &company.code;
11053 let mut revenue = rust_decimal::Decimal::ZERO;
11054 let mut expenses = rust_decimal::Decimal::ZERO;
11055 let mut current_assets = rust_decimal::Decimal::ZERO;
11056 let mut current_liabs = rust_decimal::Decimal::ZERO;
11057 let mut total_debt = rust_decimal::Decimal::ZERO;
11058
11059 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11060 for line in &je.lines {
11061 let acct = line.gl_account.as_str();
11062 let net = line.debit_amount - line.credit_amount;
11063 if acct.starts_with('4') {
11064 revenue -= net;
11066 } else if acct.starts_with('6') {
11067 expenses += net;
11069 }
11070 if acct.starts_with('1') {
11072 if let Ok(n) = acct.parse::<u32>() {
11074 if (1000..=1499).contains(&n) {
11075 current_assets += net;
11076 }
11077 }
11078 } else if acct.starts_with('2') {
11079 if let Ok(n) = acct.parse::<u32>() {
11080 if (2000..=2499).contains(&n) {
11081 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11084 total_debt -= net;
11086 }
11087 }
11088 }
11089 }
11090 }
11091
11092 let net_income = revenue - expenses;
11093 let working_capital = current_assets - current_liabs;
11094 let operating_cash_flow = net_income;
11097
11098 GoingConcernInput {
11099 entity_code: code.clone(),
11100 net_income,
11101 working_capital,
11102 operating_cash_flow,
11103 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11104 assessment_date,
11105 }
11106 })
11107 .collect();
11108
11109 let assessments = if gc_inputs.is_empty() {
11110 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11111 } else {
11112 gc_gen.generate_for_entities_with_inputs(
11113 &entity_codes,
11114 &gc_inputs,
11115 assessment_date,
11116 &period_label,
11117 )
11118 };
11119 info!(
11120 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11121 assessments.len(),
11122 assessments.iter().filter(|a| matches!(
11123 a.auditor_conclusion,
11124 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11125 )).count(),
11126 assessments.iter().filter(|a| matches!(
11127 a.auditor_conclusion,
11128 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11129 )).count(),
11130 assessments.iter().filter(|a| matches!(
11131 a.auditor_conclusion,
11132 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11133 )).count(),
11134 );
11135 snapshot.going_concern_assessments = assessments;
11136 }
11137
11138 {
11142 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11143 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11144 let entity_codes: Vec<String> = self
11145 .config
11146 .companies
11147 .iter()
11148 .map(|c| c.code.clone())
11149 .collect();
11150 let estimates = est_gen.generate_for_entities(&entity_codes);
11151 info!(
11152 "ISA 540 accounting estimates: {} estimates across {} entities \
11153 ({} with retrospective reviews, {} with auditor point estimates)",
11154 estimates.len(),
11155 entity_codes.len(),
11156 estimates
11157 .iter()
11158 .filter(|e| e.retrospective_review.is_some())
11159 .count(),
11160 estimates
11161 .iter()
11162 .filter(|e| e.auditor_point_estimate.is_some())
11163 .count(),
11164 );
11165 snapshot.accounting_estimates = estimates;
11166 }
11167
11168 {
11172 use datasynth_generators::audit::audit_opinion_generator::{
11173 AuditOpinionGenerator, AuditOpinionInput,
11174 };
11175
11176 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11177
11178 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11180 .engagements
11181 .iter()
11182 .map(|eng| {
11183 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11185 .findings
11186 .iter()
11187 .filter(|f| f.engagement_id == eng.engagement_id)
11188 .cloned()
11189 .collect();
11190
11191 let gc = snapshot
11193 .going_concern_assessments
11194 .iter()
11195 .find(|g| g.entity_code == eng.client_entity_id)
11196 .cloned();
11197
11198 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11200 snapshot.component_reports.clone();
11201
11202 let auditor = self
11203 .master_data
11204 .employees
11205 .first()
11206 .map(|e| e.display_name.clone())
11207 .unwrap_or_else(|| "Global Audit LLP".into());
11208
11209 let partner = self
11210 .master_data
11211 .employees
11212 .get(1)
11213 .map(|e| e.display_name.clone())
11214 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11215
11216 AuditOpinionInput {
11217 entity_code: eng.client_entity_id.clone(),
11218 entity_name: eng.client_name.clone(),
11219 engagement_id: eng.engagement_id,
11220 period_end: eng.period_end_date,
11221 findings: eng_findings,
11222 going_concern: gc,
11223 component_reports: comp_reports,
11224 is_us_listed: {
11226 let fw = &self.config.audit_standards.isa_compliance.framework;
11227 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11228 },
11229 auditor_name: auditor,
11230 engagement_partner: partner,
11231 }
11232 })
11233 .collect();
11234
11235 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11236
11237 for go in &generated_opinions {
11238 snapshot
11239 .key_audit_matters
11240 .extend(go.key_audit_matters.clone());
11241 }
11242 snapshot.audit_opinions = generated_opinions
11243 .into_iter()
11244 .map(|go| go.opinion)
11245 .collect();
11246
11247 info!(
11248 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11249 snapshot.audit_opinions.len(),
11250 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11251 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11252 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11253 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11254 );
11255 }
11256
11257 {
11261 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11262
11263 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11264
11265 for (i, company) in self.config.companies.iter().enumerate() {
11266 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11268 .engagements
11269 .iter()
11270 .filter(|e| e.client_entity_id == company.code)
11271 .map(|e| e.engagement_id)
11272 .collect();
11273
11274 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11275 .findings
11276 .iter()
11277 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11278 .cloned()
11279 .collect();
11280
11281 let emp_count = self.master_data.employees.len();
11283 let ceo_name = if emp_count > 0 {
11284 self.master_data.employees[i % emp_count]
11285 .display_name
11286 .clone()
11287 } else {
11288 format!("CEO of {}", company.name)
11289 };
11290 let cfo_name = if emp_count > 1 {
11291 self.master_data.employees[(i + 1) % emp_count]
11292 .display_name
11293 .clone()
11294 } else {
11295 format!("CFO of {}", company.name)
11296 };
11297
11298 let materiality = snapshot
11300 .engagements
11301 .iter()
11302 .find(|e| e.client_entity_id == company.code)
11303 .map(|e| e.materiality)
11304 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11305
11306 let input = SoxGeneratorInput {
11307 company_code: company.code.clone(),
11308 company_name: company.name.clone(),
11309 fiscal_year,
11310 period_end,
11311 findings: company_findings,
11312 ceo_name,
11313 cfo_name,
11314 materiality_threshold: materiality,
11315 revenue_percent: rust_decimal::Decimal::from(100),
11316 assets_percent: rust_decimal::Decimal::from(100),
11317 significant_accounts: vec![
11318 "Revenue".into(),
11319 "Accounts Receivable".into(),
11320 "Inventory".into(),
11321 "Fixed Assets".into(),
11322 "Accounts Payable".into(),
11323 ],
11324 };
11325
11326 let (certs, assessment) = sox_gen.generate(&input);
11327 snapshot.sox_302_certifications.extend(certs);
11328 snapshot.sox_404_assessments.push(assessment);
11329 }
11330
11331 info!(
11332 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11333 snapshot.sox_302_certifications.len(),
11334 snapshot.sox_404_assessments.len(),
11335 snapshot
11336 .sox_404_assessments
11337 .iter()
11338 .filter(|a| a.icfr_effective)
11339 .count(),
11340 snapshot
11341 .sox_404_assessments
11342 .iter()
11343 .filter(|a| !a.icfr_effective)
11344 .count(),
11345 );
11346 }
11347
11348 {
11352 use datasynth_generators::audit::materiality_generator::{
11353 MaterialityGenerator, MaterialityInput,
11354 };
11355
11356 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11357
11358 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11362
11363 for company in &self.config.companies {
11364 let company_code = company.code.clone();
11365
11366 let company_revenue: rust_decimal::Decimal = entries
11368 .iter()
11369 .filter(|e| e.company_code() == company_code)
11370 .flat_map(|e| e.lines.iter())
11371 .filter(|l| l.account_code.starts_with('4'))
11372 .map(|l| l.credit_amount)
11373 .sum();
11374
11375 let total_assets: rust_decimal::Decimal = entries
11377 .iter()
11378 .filter(|e| e.company_code() == company_code)
11379 .flat_map(|e| e.lines.iter())
11380 .filter(|l| l.account_code.starts_with('1'))
11381 .map(|l| l.debit_amount)
11382 .sum();
11383
11384 let total_expenses: rust_decimal::Decimal = entries
11386 .iter()
11387 .filter(|e| e.company_code() == company_code)
11388 .flat_map(|e| e.lines.iter())
11389 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11390 .map(|l| l.debit_amount)
11391 .sum();
11392
11393 let equity: rust_decimal::Decimal = entries
11395 .iter()
11396 .filter(|e| e.company_code() == company_code)
11397 .flat_map(|e| e.lines.iter())
11398 .filter(|l| l.account_code.starts_with('3'))
11399 .map(|l| l.credit_amount)
11400 .sum();
11401
11402 let pretax_income = company_revenue - total_expenses;
11403
11404 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11406 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11407 .unwrap_or(rust_decimal::Decimal::ONE);
11408 (
11409 total_revenue * w,
11410 total_revenue * w * rust_decimal::Decimal::from(3),
11411 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11412 total_revenue * w * rust_decimal::Decimal::from(2),
11413 )
11414 } else {
11415 (company_revenue, total_assets, pretax_income, equity)
11416 };
11417
11418 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11421 entity_code: company_code,
11422 period: format!("FY{}", fiscal_year),
11423 revenue: rev,
11424 pretax_income: pti,
11425 total_assets: assets,
11426 equity: eq,
11427 gross_profit,
11428 });
11429 }
11430
11431 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11432
11433 info!(
11434 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11435 {} total assets, {} equity benchmarks)",
11436 snapshot.materiality_calculations.len(),
11437 snapshot
11438 .materiality_calculations
11439 .iter()
11440 .filter(|m| matches!(
11441 m.benchmark,
11442 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11443 ))
11444 .count(),
11445 snapshot
11446 .materiality_calculations
11447 .iter()
11448 .filter(|m| matches!(
11449 m.benchmark,
11450 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11451 ))
11452 .count(),
11453 snapshot
11454 .materiality_calculations
11455 .iter()
11456 .filter(|m| matches!(
11457 m.benchmark,
11458 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11459 ))
11460 .count(),
11461 snapshot
11462 .materiality_calculations
11463 .iter()
11464 .filter(|m| matches!(
11465 m.benchmark,
11466 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11467 ))
11468 .count(),
11469 );
11470 }
11471
11472 {
11476 use datasynth_generators::audit::cra_generator::CraGenerator;
11477
11478 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11479
11480 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11482 .audit_scopes
11483 .iter()
11484 .map(|s| (s.entity_code.clone(), s.id.clone()))
11485 .collect();
11486
11487 for company in &self.config.companies {
11488 let cras = cra_gen.generate_for_entity(&company.code, None);
11489 let scope_id = entity_scope_map.get(&company.code).cloned();
11490 let cras_with_scope: Vec<_> = cras
11491 .into_iter()
11492 .map(|mut cra| {
11493 cra.scope_id = scope_id.clone();
11494 cra
11495 })
11496 .collect();
11497 snapshot.combined_risk_assessments.extend(cras_with_scope);
11498 }
11499
11500 let significant_count = snapshot
11501 .combined_risk_assessments
11502 .iter()
11503 .filter(|c| c.significant_risk)
11504 .count();
11505 let high_cra_count = snapshot
11506 .combined_risk_assessments
11507 .iter()
11508 .filter(|c| {
11509 matches!(
11510 c.combined_risk,
11511 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11512 )
11513 })
11514 .count();
11515
11516 info!(
11517 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11518 snapshot.combined_risk_assessments.len(),
11519 significant_count,
11520 high_cra_count,
11521 );
11522 }
11523
11524 {
11528 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11529
11530 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11531
11532 for company in &self.config.companies {
11534 let entity_code = company.code.clone();
11535
11536 let tolerable_error = snapshot
11538 .materiality_calculations
11539 .iter()
11540 .find(|m| m.entity_code == entity_code)
11541 .map(|m| m.tolerable_error);
11542
11543 let entity_cras: Vec<_> = snapshot
11545 .combined_risk_assessments
11546 .iter()
11547 .filter(|c| c.entity_code == entity_code)
11548 .cloned()
11549 .collect();
11550
11551 if !entity_cras.is_empty() {
11552 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11553 snapshot.sampling_plans.extend(plans);
11554 snapshot.sampled_items.extend(items);
11555 }
11556 }
11557
11558 let misstatement_count = snapshot
11559 .sampled_items
11560 .iter()
11561 .filter(|i| i.misstatement_found)
11562 .count();
11563
11564 info!(
11565 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11566 snapshot.sampling_plans.len(),
11567 snapshot.sampled_items.len(),
11568 misstatement_count,
11569 );
11570 }
11571
11572 {
11576 use datasynth_generators::audit::scots_generator::{
11577 ScotsGenerator, ScotsGeneratorConfig,
11578 };
11579
11580 let ic_enabled = self.config.intercompany.enabled;
11581
11582 let config = ScotsGeneratorConfig {
11583 intercompany_enabled: ic_enabled,
11584 ..ScotsGeneratorConfig::default()
11585 };
11586 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11587
11588 for company in &self.config.companies {
11589 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11590 snapshot
11591 .significant_transaction_classes
11592 .extend(entity_scots);
11593 }
11594
11595 let estimation_count = snapshot
11596 .significant_transaction_classes
11597 .iter()
11598 .filter(|s| {
11599 matches!(
11600 s.transaction_type,
11601 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11602 )
11603 })
11604 .count();
11605
11606 info!(
11607 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11608 snapshot.significant_transaction_classes.len(),
11609 estimation_count,
11610 );
11611 }
11612
11613 {
11617 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11618
11619 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11620 let entity_codes: Vec<String> = self
11621 .config
11622 .companies
11623 .iter()
11624 .map(|c| c.code.clone())
11625 .collect();
11626 let unusual_flags =
11627 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11628 info!(
11629 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11630 unusual_flags.len(),
11631 unusual_flags
11632 .iter()
11633 .filter(|f| matches!(
11634 f.severity,
11635 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11636 ))
11637 .count(),
11638 unusual_flags
11639 .iter()
11640 .filter(|f| matches!(
11641 f.severity,
11642 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11643 ))
11644 .count(),
11645 unusual_flags
11646 .iter()
11647 .filter(|f| matches!(
11648 f.severity,
11649 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11650 ))
11651 .count(),
11652 );
11653 snapshot.unusual_items = unusual_flags;
11654 }
11655
11656 {
11660 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11661
11662 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11663 let entity_codes: Vec<String> = self
11664 .config
11665 .companies
11666 .iter()
11667 .map(|c| c.code.clone())
11668 .collect();
11669 let current_period_label = format!("FY{fiscal_year}");
11670 let prior_period_label = format!("FY{}", fiscal_year - 1);
11671 let analytical_rels = ar_gen.generate_for_entities(
11672 &entity_codes,
11673 entries,
11674 ¤t_period_label,
11675 &prior_period_label,
11676 );
11677 let out_of_range = analytical_rels
11678 .iter()
11679 .filter(|r| !r.within_expected_range)
11680 .count();
11681 info!(
11682 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11683 analytical_rels.len(),
11684 out_of_range,
11685 );
11686 snapshot.analytical_relationships = analytical_rels;
11687 }
11688
11689 if let Some(pb) = pb {
11690 pb.finish_with_message(format!(
11691 "Audit data: {} engagements, {} workpapers, {} evidence, \
11692 {} confirmations, {} procedure steps, {} samples, \
11693 {} analytical, {} IA funcs, {} related parties, \
11694 {} component auditors, {} letters, {} subsequent events, \
11695 {} service orgs, {} going concern, {} accounting estimates, \
11696 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11697 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11698 {} unusual items, {} analytical relationships",
11699 snapshot.engagements.len(),
11700 snapshot.workpapers.len(),
11701 snapshot.evidence.len(),
11702 snapshot.confirmations.len(),
11703 snapshot.procedure_steps.len(),
11704 snapshot.samples.len(),
11705 snapshot.analytical_results.len(),
11706 snapshot.ia_functions.len(),
11707 snapshot.related_parties.len(),
11708 snapshot.component_auditors.len(),
11709 snapshot.engagement_letters.len(),
11710 snapshot.subsequent_events.len(),
11711 snapshot.service_organizations.len(),
11712 snapshot.going_concern_assessments.len(),
11713 snapshot.accounting_estimates.len(),
11714 snapshot.audit_opinions.len(),
11715 snapshot.key_audit_matters.len(),
11716 snapshot.sox_302_certifications.len(),
11717 snapshot.sox_404_assessments.len(),
11718 snapshot.materiality_calculations.len(),
11719 snapshot.combined_risk_assessments.len(),
11720 snapshot.sampling_plans.len(),
11721 snapshot.significant_transaction_classes.len(),
11722 snapshot.unusual_items.len(),
11723 snapshot.analytical_relationships.len(),
11724 ));
11725 }
11726
11727 {
11734 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11735 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11736 debug!(
11737 "PCAOB-ISA mappings generated: {} mappings",
11738 snapshot.isa_pcaob_mappings.len()
11739 );
11740 }
11741
11742 {
11749 use datasynth_standards::audit::isa_reference::IsaStandard;
11750 snapshot.isa_mappings = IsaStandard::standard_entries();
11751 debug!(
11752 "ISA standard entries generated: {} standards",
11753 snapshot.isa_mappings.len()
11754 );
11755 }
11756
11757 {
11760 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11761 .engagements
11762 .iter()
11763 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11764 .collect();
11765
11766 for rpt in &mut snapshot.related_party_transactions {
11767 if rpt.journal_entry_id.is_some() {
11768 continue; }
11770 let entity = engagement_by_id
11771 .get(&rpt.engagement_id.to_string())
11772 .copied()
11773 .unwrap_or("");
11774
11775 let best_je = entries
11777 .iter()
11778 .filter(|je| je.header.company_code == entity)
11779 .min_by_key(|je| {
11780 (je.header.posting_date - rpt.transaction_date)
11781 .num_days()
11782 .abs()
11783 });
11784
11785 if let Some(je) = best_je {
11786 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11787 }
11788 }
11789
11790 let linked = snapshot
11791 .related_party_transactions
11792 .iter()
11793 .filter(|t| t.journal_entry_id.is_some())
11794 .count();
11795 debug!(
11796 "Linked {}/{} related party transactions to journal entries",
11797 linked,
11798 snapshot.related_party_transactions.len()
11799 );
11800 }
11801
11802 Ok(snapshot)
11803 }
11804
11805 fn generate_audit_data_with_fsm(
11812 &mut self,
11813 entries: &[JournalEntry],
11814 ) -> SynthResult<AuditSnapshot> {
11815 use datasynth_audit_fsm::{
11816 context::EngagementContext,
11817 engine::AuditFsmEngine,
11818 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11819 };
11820 use rand::SeedableRng;
11821 use rand_chacha::ChaCha8Rng;
11822
11823 info!("Audit FSM: generating audit data via FSM engine");
11824
11825 let fsm_config = self
11826 .config
11827 .audit
11828 .fsm
11829 .as_ref()
11830 .expect("FSM config must be present when FSM is enabled");
11831
11832 let bwp = match fsm_config.blueprint.as_str() {
11834 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11835 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11836 _ => {
11837 warn!(
11838 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11839 fsm_config.blueprint
11840 );
11841 BlueprintWithPreconditions::load_builtin_fsa()
11842 }
11843 }
11844 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11845
11846 let overlay = match fsm_config.overlay.as_str() {
11848 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11849 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11850 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11851 _ => {
11852 warn!(
11853 "Unknown FSM overlay '{}', falling back to builtin:default",
11854 fsm_config.overlay
11855 );
11856 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11857 }
11858 }
11859 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11860
11861 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11863 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11864 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11865
11866 let company = self.config.companies.first();
11868 let company_code = company
11869 .map(|c| c.code.clone())
11870 .unwrap_or_else(|| "UNKNOWN".to_string());
11871 let company_name = company
11872 .map(|c| c.name.clone())
11873 .unwrap_or_else(|| "Unknown Company".to_string());
11874 let currency = company
11875 .map(|c| c.currency.clone())
11876 .unwrap_or_else(|| "USD".to_string());
11877
11878 let entity_entries: Vec<_> = entries
11880 .iter()
11881 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11882 .cloned()
11883 .collect();
11884 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
11888 .iter()
11889 .flat_map(|e| e.lines.iter())
11890 .filter(|l| l.account_code.starts_with('4'))
11891 .map(|l| l.credit_amount - l.debit_amount)
11892 .sum();
11893
11894 let total_assets: rust_decimal::Decimal = entries
11895 .iter()
11896 .flat_map(|e| e.lines.iter())
11897 .filter(|l| l.account_code.starts_with('1'))
11898 .map(|l| l.debit_amount - l.credit_amount)
11899 .sum();
11900
11901 let total_expenses: rust_decimal::Decimal = entries
11902 .iter()
11903 .flat_map(|e| e.lines.iter())
11904 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11905 .map(|l| l.debit_amount)
11906 .sum();
11907
11908 let equity: rust_decimal::Decimal = entries
11909 .iter()
11910 .flat_map(|e| e.lines.iter())
11911 .filter(|l| l.account_code.starts_with('3'))
11912 .map(|l| l.credit_amount - l.debit_amount)
11913 .sum();
11914
11915 let total_debt: rust_decimal::Decimal = entries
11916 .iter()
11917 .flat_map(|e| e.lines.iter())
11918 .filter(|l| l.account_code.starts_with('2'))
11919 .map(|l| l.credit_amount - l.debit_amount)
11920 .sum();
11921
11922 let pretax_income = total_revenue - total_expenses;
11923
11924 let cogs: rust_decimal::Decimal = entries
11925 .iter()
11926 .flat_map(|e| e.lines.iter())
11927 .filter(|l| l.account_code.starts_with('5'))
11928 .map(|l| l.debit_amount)
11929 .sum();
11930 let gross_profit = total_revenue - cogs;
11931
11932 let current_assets: rust_decimal::Decimal = entries
11933 .iter()
11934 .flat_map(|e| e.lines.iter())
11935 .filter(|l| {
11936 l.account_code.starts_with("10")
11937 || l.account_code.starts_with("11")
11938 || l.account_code.starts_with("12")
11939 || l.account_code.starts_with("13")
11940 })
11941 .map(|l| l.debit_amount - l.credit_amount)
11942 .sum();
11943 let current_liabilities: rust_decimal::Decimal = entries
11944 .iter()
11945 .flat_map(|e| e.lines.iter())
11946 .filter(|l| {
11947 l.account_code.starts_with("20")
11948 || l.account_code.starts_with("21")
11949 || l.account_code.starts_with("22")
11950 })
11951 .map(|l| l.credit_amount - l.debit_amount)
11952 .sum();
11953 let working_capital = current_assets - current_liabilities;
11954
11955 let depreciation: rust_decimal::Decimal = entries
11956 .iter()
11957 .flat_map(|e| e.lines.iter())
11958 .filter(|l| l.account_code.starts_with("60"))
11959 .map(|l| l.debit_amount)
11960 .sum();
11961 let operating_cash_flow = pretax_income + depreciation;
11962
11963 let accounts: Vec<String> = self
11965 .coa
11966 .as_ref()
11967 .map(|coa| {
11968 coa.get_postable_accounts()
11969 .iter()
11970 .map(|acc| acc.account_code().to_string())
11971 .collect()
11972 })
11973 .unwrap_or_default();
11974
11975 let team_member_ids: Vec<String> = self
11977 .master_data
11978 .employees
11979 .iter()
11980 .take(8) .map(|e| e.employee_id.clone())
11982 .collect();
11983 let team_member_pairs: Vec<(String, String)> = self
11984 .master_data
11985 .employees
11986 .iter()
11987 .take(8)
11988 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11989 .collect();
11990
11991 let vendor_names: Vec<String> = self
11992 .master_data
11993 .vendors
11994 .iter()
11995 .map(|v| v.name.clone())
11996 .collect();
11997 let customer_names: Vec<String> = self
11998 .master_data
11999 .customers
12000 .iter()
12001 .map(|c| c.name.clone())
12002 .collect();
12003
12004 let entity_codes: Vec<String> = self
12005 .config
12006 .companies
12007 .iter()
12008 .map(|c| c.code.clone())
12009 .collect();
12010
12011 let journal_entry_ids: Vec<String> = entries
12013 .iter()
12014 .take(50)
12015 .map(|e| e.header.document_id.to_string())
12016 .collect();
12017
12018 let mut account_balances = std::collections::HashMap::<String, f64>::new();
12020 for entry in entries {
12021 for line in &entry.lines {
12022 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12023 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12024 *account_balances
12025 .entry(line.account_code.clone())
12026 .or_insert(0.0) += debit_f64 - credit_f64;
12027 }
12028 }
12029
12030 let control_ids: Vec<String> = Vec::new();
12035 let anomaly_refs: Vec<String> = Vec::new();
12036
12037 let mut context = EngagementContext {
12038 company_code,
12039 company_name,
12040 fiscal_year: start_date.year(),
12041 currency,
12042 total_revenue,
12043 total_assets,
12044 engagement_start: start_date,
12045 report_date: period_end,
12046 pretax_income,
12047 equity,
12048 gross_profit,
12049 working_capital,
12050 operating_cash_flow,
12051 total_debt,
12052 team_member_ids,
12053 team_member_pairs,
12054 accounts,
12055 vendor_names,
12056 customer_names,
12057 journal_entry_ids,
12058 account_balances,
12059 control_ids,
12060 anomaly_refs,
12061 journal_entries: entries.to_vec(),
12062 is_us_listed: false,
12063 entity_codes,
12064 auditor_firm_name: "DataSynth Audit LLP".into(),
12065 accounting_framework: self
12066 .config
12067 .accounting_standards
12068 .framework
12069 .map(|f| match f {
12070 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12071 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12072 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12073 "French GAAP"
12074 }
12075 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12076 "German GAAP"
12077 }
12078 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12079 "Dual Reporting"
12080 }
12081 })
12082 .unwrap_or("IFRS")
12083 .into(),
12084 };
12085
12086 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12088 let rng = ChaCha8Rng::seed_from_u64(seed);
12089 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12090
12091 let mut result = engine
12092 .run_engagement(&context)
12093 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12094
12095 info!(
12096 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12097 {} phases completed, duration {:.1}h",
12098 result.event_log.len(),
12099 result.artifacts.total_artifacts(),
12100 result.anomalies.len(),
12101 result.phases_completed.len(),
12102 result.total_duration_hours,
12103 );
12104
12105 let tb_entity = context.company_code.clone();
12107 let tb_fy = context.fiscal_year;
12108 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12109 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12110 entries,
12111 &tb_entity,
12112 tb_fy,
12113 self.coa.as_ref().map(|c| c.as_ref()),
12114 );
12115
12116 let bag = result.artifacts;
12118 let mut snapshot = AuditSnapshot {
12119 engagements: bag.engagements,
12120 engagement_letters: bag.engagement_letters,
12121 materiality_calculations: bag.materiality_calculations,
12122 risk_assessments: bag.risk_assessments,
12123 combined_risk_assessments: bag.combined_risk_assessments,
12124 workpapers: bag.workpapers,
12125 evidence: bag.evidence,
12126 findings: bag.findings,
12127 judgments: bag.judgments,
12128 sampling_plans: bag.sampling_plans,
12129 sampled_items: bag.sampled_items,
12130 analytical_results: bag.analytical_results,
12131 going_concern_assessments: bag.going_concern_assessments,
12132 subsequent_events: bag.subsequent_events,
12133 audit_opinions: bag.audit_opinions,
12134 key_audit_matters: bag.key_audit_matters,
12135 procedure_steps: bag.procedure_steps,
12136 samples: bag.samples,
12137 confirmations: bag.confirmations,
12138 confirmation_responses: bag.confirmation_responses,
12139 fsm_event_trail: Some(result.event_log),
12141 ..Default::default()
12143 };
12144
12145 {
12147 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12148 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12149 }
12150 {
12151 use datasynth_standards::audit::isa_reference::IsaStandard;
12152 snapshot.isa_mappings = IsaStandard::standard_entries();
12153 }
12154
12155 info!(
12156 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12157 {} risk assessments, {} findings, {} materiality calcs",
12158 snapshot.engagements.len(),
12159 snapshot.workpapers.len(),
12160 snapshot.evidence.len(),
12161 snapshot.risk_assessments.len(),
12162 snapshot.findings.len(),
12163 snapshot.materiality_calculations.len(),
12164 );
12165
12166 Ok(snapshot)
12167 }
12168
12169 fn export_graphs(
12176 &mut self,
12177 entries: &[JournalEntry],
12178 _coa: &Arc<ChartOfAccounts>,
12179 stats: &mut EnhancedGenerationStatistics,
12180 ) -> SynthResult<GraphExportSnapshot> {
12181 let pb = self.create_progress_bar(100, "Exporting Graphs");
12182
12183 let mut snapshot = GraphExportSnapshot::default();
12184
12185 let output_dir = self
12187 .output_path
12188 .clone()
12189 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12190 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12191
12192 for graph_type in &self.config.graph_export.graph_types {
12194 if let Some(pb) = &pb {
12195 pb.inc(10);
12196 }
12197
12198 let graph_config = TransactionGraphConfig {
12200 include_vendors: false,
12201 include_customers: false,
12202 create_debit_credit_edges: true,
12203 include_document_nodes: graph_type.include_document_nodes,
12204 min_edge_weight: graph_type.min_edge_weight,
12205 aggregate_parallel_edges: graph_type.aggregate_edges,
12206 framework: None,
12207 };
12208
12209 let mut builder = TransactionGraphBuilder::new(graph_config);
12210 builder.add_journal_entries(entries);
12211 let graph = builder.build();
12212
12213 stats.graph_node_count += graph.node_count();
12215 stats.graph_edge_count += graph.edge_count();
12216
12217 if let Some(pb) = &pb {
12218 pb.inc(40);
12219 }
12220
12221 for format in &self.config.graph_export.formats {
12223 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12224
12225 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12227 warn!("Failed to create graph output directory: {}", e);
12228 continue;
12229 }
12230
12231 match format {
12232 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12233 let pyg_config = PyGExportConfig {
12234 common: datasynth_graph::CommonExportConfig {
12235 export_node_features: true,
12236 export_edge_features: true,
12237 export_node_labels: true,
12238 export_edge_labels: true,
12239 export_masks: true,
12240 train_ratio: self.config.graph_export.train_ratio,
12241 val_ratio: self.config.graph_export.validation_ratio,
12242 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12243 },
12244 one_hot_categoricals: false,
12245 };
12246
12247 let exporter = PyGExporter::new(pyg_config);
12248 match exporter.export(&graph, &format_dir) {
12249 Ok(metadata) => {
12250 snapshot.exports.insert(
12251 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12252 GraphExportInfo {
12253 name: graph_type.name.clone(),
12254 format: "pytorch_geometric".to_string(),
12255 output_path: format_dir.clone(),
12256 node_count: metadata.num_nodes,
12257 edge_count: metadata.num_edges,
12258 },
12259 );
12260 snapshot.graph_count += 1;
12261 }
12262 Err(e) => {
12263 warn!("Failed to export PyTorch Geometric graph: {}", e);
12264 }
12265 }
12266 }
12267 datasynth_config::schema::GraphExportFormat::Neo4j => {
12268 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12269
12270 let neo4j_config = Neo4jExportConfig {
12271 export_node_properties: true,
12272 export_edge_properties: true,
12273 export_features: true,
12274 generate_cypher: true,
12275 generate_admin_import: true,
12276 database_name: "synth".to_string(),
12277 cypher_batch_size: 1000,
12278 };
12279
12280 let exporter = Neo4jExporter::new(neo4j_config);
12281 match exporter.export(&graph, &format_dir) {
12282 Ok(metadata) => {
12283 snapshot.exports.insert(
12284 format!("{}_{}", graph_type.name, "neo4j"),
12285 GraphExportInfo {
12286 name: graph_type.name.clone(),
12287 format: "neo4j".to_string(),
12288 output_path: format_dir.clone(),
12289 node_count: metadata.num_nodes,
12290 edge_count: metadata.num_edges,
12291 },
12292 );
12293 snapshot.graph_count += 1;
12294 }
12295 Err(e) => {
12296 warn!("Failed to export Neo4j graph: {}", e);
12297 }
12298 }
12299 }
12300 datasynth_config::schema::GraphExportFormat::Dgl => {
12301 use datasynth_graph::{DGLExportConfig, DGLExporter};
12302
12303 let dgl_config = DGLExportConfig {
12304 common: datasynth_graph::CommonExportConfig {
12305 export_node_features: true,
12306 export_edge_features: true,
12307 export_node_labels: true,
12308 export_edge_labels: true,
12309 export_masks: true,
12310 train_ratio: self.config.graph_export.train_ratio,
12311 val_ratio: self.config.graph_export.validation_ratio,
12312 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12313 },
12314 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12315 include_pickle_script: true, };
12317
12318 let exporter = DGLExporter::new(dgl_config);
12319 match exporter.export(&graph, &format_dir) {
12320 Ok(metadata) => {
12321 snapshot.exports.insert(
12322 format!("{}_{}", graph_type.name, "dgl"),
12323 GraphExportInfo {
12324 name: graph_type.name.clone(),
12325 format: "dgl".to_string(),
12326 output_path: format_dir.clone(),
12327 node_count: metadata.common.num_nodes,
12328 edge_count: metadata.common.num_edges,
12329 },
12330 );
12331 snapshot.graph_count += 1;
12332 }
12333 Err(e) => {
12334 warn!("Failed to export DGL graph: {}", e);
12335 }
12336 }
12337 }
12338 datasynth_config::schema::GraphExportFormat::RustGraph => {
12339 use datasynth_graph::{
12340 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12341 };
12342
12343 let rustgraph_config = RustGraphExportConfig {
12344 include_features: true,
12345 include_temporal: true,
12346 include_labels: true,
12347 source_name: "datasynth".to_string(),
12348 batch_id: None,
12349 output_format: RustGraphOutputFormat::JsonLines,
12350 export_node_properties: true,
12351 export_edge_properties: true,
12352 pretty_print: false,
12353 };
12354
12355 let exporter = RustGraphExporter::new(rustgraph_config);
12356 match exporter.export(&graph, &format_dir) {
12357 Ok(metadata) => {
12358 snapshot.exports.insert(
12359 format!("{}_{}", graph_type.name, "rustgraph"),
12360 GraphExportInfo {
12361 name: graph_type.name.clone(),
12362 format: "rustgraph".to_string(),
12363 output_path: format_dir.clone(),
12364 node_count: metadata.num_nodes,
12365 edge_count: metadata.num_edges,
12366 },
12367 );
12368 snapshot.graph_count += 1;
12369 }
12370 Err(e) => {
12371 warn!("Failed to export RustGraph: {}", e);
12372 }
12373 }
12374 }
12375 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12376 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12378 }
12379 }
12380 }
12381
12382 if let Some(pb) = &pb {
12383 pb.inc(40);
12384 }
12385 }
12386
12387 stats.graph_export_count = snapshot.graph_count;
12388 snapshot.exported = snapshot.graph_count > 0;
12389
12390 if let Some(pb) = pb {
12391 pb.finish_with_message(format!(
12392 "Graphs exported: {} graphs ({} nodes, {} edges)",
12393 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12394 ));
12395 }
12396
12397 Ok(snapshot)
12398 }
12399
12400 fn build_additional_graphs(
12405 &self,
12406 banking: &BankingSnapshot,
12407 intercompany: &IntercompanySnapshot,
12408 entries: &[JournalEntry],
12409 stats: &mut EnhancedGenerationStatistics,
12410 ) {
12411 let output_dir = self
12412 .output_path
12413 .clone()
12414 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12415 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12416
12417 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12419 info!("Phase 10c: Building banking network graph");
12420 let config = BankingGraphConfig::default();
12421 let mut builder = BankingGraphBuilder::new(config);
12422 builder.add_customers(&banking.customers);
12423 builder.add_accounts(&banking.accounts, &banking.customers);
12424 builder.add_transactions(&banking.transactions);
12425 let graph = builder.build();
12426
12427 let node_count = graph.node_count();
12428 let edge_count = graph.edge_count();
12429 stats.graph_node_count += node_count;
12430 stats.graph_edge_count += edge_count;
12431
12432 for format in &self.config.graph_export.formats {
12434 if matches!(
12435 format,
12436 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12437 ) {
12438 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12439 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12440 warn!("Failed to create banking graph output dir: {}", e);
12441 continue;
12442 }
12443 let pyg_config = PyGExportConfig::default();
12444 let exporter = PyGExporter::new(pyg_config);
12445 if let Err(e) = exporter.export(&graph, &format_dir) {
12446 warn!("Failed to export banking graph as PyG: {}", e);
12447 } else {
12448 info!(
12449 "Banking network graph exported: {} nodes, {} edges",
12450 node_count, edge_count
12451 );
12452 }
12453 }
12454 }
12455 }
12456
12457 let approval_entries: Vec<_> = entries
12459 .iter()
12460 .filter(|je| je.header.approval_workflow.is_some())
12461 .collect();
12462
12463 if !approval_entries.is_empty() {
12464 info!(
12465 "Phase 10c: Building approval network graph ({} entries with approvals)",
12466 approval_entries.len()
12467 );
12468 let config = ApprovalGraphConfig::default();
12469 let mut builder = ApprovalGraphBuilder::new(config);
12470
12471 for je in &approval_entries {
12472 if let Some(ref wf) = je.header.approval_workflow {
12473 for action in &wf.actions {
12474 let record = datasynth_core::models::ApprovalRecord {
12475 approval_id: format!(
12476 "APR-{}-{}",
12477 je.header.document_id, action.approval_level
12478 ),
12479 document_number: je.header.document_id.to_string(),
12480 document_type: "JE".to_string(),
12481 company_code: je.company_code().to_string(),
12482 requester_id: wf.preparer_id.clone(),
12483 requester_name: Some(wf.preparer_name.clone()),
12484 approver_id: action.actor_id.clone(),
12485 approver_name: action.actor_name.clone(),
12486 approval_date: je.posting_date(),
12487 action: format!("{:?}", action.action),
12488 amount: wf.amount,
12489 approval_limit: None,
12490 comments: action.comments.clone(),
12491 delegation_from: None,
12492 is_auto_approved: false,
12493 };
12494 builder.add_approval(&record);
12495 }
12496 }
12497 }
12498
12499 let graph = builder.build();
12500 let node_count = graph.node_count();
12501 let edge_count = graph.edge_count();
12502 stats.graph_node_count += node_count;
12503 stats.graph_edge_count += edge_count;
12504
12505 for format in &self.config.graph_export.formats {
12507 if matches!(
12508 format,
12509 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12510 ) {
12511 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12512 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12513 warn!("Failed to create approval graph output dir: {}", e);
12514 continue;
12515 }
12516 let pyg_config = PyGExportConfig::default();
12517 let exporter = PyGExporter::new(pyg_config);
12518 if let Err(e) = exporter.export(&graph, &format_dir) {
12519 warn!("Failed to export approval graph as PyG: {}", e);
12520 } else {
12521 info!(
12522 "Approval network graph exported: {} nodes, {} edges",
12523 node_count, edge_count
12524 );
12525 }
12526 }
12527 }
12528 }
12529
12530 if self.config.companies.len() >= 2 {
12532 info!(
12533 "Phase 10c: Building entity relationship graph ({} companies)",
12534 self.config.companies.len()
12535 );
12536
12537 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12538 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12539
12540 let parent_code = &self.config.companies[0].code;
12542 let mut companies: Vec<datasynth_core::models::Company> =
12543 Vec::with_capacity(self.config.companies.len());
12544
12545 let first = &self.config.companies[0];
12547 companies.push(datasynth_core::models::Company::parent(
12548 &first.code,
12549 &first.name,
12550 &first.country,
12551 &first.currency,
12552 ));
12553
12554 for cc in self.config.companies.iter().skip(1) {
12556 companies.push(datasynth_core::models::Company::subsidiary(
12557 &cc.code,
12558 &cc.name,
12559 &cc.country,
12560 &cc.currency,
12561 parent_code,
12562 rust_decimal::Decimal::from(100),
12563 ));
12564 }
12565
12566 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12568 self.config
12569 .companies
12570 .iter()
12571 .skip(1)
12572 .enumerate()
12573 .map(|(i, cc)| {
12574 let mut rel =
12575 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12576 format!("REL{:03}", i + 1),
12577 parent_code.clone(),
12578 cc.code.clone(),
12579 rust_decimal::Decimal::from(100),
12580 start_date,
12581 );
12582 rel.functional_currency = cc.currency.clone();
12583 rel
12584 })
12585 .collect();
12586
12587 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12588 builder.add_companies(&companies);
12589 builder.add_ownership_relationships(&relationships);
12590
12591 for pair in &intercompany.matched_pairs {
12593 builder.add_intercompany_edge(
12594 &pair.seller_company,
12595 &pair.buyer_company,
12596 pair.amount,
12597 &format!("{:?}", pair.transaction_type),
12598 );
12599 }
12600
12601 let graph = builder.build();
12602 let node_count = graph.node_count();
12603 let edge_count = graph.edge_count();
12604 stats.graph_node_count += node_count;
12605 stats.graph_edge_count += edge_count;
12606
12607 for format in &self.config.graph_export.formats {
12609 if matches!(
12610 format,
12611 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12612 ) {
12613 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12614 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12615 warn!("Failed to create entity graph output dir: {}", e);
12616 continue;
12617 }
12618 let pyg_config = PyGExportConfig::default();
12619 let exporter = PyGExporter::new(pyg_config);
12620 if let Err(e) = exporter.export(&graph, &format_dir) {
12621 warn!("Failed to export entity graph as PyG: {}", e);
12622 } else {
12623 info!(
12624 "Entity relationship graph exported: {} nodes, {} edges",
12625 node_count, edge_count
12626 );
12627 }
12628 }
12629 }
12630 } else {
12631 debug!(
12632 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12633 self.config.companies.len()
12634 );
12635 }
12636 }
12637
12638 #[allow(clippy::too_many_arguments)]
12645 fn export_hypergraph(
12646 &self,
12647 coa: &Arc<ChartOfAccounts>,
12648 entries: &[JournalEntry],
12649 document_flows: &DocumentFlowSnapshot,
12650 sourcing: &SourcingSnapshot,
12651 hr: &HrSnapshot,
12652 manufacturing: &ManufacturingSnapshot,
12653 banking: &BankingSnapshot,
12654 audit: &AuditSnapshot,
12655 financial_reporting: &FinancialReportingSnapshot,
12656 ocpm: &OcpmSnapshot,
12657 compliance: &ComplianceRegulationsSnapshot,
12658 stats: &mut EnhancedGenerationStatistics,
12659 ) -> SynthResult<HypergraphExportInfo> {
12660 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12661 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12662 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12663 use datasynth_graph::models::hypergraph::AggregationStrategy;
12664
12665 let hg_settings = &self.config.graph_export.hypergraph;
12666
12667 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12669 "truncate" => AggregationStrategy::Truncate,
12670 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12671 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12672 "importance_sample" => AggregationStrategy::ImportanceSample,
12673 _ => AggregationStrategy::PoolByCounterparty,
12674 };
12675
12676 let builder_config = HypergraphConfig {
12677 max_nodes: hg_settings.max_nodes,
12678 aggregation_strategy,
12679 include_coso: hg_settings.governance_layer.include_coso,
12680 include_controls: hg_settings.governance_layer.include_controls,
12681 include_sox: hg_settings.governance_layer.include_sox,
12682 include_vendors: hg_settings.governance_layer.include_vendors,
12683 include_customers: hg_settings.governance_layer.include_customers,
12684 include_employees: hg_settings.governance_layer.include_employees,
12685 include_p2p: hg_settings.process_layer.include_p2p,
12686 include_o2c: hg_settings.process_layer.include_o2c,
12687 include_s2c: hg_settings.process_layer.include_s2c,
12688 include_h2r: hg_settings.process_layer.include_h2r,
12689 include_mfg: hg_settings.process_layer.include_mfg,
12690 include_bank: hg_settings.process_layer.include_bank,
12691 include_audit: hg_settings.process_layer.include_audit,
12692 include_r2r: hg_settings.process_layer.include_r2r,
12693 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12694 docs_per_counterparty_threshold: hg_settings
12695 .process_layer
12696 .docs_per_counterparty_threshold,
12697 include_accounts: hg_settings.accounting_layer.include_accounts,
12698 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12699 include_cross_layer_edges: hg_settings.cross_layer.enabled,
12700 include_compliance: self.config.compliance_regulations.enabled,
12701 include_tax: true,
12702 include_treasury: true,
12703 include_esg: true,
12704 include_project: true,
12705 include_intercompany: true,
12706 include_temporal_events: true,
12707 };
12708
12709 let mut builder = HypergraphBuilder::new(builder_config);
12710
12711 builder.add_coso_framework();
12713
12714 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12717 let controls = InternalControl::standard_controls();
12718 builder.add_controls(&controls);
12719 }
12720
12721 builder.add_vendors(&self.master_data.vendors);
12723 builder.add_customers(&self.master_data.customers);
12724 builder.add_employees(&self.master_data.employees);
12725
12726 builder.add_p2p_documents(
12728 &document_flows.purchase_orders,
12729 &document_flows.goods_receipts,
12730 &document_flows.vendor_invoices,
12731 &document_flows.payments,
12732 );
12733 builder.add_o2c_documents(
12734 &document_flows.sales_orders,
12735 &document_flows.deliveries,
12736 &document_flows.customer_invoices,
12737 );
12738 builder.add_s2c_documents(
12739 &sourcing.sourcing_projects,
12740 &sourcing.qualifications,
12741 &sourcing.rfx_events,
12742 &sourcing.bids,
12743 &sourcing.bid_evaluations,
12744 &sourcing.contracts,
12745 );
12746 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12747 builder.add_mfg_documents(
12748 &manufacturing.production_orders,
12749 &manufacturing.quality_inspections,
12750 &manufacturing.cycle_counts,
12751 );
12752 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12753 builder.add_audit_documents(
12754 &audit.engagements,
12755 &audit.workpapers,
12756 &audit.findings,
12757 &audit.evidence,
12758 &audit.risk_assessments,
12759 &audit.judgments,
12760 &audit.materiality_calculations,
12761 &audit.audit_opinions,
12762 &audit.going_concern_assessments,
12763 );
12764 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12765
12766 if let Some(ref event_log) = ocpm.event_log {
12768 builder.add_ocpm_events(event_log);
12769 }
12770
12771 if self.config.compliance_regulations.enabled
12773 && hg_settings.governance_layer.include_controls
12774 {
12775 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12777 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12778 .standard_records
12779 .iter()
12780 .filter_map(|r| {
12781 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12782 registry.get(&sid).cloned()
12783 })
12784 .collect();
12785
12786 builder.add_compliance_regulations(
12787 &standards,
12788 &compliance.findings,
12789 &compliance.filings,
12790 );
12791 }
12792
12793 builder.add_accounts(coa);
12795 builder.add_journal_entries_as_hyperedges(entries);
12796
12797 let hypergraph = builder.build();
12799
12800 let output_dir = self
12802 .output_path
12803 .clone()
12804 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12805 let hg_dir = output_dir
12806 .join(&self.config.graph_export.output_subdirectory)
12807 .join(&hg_settings.output_subdirectory);
12808
12809 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12811 "unified" => {
12812 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12813 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12814 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12815 })?;
12816 (
12817 metadata.num_nodes,
12818 metadata.num_edges,
12819 metadata.num_hyperedges,
12820 )
12821 }
12822 _ => {
12823 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12825 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12826 SynthError::generation(format!("Hypergraph export failed: {e}"))
12827 })?;
12828 (
12829 metadata.num_nodes,
12830 metadata.num_edges,
12831 metadata.num_hyperedges,
12832 )
12833 }
12834 };
12835
12836 #[cfg(feature = "streaming")]
12838 if let Some(ref target_url) = hg_settings.stream_target {
12839 use crate::stream_client::{StreamClient, StreamConfig};
12840 use std::io::Write as _;
12841
12842 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12843 let stream_config = StreamConfig {
12844 target_url: target_url.clone(),
12845 batch_size: hg_settings.stream_batch_size,
12846 api_key,
12847 ..StreamConfig::default()
12848 };
12849
12850 match StreamClient::new(stream_config) {
12851 Ok(mut client) => {
12852 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12853 match exporter.export_to_writer(&hypergraph, &mut client) {
12854 Ok(_) => {
12855 if let Err(e) = client.flush() {
12856 warn!("Failed to flush stream client: {}", e);
12857 } else {
12858 info!("Streamed {} records to {}", client.total_sent(), target_url);
12859 }
12860 }
12861 Err(e) => {
12862 warn!("Streaming export failed: {}", e);
12863 }
12864 }
12865 }
12866 Err(e) => {
12867 warn!("Failed to create stream client: {}", e);
12868 }
12869 }
12870 }
12871
12872 stats.graph_node_count += num_nodes;
12874 stats.graph_edge_count += num_edges;
12875 stats.graph_export_count += 1;
12876
12877 Ok(HypergraphExportInfo {
12878 node_count: num_nodes,
12879 edge_count: num_edges,
12880 hyperedge_count: num_hyperedges,
12881 output_path: hg_dir,
12882 })
12883 }
12884
12885 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12890 let pb = self.create_progress_bar(100, "Generating Banking Data");
12891
12892 let orchestrator = BankingOrchestratorBuilder::new()
12894 .config(self.config.banking.clone())
12895 .seed(self.seed + 9000)
12896 .country_pack(self.primary_pack().clone())
12897 .build();
12898
12899 if let Some(pb) = &pb {
12900 pb.inc(10);
12901 }
12902
12903 let result = orchestrator.generate();
12905
12906 if let Some(pb) = &pb {
12907 pb.inc(90);
12908 pb.finish_with_message(format!(
12909 "Banking: {} customers, {} transactions",
12910 result.customers.len(),
12911 result.transactions.len()
12912 ));
12913 }
12914
12915 let mut banking_customers = result.customers;
12920 let core_customers = &self.master_data.customers;
12921 if !core_customers.is_empty() {
12922 for (i, bc) in banking_customers.iter_mut().enumerate() {
12923 let core = &core_customers[i % core_customers.len()];
12924 bc.name = CustomerName::business(&core.name);
12925 bc.residence_country = core.country.clone();
12926 bc.enterprise_customer_id = Some(core.customer_id.clone());
12927 }
12928 debug!(
12929 "Cross-referenced {} banking customers with {} core customers",
12930 banking_customers.len(),
12931 core_customers.len()
12932 );
12933 }
12934
12935 Ok(BankingSnapshot {
12936 customers: banking_customers,
12937 accounts: result.accounts,
12938 transactions: result.transactions,
12939 transaction_labels: result.transaction_labels,
12940 customer_labels: result.customer_labels,
12941 account_labels: result.account_labels,
12942 relationship_labels: result.relationship_labels,
12943 narratives: result.narratives,
12944 suspicious_count: result.stats.suspicious_count,
12945 scenario_count: result.scenarios.len(),
12946 })
12947 }
12948
12949 fn calculate_total_transactions(&self) -> u64 {
12951 let months = self.config.global.period_months as f64;
12952 self.config
12953 .companies
12954 .iter()
12955 .map(|c| {
12956 let annual = c.annual_transaction_volume.count() as f64;
12957 let weighted = annual * c.volume_weight;
12958 (weighted * months / 12.0) as u64
12959 })
12960 .sum()
12961 }
12962
12963 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12965 if !self.phase_config.show_progress {
12966 return None;
12967 }
12968
12969 let pb = if let Some(mp) = &self.multi_progress {
12970 mp.add(ProgressBar::new(total))
12971 } else {
12972 ProgressBar::new(total)
12973 };
12974
12975 pb.set_style(
12976 ProgressStyle::default_bar()
12977 .template(&format!(
12978 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12979 ))
12980 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12981 .progress_chars("#>-"),
12982 );
12983
12984 Some(pb)
12985 }
12986
12987 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12989 self.coa.clone()
12990 }
12991
12992 pub fn get_master_data(&self) -> &MasterDataSnapshot {
12994 &self.master_data
12995 }
12996
12997 fn phase_compliance_regulations(
12999 &mut self,
13000 _stats: &mut EnhancedGenerationStatistics,
13001 ) -> SynthResult<ComplianceRegulationsSnapshot> {
13002 if !self.phase_config.generate_compliance_regulations {
13003 return Ok(ComplianceRegulationsSnapshot::default());
13004 }
13005
13006 info!("Phase: Generating Compliance Regulations Data");
13007
13008 let cr_config = &self.config.compliance_regulations;
13009
13010 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13012 self.config
13013 .companies
13014 .iter()
13015 .map(|c| c.country.clone())
13016 .collect::<std::collections::HashSet<_>>()
13017 .into_iter()
13018 .collect()
13019 } else {
13020 cr_config.jurisdictions.clone()
13021 };
13022
13023 let fallback_date =
13025 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13026 let reference_date = cr_config
13027 .reference_date
13028 .as_ref()
13029 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13030 .unwrap_or_else(|| {
13031 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13032 .unwrap_or(fallback_date)
13033 });
13034
13035 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13037 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13038 let cross_reference_records = reg_gen.generate_cross_reference_records();
13039 let jurisdiction_records =
13040 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13041
13042 info!(
13043 " Standards: {} records, {} cross-references, {} jurisdictions",
13044 standard_records.len(),
13045 cross_reference_records.len(),
13046 jurisdiction_records.len()
13047 );
13048
13049 let audit_procedures = if cr_config.audit_procedures.enabled {
13051 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13052 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13053 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13054 confidence_level: cr_config.audit_procedures.confidence_level,
13055 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13056 };
13057 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13058 self.seed + 9000,
13059 proc_config,
13060 );
13061 let registry = reg_gen.registry();
13062 let mut all_procs = Vec::new();
13063 for jurisdiction in &jurisdictions {
13064 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13065 all_procs.extend(procs);
13066 }
13067 info!(" Audit procedures: {}", all_procs.len());
13068 all_procs
13069 } else {
13070 Vec::new()
13071 };
13072
13073 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13075 let finding_config =
13076 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13077 finding_rate: cr_config.findings.finding_rate,
13078 material_weakness_rate: cr_config.findings.material_weakness_rate,
13079 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13080 generate_remediation: cr_config.findings.generate_remediation,
13081 };
13082 let mut finding_gen =
13083 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13084 self.seed + 9100,
13085 finding_config,
13086 );
13087 let mut all_findings = Vec::new();
13088 for company in &self.config.companies {
13089 let company_findings =
13090 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13091 all_findings.extend(company_findings);
13092 }
13093 info!(" Compliance findings: {}", all_findings.len());
13094 all_findings
13095 } else {
13096 Vec::new()
13097 };
13098
13099 let filings = if cr_config.filings.enabled {
13101 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13102 filing_types: cr_config.filings.filing_types.clone(),
13103 generate_status_progression: cr_config.filings.generate_status_progression,
13104 };
13105 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13106 self.seed + 9200,
13107 filing_config,
13108 );
13109 let company_codes: Vec<String> = self
13110 .config
13111 .companies
13112 .iter()
13113 .map(|c| c.code.clone())
13114 .collect();
13115 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13116 .unwrap_or(fallback_date);
13117 let filings = filing_gen.generate_filings(
13118 &company_codes,
13119 &jurisdictions,
13120 start_date,
13121 self.config.global.period_months,
13122 );
13123 info!(" Regulatory filings: {}", filings.len());
13124 filings
13125 } else {
13126 Vec::new()
13127 };
13128
13129 let compliance_graph = if cr_config.graph.enabled {
13131 let graph_config = datasynth_graph::ComplianceGraphConfig {
13132 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13133 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13134 include_cross_references: cr_config.graph.include_cross_references,
13135 include_supersession_edges: cr_config.graph.include_supersession_edges,
13136 include_account_links: cr_config.graph.include_account_links,
13137 include_control_links: cr_config.graph.include_control_links,
13138 include_company_links: cr_config.graph.include_company_links,
13139 };
13140 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13141
13142 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13144 .iter()
13145 .map(|r| datasynth_graph::StandardNodeInput {
13146 standard_id: r.standard_id.clone(),
13147 title: r.title.clone(),
13148 category: r.category.clone(),
13149 domain: r.domain.clone(),
13150 is_active: r.is_active,
13151 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13152 applicable_account_types: r.applicable_account_types.clone(),
13153 applicable_processes: r.applicable_processes.clone(),
13154 })
13155 .collect();
13156 builder.add_standards(&standard_inputs);
13157
13158 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13160 jurisdiction_records
13161 .iter()
13162 .map(|r| datasynth_graph::JurisdictionNodeInput {
13163 country_code: r.country_code.clone(),
13164 country_name: r.country_name.clone(),
13165 framework: r.accounting_framework.clone(),
13166 standard_count: r.standard_count,
13167 tax_rate: r.statutory_tax_rate,
13168 })
13169 .collect();
13170 builder.add_jurisdictions(&jurisdiction_inputs);
13171
13172 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13174 cross_reference_records
13175 .iter()
13176 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13177 from_standard: r.from_standard.clone(),
13178 to_standard: r.to_standard.clone(),
13179 relationship: r.relationship.clone(),
13180 convergence_level: r.convergence_level,
13181 })
13182 .collect();
13183 builder.add_cross_references(&xref_inputs);
13184
13185 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13187 .iter()
13188 .map(|r| datasynth_graph::JurisdictionMappingInput {
13189 country_code: r.jurisdiction.clone(),
13190 standard_id: r.standard_id.clone(),
13191 })
13192 .collect();
13193 builder.add_jurisdiction_mappings(&mapping_inputs);
13194
13195 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13197 .iter()
13198 .map(|p| datasynth_graph::ProcedureNodeInput {
13199 procedure_id: p.procedure_id.clone(),
13200 standard_id: p.standard_id.clone(),
13201 procedure_type: p.procedure_type.clone(),
13202 sample_size: p.sample_size,
13203 confidence_level: p.confidence_level,
13204 })
13205 .collect();
13206 builder.add_procedures(&proc_inputs);
13207
13208 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13210 .iter()
13211 .map(|f| datasynth_graph::FindingNodeInput {
13212 finding_id: f.finding_id.to_string(),
13213 standard_id: f
13214 .related_standards
13215 .first()
13216 .map(|s| s.as_str().to_string())
13217 .unwrap_or_default(),
13218 severity: f.severity.to_string(),
13219 deficiency_level: f.deficiency_level.to_string(),
13220 severity_score: f.deficiency_level.severity_score(),
13221 control_id: f.control_id.clone(),
13222 affected_accounts: f.affected_accounts.clone(),
13223 })
13224 .collect();
13225 builder.add_findings(&finding_inputs);
13226
13227 if cr_config.graph.include_account_links {
13229 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13230 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13231 for std_record in &standard_records {
13232 if let Some(std_obj) =
13233 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13234 &std_record.standard_id,
13235 ))
13236 {
13237 for acct_type in &std_obj.applicable_account_types {
13238 account_links.push(datasynth_graph::AccountLinkInput {
13239 standard_id: std_record.standard_id.clone(),
13240 account_code: acct_type.clone(),
13241 account_name: acct_type.clone(),
13242 });
13243 }
13244 }
13245 }
13246 builder.add_account_links(&account_links);
13247 }
13248
13249 if cr_config.graph.include_control_links {
13251 let mut control_links = Vec::new();
13252 let sox_like_ids: Vec<String> = standard_records
13254 .iter()
13255 .filter(|r| {
13256 r.standard_id.starts_with("SOX")
13257 || r.standard_id.starts_with("PCAOB-AS-2201")
13258 })
13259 .map(|r| r.standard_id.clone())
13260 .collect();
13261 let control_ids = [
13263 ("C001", "Cash Controls"),
13264 ("C002", "Large Transaction Approval"),
13265 ("C010", "PO Approval"),
13266 ("C011", "Three-Way Match"),
13267 ("C020", "Revenue Recognition"),
13268 ("C021", "Credit Check"),
13269 ("C030", "Manual JE Approval"),
13270 ("C031", "Period Close Review"),
13271 ("C032", "Account Reconciliation"),
13272 ("C040", "Payroll Processing"),
13273 ("C050", "Fixed Asset Capitalization"),
13274 ("C060", "Intercompany Elimination"),
13275 ];
13276 for sox_id in &sox_like_ids {
13277 for (ctrl_id, ctrl_name) in &control_ids {
13278 control_links.push(datasynth_graph::ControlLinkInput {
13279 standard_id: sox_id.clone(),
13280 control_id: ctrl_id.to_string(),
13281 control_name: ctrl_name.to_string(),
13282 });
13283 }
13284 }
13285 builder.add_control_links(&control_links);
13286 }
13287
13288 if cr_config.graph.include_company_links {
13290 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13291 .iter()
13292 .enumerate()
13293 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13294 filing_id: format!("F{:04}", i + 1),
13295 filing_type: f.filing_type.to_string(),
13296 company_code: f.company_code.clone(),
13297 jurisdiction: f.jurisdiction.clone(),
13298 status: format!("{:?}", f.status),
13299 })
13300 .collect();
13301 builder.add_filings(&filing_inputs);
13302 }
13303
13304 let graph = builder.build();
13305 info!(
13306 " Compliance graph: {} nodes, {} edges",
13307 graph.nodes.len(),
13308 graph.edges.len()
13309 );
13310 Some(graph)
13311 } else {
13312 None
13313 };
13314
13315 self.check_resources_with_log("post-compliance-regulations")?;
13316
13317 Ok(ComplianceRegulationsSnapshot {
13318 standard_records,
13319 cross_reference_records,
13320 jurisdiction_records,
13321 audit_procedures,
13322 findings,
13323 filings,
13324 compliance_graph,
13325 })
13326 }
13327
13328 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13330 use super::lineage::LineageGraphBuilder;
13331
13332 let mut builder = LineageGraphBuilder::new();
13333
13334 builder.add_config_section("config:global", "Global Config");
13336 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13337 builder.add_config_section("config:transactions", "Transaction Config");
13338
13339 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13341 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13342
13343 builder.configured_by("phase:coa", "config:chart_of_accounts");
13345 builder.configured_by("phase:je", "config:transactions");
13346
13347 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13349 builder.produced_by("output:je", "phase:je");
13350
13351 if self.phase_config.generate_master_data {
13353 builder.add_config_section("config:master_data", "Master Data Config");
13354 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13355 builder.configured_by("phase:master_data", "config:master_data");
13356 builder.input_to("phase:master_data", "phase:je");
13357 }
13358
13359 if self.phase_config.generate_document_flows {
13360 builder.add_config_section("config:document_flows", "Document Flow Config");
13361 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13362 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13363 builder.configured_by("phase:p2p", "config:document_flows");
13364 builder.configured_by("phase:o2c", "config:document_flows");
13365
13366 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13367 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13368 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13369 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13370 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13371
13372 builder.produced_by("output:po", "phase:p2p");
13373 builder.produced_by("output:gr", "phase:p2p");
13374 builder.produced_by("output:vi", "phase:p2p");
13375 builder.produced_by("output:so", "phase:o2c");
13376 builder.produced_by("output:ci", "phase:o2c");
13377 }
13378
13379 if self.phase_config.inject_anomalies {
13380 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13381 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13382 builder.configured_by("phase:anomaly", "config:fraud");
13383 builder.add_output_file(
13384 "output:labels",
13385 "Anomaly Labels",
13386 "labels/anomaly_labels.csv",
13387 );
13388 builder.produced_by("output:labels", "phase:anomaly");
13389 }
13390
13391 if self.phase_config.generate_audit {
13392 builder.add_config_section("config:audit", "Audit Config");
13393 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13394 builder.configured_by("phase:audit", "config:audit");
13395 }
13396
13397 if self.phase_config.generate_banking {
13398 builder.add_config_section("config:banking", "Banking Config");
13399 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13400 builder.configured_by("phase:banking", "config:banking");
13401 }
13402
13403 if self.config.llm.enabled {
13404 builder.add_config_section("config:llm", "LLM Enrichment Config");
13405 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13406 builder.configured_by("phase:llm_enrichment", "config:llm");
13407 }
13408
13409 if self.config.diffusion.enabled {
13410 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13411 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13412 builder.configured_by("phase:diffusion", "config:diffusion");
13413 }
13414
13415 if self.config.causal.enabled {
13416 builder.add_config_section("config:causal", "Causal Generation Config");
13417 builder.add_generator_phase("phase:causal", "Causal Overlay");
13418 builder.configured_by("phase:causal", "config:causal");
13419 }
13420
13421 builder.build()
13422 }
13423
13424 fn compute_company_revenue(
13433 entries: &[JournalEntry],
13434 company_code: &str,
13435 ) -> rust_decimal::Decimal {
13436 use rust_decimal::Decimal;
13437 let mut revenue = Decimal::ZERO;
13438 for je in entries {
13439 if je.header.company_code != company_code {
13440 continue;
13441 }
13442 for line in &je.lines {
13443 if line.gl_account.starts_with('4') {
13444 revenue += line.credit_amount - line.debit_amount;
13446 }
13447 }
13448 }
13449 revenue.max(Decimal::ZERO)
13450 }
13451
13452 fn compute_entity_net_assets(
13456 entries: &[JournalEntry],
13457 entity_code: &str,
13458 ) -> rust_decimal::Decimal {
13459 use rust_decimal::Decimal;
13460 let mut asset_net = Decimal::ZERO;
13461 let mut liability_net = Decimal::ZERO;
13462 for je in entries {
13463 if je.header.company_code != entity_code {
13464 continue;
13465 }
13466 for line in &je.lines {
13467 if line.gl_account.starts_with('1') {
13468 asset_net += line.debit_amount - line.credit_amount;
13469 } else if line.gl_account.starts_with('2') {
13470 liability_net += line.credit_amount - line.debit_amount;
13471 }
13472 }
13473 }
13474 asset_net - liability_net
13475 }
13476}
13477
13478fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13480 match format {
13481 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13482 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13483 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13484 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13485 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13486 }
13487}
13488
13489fn compute_trial_balance_entries(
13494 entries: &[JournalEntry],
13495 entity_code: &str,
13496 fiscal_year: i32,
13497 coa: Option<&ChartOfAccounts>,
13498) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13499 use std::collections::BTreeMap;
13500
13501 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13502 BTreeMap::new();
13503
13504 for je in entries {
13505 for line in &je.lines {
13506 let entry = balances.entry(line.account_code.clone()).or_default();
13507 entry.0 += line.debit_amount;
13508 entry.1 += line.credit_amount;
13509 }
13510 }
13511
13512 balances
13513 .into_iter()
13514 .map(
13515 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13516 account_description: coa
13517 .and_then(|c| c.get_account(&account_code))
13518 .map(|a| a.description().to_string())
13519 .unwrap_or_else(|| account_code.clone()),
13520 account_code,
13521 debit_balance: debit,
13522 credit_balance: credit,
13523 net_balance: debit - credit,
13524 entity_code: entity_code.to_string(),
13525 period: format!("FY{}", fiscal_year),
13526 },
13527 )
13528 .collect()
13529}
13530
13531#[cfg(test)]
13532#[allow(clippy::unwrap_used)]
13533mod tests {
13534 use super::*;
13535 use datasynth_config::schema::*;
13536
13537 fn create_test_config() -> GeneratorConfig {
13538 GeneratorConfig {
13539 global: GlobalConfig {
13540 industry: IndustrySector::Manufacturing,
13541 start_date: "2024-01-01".to_string(),
13542 period_months: 1,
13543 seed: Some(42),
13544 parallel: false,
13545 group_currency: "USD".to_string(),
13546 presentation_currency: None,
13547 worker_threads: 0,
13548 memory_limit_mb: 0,
13549 fiscal_year_months: None,
13550 },
13551 companies: vec![CompanyConfig {
13552 code: "1000".to_string(),
13553 name: "Test Company".to_string(),
13554 currency: "USD".to_string(),
13555 functional_currency: None,
13556 country: "US".to_string(),
13557 annual_transaction_volume: TransactionVolume::TenK,
13558 volume_weight: 1.0,
13559 fiscal_year_variant: "K4".to_string(),
13560 }],
13561 chart_of_accounts: ChartOfAccountsConfig {
13562 complexity: CoAComplexity::Small,
13563 industry_specific: true,
13564 custom_accounts: None,
13565 min_hierarchy_depth: 2,
13566 max_hierarchy_depth: 4,
13567 },
13568 transactions: TransactionConfig::default(),
13569 output: OutputConfig::default(),
13570 fraud: FraudConfig::default(),
13571 internal_controls: InternalControlsConfig::default(),
13572 business_processes: BusinessProcessConfig::default(),
13573 user_personas: UserPersonaConfig::default(),
13574 templates: TemplateConfig::default(),
13575 approval: ApprovalConfig::default(),
13576 departments: DepartmentConfig::default(),
13577 master_data: MasterDataConfig::default(),
13578 document_flows: DocumentFlowConfig::default(),
13579 intercompany: IntercompanyConfig::default(),
13580 balance: BalanceConfig::default(),
13581 ocpm: OcpmConfig::default(),
13582 audit: AuditGenerationConfig::default(),
13583 banking: datasynth_banking::BankingConfig::default(),
13584 data_quality: DataQualitySchemaConfig::default(),
13585 scenario: ScenarioConfig::default(),
13586 temporal: TemporalDriftConfig::default(),
13587 graph_export: GraphExportConfig::default(),
13588 streaming: StreamingSchemaConfig::default(),
13589 rate_limit: RateLimitSchemaConfig::default(),
13590 temporal_attributes: TemporalAttributeSchemaConfig::default(),
13591 relationships: RelationshipSchemaConfig::default(),
13592 accounting_standards: AccountingStandardsConfig::default(),
13593 audit_standards: AuditStandardsConfig::default(),
13594 distributions: Default::default(),
13595 temporal_patterns: Default::default(),
13596 vendor_network: VendorNetworkSchemaConfig::default(),
13597 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13598 relationship_strength: RelationshipStrengthSchemaConfig::default(),
13599 cross_process_links: CrossProcessLinksSchemaConfig::default(),
13600 organizational_events: OrganizationalEventsSchemaConfig::default(),
13601 behavioral_drift: BehavioralDriftSchemaConfig::default(),
13602 market_drift: MarketDriftSchemaConfig::default(),
13603 drift_labeling: DriftLabelingSchemaConfig::default(),
13604 anomaly_injection: Default::default(),
13605 industry_specific: Default::default(),
13606 fingerprint_privacy: Default::default(),
13607 quality_gates: Default::default(),
13608 compliance: Default::default(),
13609 webhooks: Default::default(),
13610 llm: Default::default(),
13611 diffusion: Default::default(),
13612 causal: Default::default(),
13613 source_to_pay: Default::default(),
13614 financial_reporting: Default::default(),
13615 hr: Default::default(),
13616 manufacturing: Default::default(),
13617 sales_quotes: Default::default(),
13618 tax: Default::default(),
13619 treasury: Default::default(),
13620 project_accounting: Default::default(),
13621 esg: Default::default(),
13622 country_packs: None,
13623 scenarios: Default::default(),
13624 session: Default::default(),
13625 compliance_regulations: Default::default(),
13626 }
13627 }
13628
13629 #[test]
13630 fn test_enhanced_orchestrator_creation() {
13631 let config = create_test_config();
13632 let orchestrator = EnhancedOrchestrator::with_defaults(config);
13633 assert!(orchestrator.is_ok());
13634 }
13635
13636 #[test]
13637 fn test_minimal_generation() {
13638 let config = create_test_config();
13639 let phase_config = PhaseConfig {
13640 generate_master_data: false,
13641 generate_document_flows: false,
13642 generate_journal_entries: true,
13643 inject_anomalies: false,
13644 show_progress: false,
13645 ..Default::default()
13646 };
13647
13648 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13649 let result = orchestrator.generate();
13650
13651 assert!(result.is_ok());
13652 let result = result.unwrap();
13653 assert!(!result.journal_entries.is_empty());
13654 }
13655
13656 #[test]
13657 fn test_master_data_generation() {
13658 let config = create_test_config();
13659 let phase_config = PhaseConfig {
13660 generate_master_data: true,
13661 generate_document_flows: false,
13662 generate_journal_entries: false,
13663 inject_anomalies: false,
13664 show_progress: false,
13665 vendors_per_company: 5,
13666 customers_per_company: 5,
13667 materials_per_company: 10,
13668 assets_per_company: 5,
13669 employees_per_company: 10,
13670 ..Default::default()
13671 };
13672
13673 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13674 let result = orchestrator.generate().unwrap();
13675
13676 assert!(!result.master_data.vendors.is_empty());
13677 assert!(!result.master_data.customers.is_empty());
13678 assert!(!result.master_data.materials.is_empty());
13679 }
13680
13681 #[test]
13682 fn test_document_flow_generation() {
13683 let config = create_test_config();
13684 let phase_config = PhaseConfig {
13685 generate_master_data: true,
13686 generate_document_flows: true,
13687 generate_journal_entries: false,
13688 inject_anomalies: false,
13689 inject_data_quality: false,
13690 validate_balances: false,
13691 generate_ocpm_events: false,
13692 show_progress: false,
13693 vendors_per_company: 5,
13694 customers_per_company: 5,
13695 materials_per_company: 10,
13696 assets_per_company: 5,
13697 employees_per_company: 10,
13698 p2p_chains: 5,
13699 o2c_chains: 5,
13700 ..Default::default()
13701 };
13702
13703 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13704 let result = orchestrator.generate().unwrap();
13705
13706 assert!(!result.document_flows.p2p_chains.is_empty());
13708 assert!(!result.document_flows.o2c_chains.is_empty());
13709
13710 assert!(!result.document_flows.purchase_orders.is_empty());
13712 assert!(!result.document_flows.sales_orders.is_empty());
13713 }
13714
13715 #[test]
13716 fn test_anomaly_injection() {
13717 let config = create_test_config();
13718 let phase_config = PhaseConfig {
13719 generate_master_data: false,
13720 generate_document_flows: false,
13721 generate_journal_entries: true,
13722 inject_anomalies: true,
13723 show_progress: false,
13724 ..Default::default()
13725 };
13726
13727 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13728 let result = orchestrator.generate().unwrap();
13729
13730 assert!(!result.journal_entries.is_empty());
13732
13733 assert!(result.anomaly_labels.summary.is_some());
13736 }
13737
13738 #[test]
13739 fn test_full_generation_pipeline() {
13740 let config = create_test_config();
13741 let phase_config = PhaseConfig {
13742 generate_master_data: true,
13743 generate_document_flows: true,
13744 generate_journal_entries: true,
13745 inject_anomalies: false,
13746 inject_data_quality: false,
13747 validate_balances: true,
13748 generate_ocpm_events: false,
13749 show_progress: false,
13750 vendors_per_company: 3,
13751 customers_per_company: 3,
13752 materials_per_company: 5,
13753 assets_per_company: 3,
13754 employees_per_company: 5,
13755 p2p_chains: 3,
13756 o2c_chains: 3,
13757 ..Default::default()
13758 };
13759
13760 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13761 let result = orchestrator.generate().unwrap();
13762
13763 assert!(!result.master_data.vendors.is_empty());
13765 assert!(!result.master_data.customers.is_empty());
13766 assert!(!result.document_flows.p2p_chains.is_empty());
13767 assert!(!result.document_flows.o2c_chains.is_empty());
13768 assert!(!result.journal_entries.is_empty());
13769 assert!(result.statistics.accounts_count > 0);
13770
13771 assert!(!result.subledger.ap_invoices.is_empty());
13773 assert!(!result.subledger.ar_invoices.is_empty());
13774
13775 assert!(result.balance_validation.validated);
13777 assert!(result.balance_validation.entries_processed > 0);
13778 }
13779
13780 #[test]
13781 fn test_subledger_linking() {
13782 let config = create_test_config();
13783 let phase_config = PhaseConfig {
13784 generate_master_data: true,
13785 generate_document_flows: true,
13786 generate_journal_entries: false,
13787 inject_anomalies: false,
13788 inject_data_quality: false,
13789 validate_balances: false,
13790 generate_ocpm_events: false,
13791 show_progress: false,
13792 vendors_per_company: 5,
13793 customers_per_company: 5,
13794 materials_per_company: 10,
13795 assets_per_company: 3,
13796 employees_per_company: 5,
13797 p2p_chains: 5,
13798 o2c_chains: 5,
13799 ..Default::default()
13800 };
13801
13802 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13803 let result = orchestrator.generate().unwrap();
13804
13805 assert!(!result.document_flows.vendor_invoices.is_empty());
13807 assert!(!result.document_flows.customer_invoices.is_empty());
13808
13809 assert!(!result.subledger.ap_invoices.is_empty());
13811 assert!(!result.subledger.ar_invoices.is_empty());
13812
13813 assert_eq!(
13815 result.subledger.ap_invoices.len(),
13816 result.document_flows.vendor_invoices.len()
13817 );
13818
13819 assert_eq!(
13821 result.subledger.ar_invoices.len(),
13822 result.document_flows.customer_invoices.len()
13823 );
13824
13825 assert_eq!(
13827 result.statistics.ap_invoice_count,
13828 result.subledger.ap_invoices.len()
13829 );
13830 assert_eq!(
13831 result.statistics.ar_invoice_count,
13832 result.subledger.ar_invoices.len()
13833 );
13834 }
13835
13836 #[test]
13837 fn test_balance_validation() {
13838 let config = create_test_config();
13839 let phase_config = PhaseConfig {
13840 generate_master_data: false,
13841 generate_document_flows: false,
13842 generate_journal_entries: true,
13843 inject_anomalies: false,
13844 validate_balances: true,
13845 show_progress: false,
13846 ..Default::default()
13847 };
13848
13849 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13850 let result = orchestrator.generate().unwrap();
13851
13852 assert!(result.balance_validation.validated);
13854 assert!(result.balance_validation.entries_processed > 0);
13855
13856 assert!(!result.balance_validation.has_unbalanced_entries);
13858
13859 assert_eq!(
13861 result.balance_validation.total_debits,
13862 result.balance_validation.total_credits
13863 );
13864 }
13865
13866 #[test]
13867 fn test_statistics_accuracy() {
13868 let config = create_test_config();
13869 let phase_config = PhaseConfig {
13870 generate_master_data: true,
13871 generate_document_flows: false,
13872 generate_journal_entries: true,
13873 inject_anomalies: false,
13874 show_progress: false,
13875 vendors_per_company: 10,
13876 customers_per_company: 20,
13877 materials_per_company: 15,
13878 assets_per_company: 5,
13879 employees_per_company: 8,
13880 ..Default::default()
13881 };
13882
13883 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13884 let result = orchestrator.generate().unwrap();
13885
13886 assert_eq!(
13888 result.statistics.vendor_count,
13889 result.master_data.vendors.len()
13890 );
13891 assert_eq!(
13892 result.statistics.customer_count,
13893 result.master_data.customers.len()
13894 );
13895 assert_eq!(
13896 result.statistics.material_count,
13897 result.master_data.materials.len()
13898 );
13899 assert_eq!(
13900 result.statistics.total_entries as usize,
13901 result.journal_entries.len()
13902 );
13903 }
13904
13905 #[test]
13906 fn test_phase_config_defaults() {
13907 let config = PhaseConfig::default();
13908 assert!(config.generate_master_data);
13909 assert!(config.generate_document_flows);
13910 assert!(config.generate_journal_entries);
13911 assert!(!config.inject_anomalies);
13912 assert!(config.validate_balances);
13913 assert!(config.show_progress);
13914 assert!(config.vendors_per_company > 0);
13915 assert!(config.customers_per_company > 0);
13916 }
13917
13918 #[test]
13919 fn test_get_coa_before_generation() {
13920 let config = create_test_config();
13921 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
13922
13923 assert!(orchestrator.get_coa().is_none());
13925 }
13926
13927 #[test]
13928 fn test_get_coa_after_generation() {
13929 let config = create_test_config();
13930 let phase_config = PhaseConfig {
13931 generate_master_data: false,
13932 generate_document_flows: false,
13933 generate_journal_entries: true,
13934 inject_anomalies: false,
13935 show_progress: false,
13936 ..Default::default()
13937 };
13938
13939 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13940 let _ = orchestrator.generate().unwrap();
13941
13942 assert!(orchestrator.get_coa().is_some());
13944 }
13945
13946 #[test]
13947 fn test_get_master_data() {
13948 let config = create_test_config();
13949 let phase_config = PhaseConfig {
13950 generate_master_data: true,
13951 generate_document_flows: false,
13952 generate_journal_entries: false,
13953 inject_anomalies: false,
13954 show_progress: false,
13955 vendors_per_company: 5,
13956 customers_per_company: 5,
13957 materials_per_company: 5,
13958 assets_per_company: 5,
13959 employees_per_company: 5,
13960 ..Default::default()
13961 };
13962
13963 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13964 let result = orchestrator.generate().unwrap();
13965
13966 assert!(!result.master_data.vendors.is_empty());
13968 }
13969
13970 #[test]
13971 fn test_with_progress_builder() {
13972 let config = create_test_config();
13973 let orchestrator = EnhancedOrchestrator::with_defaults(config)
13974 .unwrap()
13975 .with_progress(false);
13976
13977 assert!(!orchestrator.phase_config.show_progress);
13979 }
13980
13981 #[test]
13982 fn test_multi_company_generation() {
13983 let mut config = create_test_config();
13984 config.companies.push(CompanyConfig {
13985 code: "2000".to_string(),
13986 name: "Subsidiary".to_string(),
13987 currency: "EUR".to_string(),
13988 functional_currency: None,
13989 country: "DE".to_string(),
13990 annual_transaction_volume: TransactionVolume::TenK,
13991 volume_weight: 0.5,
13992 fiscal_year_variant: "K4".to_string(),
13993 });
13994
13995 let phase_config = PhaseConfig {
13996 generate_master_data: true,
13997 generate_document_flows: false,
13998 generate_journal_entries: true,
13999 inject_anomalies: false,
14000 show_progress: false,
14001 vendors_per_company: 5,
14002 customers_per_company: 5,
14003 materials_per_company: 5,
14004 assets_per_company: 5,
14005 employees_per_company: 5,
14006 ..Default::default()
14007 };
14008
14009 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14010 let result = orchestrator.generate().unwrap();
14011
14012 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
14015 assert!(result.statistics.companies_count == 2);
14016 }
14017
14018 #[test]
14019 fn test_empty_master_data_skips_document_flows() {
14020 let config = create_test_config();
14021 let phase_config = PhaseConfig {
14022 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
14025 inject_anomalies: false,
14026 show_progress: false,
14027 ..Default::default()
14028 };
14029
14030 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14031 let result = orchestrator.generate().unwrap();
14032
14033 assert!(result.document_flows.p2p_chains.is_empty());
14035 assert!(result.document_flows.o2c_chains.is_empty());
14036 }
14037
14038 #[test]
14039 fn test_journal_entry_line_item_count() {
14040 let config = create_test_config();
14041 let phase_config = PhaseConfig {
14042 generate_master_data: false,
14043 generate_document_flows: false,
14044 generate_journal_entries: true,
14045 inject_anomalies: false,
14046 show_progress: false,
14047 ..Default::default()
14048 };
14049
14050 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14051 let result = orchestrator.generate().unwrap();
14052
14053 let calculated_line_items: u64 = result
14055 .journal_entries
14056 .iter()
14057 .map(|e| e.line_count() as u64)
14058 .sum();
14059 assert_eq!(result.statistics.total_line_items, calculated_line_items);
14060 }
14061
14062 #[test]
14063 fn test_audit_generation() {
14064 let config = create_test_config();
14065 let phase_config = PhaseConfig {
14066 generate_master_data: false,
14067 generate_document_flows: false,
14068 generate_journal_entries: true,
14069 inject_anomalies: false,
14070 show_progress: false,
14071 generate_audit: true,
14072 audit_engagements: 2,
14073 workpapers_per_engagement: 5,
14074 evidence_per_workpaper: 2,
14075 risks_per_engagement: 3,
14076 findings_per_engagement: 2,
14077 judgments_per_engagement: 2,
14078 ..Default::default()
14079 };
14080
14081 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14082 let result = orchestrator.generate().unwrap();
14083
14084 assert_eq!(result.audit.engagements.len(), 2);
14086 assert!(!result.audit.workpapers.is_empty());
14087 assert!(!result.audit.evidence.is_empty());
14088 assert!(!result.audit.risk_assessments.is_empty());
14089 assert!(!result.audit.findings.is_empty());
14090 assert!(!result.audit.judgments.is_empty());
14091
14092 assert!(
14094 !result.audit.confirmations.is_empty(),
14095 "ISA 505 confirmations should be generated"
14096 );
14097 assert!(
14098 !result.audit.confirmation_responses.is_empty(),
14099 "ISA 505 confirmation responses should be generated"
14100 );
14101 assert!(
14102 !result.audit.procedure_steps.is_empty(),
14103 "ISA 330 procedure steps should be generated"
14104 );
14105 assert!(
14107 !result.audit.analytical_results.is_empty(),
14108 "ISA 520 analytical procedures should be generated"
14109 );
14110 assert!(
14111 !result.audit.ia_functions.is_empty(),
14112 "ISA 610 IA functions should be generated (one per engagement)"
14113 );
14114 assert!(
14115 !result.audit.related_parties.is_empty(),
14116 "ISA 550 related parties should be generated"
14117 );
14118
14119 assert_eq!(
14121 result.statistics.audit_engagement_count,
14122 result.audit.engagements.len()
14123 );
14124 assert_eq!(
14125 result.statistics.audit_workpaper_count,
14126 result.audit.workpapers.len()
14127 );
14128 assert_eq!(
14129 result.statistics.audit_evidence_count,
14130 result.audit.evidence.len()
14131 );
14132 assert_eq!(
14133 result.statistics.audit_risk_count,
14134 result.audit.risk_assessments.len()
14135 );
14136 assert_eq!(
14137 result.statistics.audit_finding_count,
14138 result.audit.findings.len()
14139 );
14140 assert_eq!(
14141 result.statistics.audit_judgment_count,
14142 result.audit.judgments.len()
14143 );
14144 assert_eq!(
14145 result.statistics.audit_confirmation_count,
14146 result.audit.confirmations.len()
14147 );
14148 assert_eq!(
14149 result.statistics.audit_confirmation_response_count,
14150 result.audit.confirmation_responses.len()
14151 );
14152 assert_eq!(
14153 result.statistics.audit_procedure_step_count,
14154 result.audit.procedure_steps.len()
14155 );
14156 assert_eq!(
14157 result.statistics.audit_sample_count,
14158 result.audit.samples.len()
14159 );
14160 assert_eq!(
14161 result.statistics.audit_analytical_result_count,
14162 result.audit.analytical_results.len()
14163 );
14164 assert_eq!(
14165 result.statistics.audit_ia_function_count,
14166 result.audit.ia_functions.len()
14167 );
14168 assert_eq!(
14169 result.statistics.audit_ia_report_count,
14170 result.audit.ia_reports.len()
14171 );
14172 assert_eq!(
14173 result.statistics.audit_related_party_count,
14174 result.audit.related_parties.len()
14175 );
14176 assert_eq!(
14177 result.statistics.audit_related_party_transaction_count,
14178 result.audit.related_party_transactions.len()
14179 );
14180 }
14181
14182 #[test]
14183 fn test_new_phases_disabled_by_default() {
14184 let config = create_test_config();
14185 assert!(!config.llm.enabled);
14187 assert!(!config.diffusion.enabled);
14188 assert!(!config.causal.enabled);
14189
14190 let phase_config = PhaseConfig {
14191 generate_master_data: false,
14192 generate_document_flows: false,
14193 generate_journal_entries: true,
14194 inject_anomalies: false,
14195 show_progress: false,
14196 ..Default::default()
14197 };
14198
14199 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14200 let result = orchestrator.generate().unwrap();
14201
14202 assert_eq!(result.statistics.llm_enrichment_ms, 0);
14204 assert_eq!(result.statistics.llm_vendors_enriched, 0);
14205 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14206 assert_eq!(result.statistics.diffusion_samples_generated, 0);
14207 assert_eq!(result.statistics.causal_generation_ms, 0);
14208 assert_eq!(result.statistics.causal_samples_generated, 0);
14209 assert!(result.statistics.causal_validation_passed.is_none());
14210 assert_eq!(result.statistics.counterfactual_pair_count, 0);
14211 assert!(result.counterfactual_pairs.is_empty());
14212 }
14213
14214 #[test]
14215 fn test_counterfactual_generation_enabled() {
14216 let config = create_test_config();
14217 let phase_config = PhaseConfig {
14218 generate_master_data: false,
14219 generate_document_flows: false,
14220 generate_journal_entries: true,
14221 inject_anomalies: false,
14222 show_progress: false,
14223 generate_counterfactuals: true,
14224 generate_period_close: false, ..Default::default()
14226 };
14227
14228 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14229 let result = orchestrator.generate().unwrap();
14230
14231 if !result.journal_entries.is_empty() {
14233 assert_eq!(
14234 result.counterfactual_pairs.len(),
14235 result.journal_entries.len()
14236 );
14237 assert_eq!(
14238 result.statistics.counterfactual_pair_count,
14239 result.journal_entries.len()
14240 );
14241 let ids: std::collections::HashSet<_> = result
14243 .counterfactual_pairs
14244 .iter()
14245 .map(|p| p.pair_id.clone())
14246 .collect();
14247 assert_eq!(ids.len(), result.counterfactual_pairs.len());
14248 }
14249 }
14250
14251 #[test]
14252 fn test_llm_enrichment_enabled() {
14253 let mut config = create_test_config();
14254 config.llm.enabled = true;
14255 config.llm.max_vendor_enrichments = 3;
14256
14257 let phase_config = PhaseConfig {
14258 generate_master_data: true,
14259 generate_document_flows: false,
14260 generate_journal_entries: false,
14261 inject_anomalies: false,
14262 show_progress: false,
14263 vendors_per_company: 5,
14264 customers_per_company: 3,
14265 materials_per_company: 3,
14266 assets_per_company: 3,
14267 employees_per_company: 3,
14268 ..Default::default()
14269 };
14270
14271 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14272 let result = orchestrator.generate().unwrap();
14273
14274 assert!(result.statistics.llm_vendors_enriched > 0);
14276 assert!(result.statistics.llm_vendors_enriched <= 3);
14277 }
14278
14279 #[test]
14280 fn test_diffusion_enhancement_enabled() {
14281 let mut config = create_test_config();
14282 config.diffusion.enabled = true;
14283 config.diffusion.n_steps = 50;
14284 config.diffusion.sample_size = 20;
14285
14286 let phase_config = PhaseConfig {
14287 generate_master_data: false,
14288 generate_document_flows: false,
14289 generate_journal_entries: true,
14290 inject_anomalies: false,
14291 show_progress: false,
14292 ..Default::default()
14293 };
14294
14295 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14296 let result = orchestrator.generate().unwrap();
14297
14298 assert_eq!(result.statistics.diffusion_samples_generated, 20);
14300 }
14301
14302 #[test]
14303 fn test_causal_overlay_enabled() {
14304 let mut config = create_test_config();
14305 config.causal.enabled = true;
14306 config.causal.template = "fraud_detection".to_string();
14307 config.causal.sample_size = 100;
14308 config.causal.validate = true;
14309
14310 let phase_config = PhaseConfig {
14311 generate_master_data: false,
14312 generate_document_flows: false,
14313 generate_journal_entries: true,
14314 inject_anomalies: false,
14315 show_progress: false,
14316 ..Default::default()
14317 };
14318
14319 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14320 let result = orchestrator.generate().unwrap();
14321
14322 assert_eq!(result.statistics.causal_samples_generated, 100);
14324 assert!(result.statistics.causal_validation_passed.is_some());
14326 }
14327
14328 #[test]
14329 fn test_causal_overlay_revenue_cycle_template() {
14330 let mut config = create_test_config();
14331 config.causal.enabled = true;
14332 config.causal.template = "revenue_cycle".to_string();
14333 config.causal.sample_size = 50;
14334 config.causal.validate = false;
14335
14336 let phase_config = PhaseConfig {
14337 generate_master_data: false,
14338 generate_document_flows: false,
14339 generate_journal_entries: true,
14340 inject_anomalies: false,
14341 show_progress: false,
14342 ..Default::default()
14343 };
14344
14345 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14346 let result = orchestrator.generate().unwrap();
14347
14348 assert_eq!(result.statistics.causal_samples_generated, 50);
14350 assert!(result.statistics.causal_validation_passed.is_none());
14352 }
14353
14354 #[test]
14355 fn test_all_new_phases_enabled_together() {
14356 let mut config = create_test_config();
14357 config.llm.enabled = true;
14358 config.llm.max_vendor_enrichments = 2;
14359 config.diffusion.enabled = true;
14360 config.diffusion.n_steps = 20;
14361 config.diffusion.sample_size = 10;
14362 config.causal.enabled = true;
14363 config.causal.sample_size = 50;
14364 config.causal.validate = true;
14365
14366 let phase_config = PhaseConfig {
14367 generate_master_data: true,
14368 generate_document_flows: false,
14369 generate_journal_entries: true,
14370 inject_anomalies: false,
14371 show_progress: false,
14372 vendors_per_company: 5,
14373 customers_per_company: 3,
14374 materials_per_company: 3,
14375 assets_per_company: 3,
14376 employees_per_company: 3,
14377 ..Default::default()
14378 };
14379
14380 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14381 let result = orchestrator.generate().unwrap();
14382
14383 assert!(result.statistics.llm_vendors_enriched > 0);
14385 assert_eq!(result.statistics.diffusion_samples_generated, 10);
14386 assert_eq!(result.statistics.causal_samples_generated, 50);
14387 assert!(result.statistics.causal_validation_passed.is_some());
14388 }
14389
14390 #[test]
14391 fn test_statistics_serialization_with_new_fields() {
14392 let stats = EnhancedGenerationStatistics {
14393 total_entries: 100,
14394 total_line_items: 500,
14395 llm_enrichment_ms: 42,
14396 llm_vendors_enriched: 10,
14397 diffusion_enhancement_ms: 100,
14398 diffusion_samples_generated: 50,
14399 causal_generation_ms: 200,
14400 causal_samples_generated: 100,
14401 causal_validation_passed: Some(true),
14402 ..Default::default()
14403 };
14404
14405 let json = serde_json::to_string(&stats).unwrap();
14406 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14407
14408 assert_eq!(deserialized.llm_enrichment_ms, 42);
14409 assert_eq!(deserialized.llm_vendors_enriched, 10);
14410 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14411 assert_eq!(deserialized.diffusion_samples_generated, 50);
14412 assert_eq!(deserialized.causal_generation_ms, 200);
14413 assert_eq!(deserialized.causal_samples_generated, 100);
14414 assert_eq!(deserialized.causal_validation_passed, Some(true));
14415 }
14416
14417 #[test]
14418 fn test_statistics_backward_compat_deserialization() {
14419 let old_json = r#"{
14421 "total_entries": 100,
14422 "total_line_items": 500,
14423 "accounts_count": 50,
14424 "companies_count": 1,
14425 "period_months": 12,
14426 "vendor_count": 10,
14427 "customer_count": 20,
14428 "material_count": 15,
14429 "asset_count": 5,
14430 "employee_count": 8,
14431 "p2p_chain_count": 5,
14432 "o2c_chain_count": 5,
14433 "ap_invoice_count": 5,
14434 "ar_invoice_count": 5,
14435 "ocpm_event_count": 0,
14436 "ocpm_object_count": 0,
14437 "ocpm_case_count": 0,
14438 "audit_engagement_count": 0,
14439 "audit_workpaper_count": 0,
14440 "audit_evidence_count": 0,
14441 "audit_risk_count": 0,
14442 "audit_finding_count": 0,
14443 "audit_judgment_count": 0,
14444 "anomalies_injected": 0,
14445 "data_quality_issues": 0,
14446 "banking_customer_count": 0,
14447 "banking_account_count": 0,
14448 "banking_transaction_count": 0,
14449 "banking_suspicious_count": 0,
14450 "graph_export_count": 0,
14451 "graph_node_count": 0,
14452 "graph_edge_count": 0
14453 }"#;
14454
14455 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14456
14457 assert_eq!(stats.llm_enrichment_ms, 0);
14459 assert_eq!(stats.llm_vendors_enriched, 0);
14460 assert_eq!(stats.diffusion_enhancement_ms, 0);
14461 assert_eq!(stats.diffusion_samples_generated, 0);
14462 assert_eq!(stats.causal_generation_ms, 0);
14463 assert_eq!(stats.causal_samples_generated, 0);
14464 assert!(stats.causal_validation_passed.is_none());
14465 }
14466}