1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use rayon::prelude::*;
178
179fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
191 #[allow(clippy::field_reassign_with_default)]
192 {
193 let mut s = DataQualityStats::default();
194 s.total_records = n_entries;
195 s.missing_values.total_records = n_entries;
196 s.format_variations.total_processed = n_entries;
197 s.duplicates.total_processed = n_entries;
198 s
199 }
200}
201
202fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
203 let payment_behavior = &schema_config.payment_behavior;
204 let late_dist = &payment_behavior.late_payment_days_distribution;
205
206 P2PGeneratorConfig {
207 three_way_match_rate: schema_config.three_way_match_rate,
208 partial_delivery_rate: schema_config.partial_delivery_rate,
209 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
210 price_variance_rate: schema_config.price_variance_rate,
211 max_price_variance_percent: schema_config.max_price_variance_percent,
212 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
213 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
214 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
215 payment_method_distribution: vec![
216 (PaymentMethod::BankTransfer, 0.60),
217 (PaymentMethod::Check, 0.25),
218 (PaymentMethod::Wire, 0.10),
219 (PaymentMethod::CreditCard, 0.05),
220 ],
221 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
222 payment_behavior: P2PPaymentBehavior {
223 late_payment_rate: payment_behavior.late_payment_rate,
224 late_payment_distribution: LatePaymentDistribution {
225 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
226 late_8_to_14: late_dist.late_8_to_14,
227 very_late_15_to_30: late_dist.very_late_15_to_30,
228 severely_late_31_to_60: late_dist.severely_late_31_to_60,
229 extremely_late_over_60: late_dist.extremely_late_over_60,
230 },
231 partial_payment_rate: payment_behavior.partial_payment_rate,
232 payment_correction_rate: payment_behavior.payment_correction_rate,
233 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
234 },
235 }
236}
237
238fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
240 let payment_behavior = &schema_config.payment_behavior;
241
242 O2CGeneratorConfig {
243 credit_check_failure_rate: schema_config.credit_check_failure_rate,
244 partial_shipment_rate: schema_config.partial_shipment_rate,
245 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
246 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
247 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
248 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
249 bad_debt_rate: schema_config.bad_debt_rate,
250 returns_rate: schema_config.return_rate,
251 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
252 payment_method_distribution: vec![
253 (PaymentMethod::BankTransfer, 0.50),
254 (PaymentMethod::Check, 0.30),
255 (PaymentMethod::Wire, 0.15),
256 (PaymentMethod::CreditCard, 0.05),
257 ],
258 payment_behavior: O2CPaymentBehavior {
259 partial_payment_rate: payment_behavior.partial_payments.rate,
260 short_payment_rate: payment_behavior.short_payments.rate,
261 max_short_percent: payment_behavior.short_payments.max_short_percent,
262 on_account_rate: payment_behavior.on_account_payments.rate,
263 payment_correction_rate: payment_behavior.payment_corrections.rate,
264 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
265 },
266 }
267}
268
/// Switches and volume settings for the individual generation phases.
///
/// `bool` fields turn a phase (or an injection / validation step) on or off;
/// `usize` fields carry quantities such as records per company or items per
/// audit engagement.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Toggle for master-data generation.
    pub generate_master_data: bool,
    /// Toggle for document-flow generation.
    pub generate_document_flows: bool,
    /// Toggle for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Toggle for journal-entry generation.
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection.
    pub inject_anomalies: bool,
    /// Toggle for data-quality issue injection.
    pub inject_data_quality: bool,
    /// Toggle for balance validation.
    pub validate_balances: bool,
    /// Toggle for progress display.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Toggle for audit-data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Toggle for banking-data generation.
    pub generate_banking: bool,
    /// Toggle for graph export.
    pub generate_graph_export: bool,
    /// Toggle for sourcing-data generation.
    pub generate_sourcing: bool,
    /// Toggle for bank-reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Toggle for financial-statement generation.
    pub generate_financial_statements: bool,
    /// Toggle for accounting-standards data generation.
    pub generate_accounting_standards: bool,
    /// Toggle for manufacturing-data generation.
    pub generate_manufacturing: bool,
    /// Toggle for sales-quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Toggle for tax-data generation.
    pub generate_tax: bool,
    /// Toggle for ESG-data generation.
    pub generate_esg: bool,
    /// Toggle for intercompany-data generation.
    pub generate_intercompany: bool,
    /// Toggle for evolution-event generation.
    pub generate_evolution_events: bool,
    /// Toggle for counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Toggle for compliance-regulations data generation.
    pub generate_compliance_regulations: bool,
    /// Toggle for period-close generation.
    pub generate_period_close: bool,
    /// Toggle for HR-data generation.
    pub generate_hr: bool,
    /// Toggle for treasury-data generation.
    pub generate_treasury: bool,
    /// Toggle for project-accounting data generation.
    pub generate_project_accounting: bool,
    /// Toggle for legal-document generation.
    pub generate_legal_documents: bool,
    /// Toggle for IT-controls data generation.
    pub generate_it_controls: bool,
    /// Toggle for analytics-metadata generation.
    pub generate_analytics_metadata: bool,
}
366
367impl Default for PhaseConfig {
368 fn default() -> Self {
369 Self {
370 generate_master_data: true,
371 generate_document_flows: true,
372 generate_ocpm_events: false, generate_journal_entries: true,
374 inject_anomalies: false,
375 inject_data_quality: false, validate_balances: true,
377 show_progress: true,
378 vendors_per_company: 50,
379 customers_per_company: 100,
380 materials_per_company: 200,
381 assets_per_company: 50,
382 employees_per_company: 100,
383 p2p_chains: 100,
384 o2c_chains: 100,
385 generate_audit: false, audit_engagements: 5,
387 workpapers_per_engagement: 20,
388 evidence_per_workpaper: 5,
389 risks_per_engagement: 15,
390 findings_per_engagement: 8,
391 judgments_per_engagement: 10,
392 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
414 }
415}
416
417impl PhaseConfig {
418 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
423 Self {
424 generate_master_data: true,
426 generate_document_flows: true,
427 generate_journal_entries: true,
428 validate_balances: true,
429 generate_period_close: true,
430 generate_evolution_events: true,
431 show_progress: true,
432
433 generate_audit: cfg.audit.enabled,
435 generate_banking: cfg.banking.enabled,
436 generate_graph_export: cfg.graph_export.enabled,
437 generate_sourcing: cfg.source_to_pay.enabled,
438 generate_intercompany: cfg.intercompany.enabled,
439 generate_financial_statements: cfg.financial_reporting.enabled,
440 generate_bank_reconciliation: cfg.financial_reporting.enabled,
441 generate_accounting_standards: cfg.accounting_standards.enabled,
442 generate_manufacturing: cfg.manufacturing.enabled,
443 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
444 generate_tax: cfg.tax.enabled,
445 generate_esg: cfg.esg.enabled,
446 generate_ocpm_events: cfg.ocpm.enabled,
447 generate_compliance_regulations: cfg.compliance_regulations.enabled,
448 generate_hr: cfg.hr.enabled,
449 generate_treasury: cfg.treasury.enabled,
450 generate_project_accounting: cfg.project_accounting.enabled,
451
452 generate_legal_documents: cfg.compliance_regulations.enabled
456 && cfg.compliance_regulations.legal_documents.enabled,
457 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
460 generate_analytics_metadata: cfg.analytics_metadata.enabled,
463
464 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
466
467 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
468 inject_data_quality: cfg.data_quality.enabled,
469
470 vendors_per_company: 50,
472 customers_per_company: 100,
473 materials_per_company: 200,
474 assets_per_company: 50,
475 employees_per_company: 100,
476 p2p_chains: 100,
477 o2c_chains: 100,
478 audit_engagements: 5,
479 workpapers_per_engagement: 20,
480 evidence_per_workpaper: 5,
481 risks_per_engagement: 15,
482 findings_per_engagement: 8,
483 judgments_per_engagement: 10,
484 }
485 }
486}
487
/// Master-data collections held together as one snapshot.
///
/// `Default` yields all-empty vectors.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// Organizational profiles.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
510
/// Size figures and output location describing a hypergraph export.
///
/// Unlike most snapshot types in this file, this one does not derive `Default`.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
523
/// Document-flow collections: P2P and O2C chains plus per-document-type lists.
///
/// NOTE(review): whether the flat document lists duplicate what is inside the
/// chains is not visible from this definition — confirm with the producer.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
549
/// Subledger collections (AP, AR, fixed assets, inventory) and related reports/runs.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP subledger invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR subledger invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements (subledger model type).
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Fixed-asset depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// AR dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// AR dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
576
/// OCPM event log (if any) together with summary counts.
///
/// The counts are plain fields; nothing in this definition ties them to the
/// contents of `event_log`.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, or `None` when no log is attached.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
589
/// Audit-related collections held together as one snapshot.
///
/// Every field is a `Vec` (empty by default) except `group_audit_plan` and
/// `fsm_event_trail`, which are `Option`s (`None` by default).
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mappings.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM events, or `None` when no trail is attached.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
    /// Legal documents.
    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
    /// IT-controls access logs.
    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
    /// IT-controls change-management records.
    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
}
715
/// Banking collections (customers, accounts, transactions), their labels, and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious count — NOTE(review): the unit (transactions vs. customers) is not visible here; confirm.
    pub suspicious_count: usize,
    /// Number of scenarios.
    pub scenario_count: usize,
}
740
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place (`false` by default).
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string — presumably the export name; confirm with the producer.
    pub exports: HashMap<String, GraphExportInfo>,
}
751
/// Serializable description of a single graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
766
/// Sourcing collections held together as one snapshot.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
789
/// Trial-balance entries for one fiscal period; serializable and deserializable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number — NOTE(review): numbering base (0 or 1) is not visible here; confirm.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period — NOTE(review): inclusiveness is not visible here; confirm.
    pub period_end: NaiveDate,
    /// Trial-balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
804
/// Financial-reporting collections: statements, consolidation, reconciliations, trial balances, segments, notes.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key — presumably an entity/company code; confirm.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
829
/// HR collections (payroll, time, expenses, benefits, pensions, stock compensation) plus summary counts.
///
/// The `*_count` fields are stored independently of the vectors; nothing in
/// this definition keeps them in sync.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries for pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock-compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries for stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Count of payroll runs.
    pub payroll_run_count: usize,
    /// Count of payroll line items.
    pub payroll_line_item_count: usize,
    /// Count of time entries.
    pub time_entry_count: usize,
    /// Count of expense reports.
    pub expense_report_count: usize,
    /// Count of benefit enrollments.
    pub benefit_enrollment_count: usize,
    /// Count of pension plans.
    pub pension_plan_count: usize,
    /// Count of stock grants.
    pub stock_grant_count: usize,
}
874
/// Accounting-standards collections (revenue contracts, impairment, business
/// combinations, ECL, provisions, currency translation, leases, fair value,
/// framework differences) plus summary counts.
///
/// The `*_count` fields are stored independently of the vectors; nothing in
/// this definition keeps them in sync.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue module).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries for ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries for provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Count of revenue contracts.
    pub revenue_contract_count: usize,
    /// Count of impairment tests.
    pub impairment_test_count: usize,
    /// Count of business combinations.
    pub business_combination_count: usize,
    /// Count of ECL models.
    pub ecl_model_count: usize,
    /// Count of provisions.
    pub provision_count: usize,
    /// Count of currency translations.
    pub currency_translation_count: usize,
    /// Leases.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair-value measurements.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records.
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Framework reconciliations.
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Count of leases.
    pub lease_count: usize,
    /// Count of fair-value measurements.
    pub fair_value_measurement_count: usize,
    /// Count of framework differences.
    pub framework_difference_count: usize,
}
935
/// Compliance-regulations collections plus an optional compliance graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, or `None` when no graph is attached.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
954
/// Manufacturing collections plus summary counts.
///
/// The `*_count` fields are stored independently of the vectors; nothing in
/// this definition keeps them in sync.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Count of production orders.
    pub production_order_count: usize,
    /// Count of quality inspections.
    pub quality_inspection_count: usize,
    /// Count of cycle counts.
    pub cycle_count_count: usize,
    /// Count of BOM components.
    pub bom_component_count: usize,
    /// Count of inventory movements.
    pub inventory_movement_count: usize,
}
979
/// Sales quotes, management KPIs and budgets, plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Count of sales quotes.
    pub sales_quote_count: usize,
    /// Count of KPIs.
    pub kpi_count: usize,
    /// Count of budget lines — NOTE(review): named per line, not per budget, so presumably not `budgets.len()`; confirm.
    pub budget_line_count: usize,
}
996
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, or `None` when no summary is attached.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string — presumably the anomaly type name; confirm with the producer.
    pub by_type: HashMap<String, usize>,
}
1007
/// Outcome of balance validation.
///
/// `Default` yields `validated == false` and `is_balanced == false`, zero
/// totals, and no errors.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
1030
/// Tax collections, summary counts, deferred-tax data and tax posting journal entries.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Count of jurisdictions.
    pub jurisdiction_count: usize,
    /// Count of tax codes.
    pub code_count: usize,
    /// Deferred-tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1057
/// Intercompany data: group structure, matched pairs, journal entries,
/// eliminations, NCI measurements and summary figures.
///
/// Serializable and deserializable, except for `ic_document_chains`, which is
/// skipped by serde.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, or `None` when not present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Intercompany matched pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains. Excluded from serialization; on
    /// deserialization the field is filled with its default (`None`).
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Count of matched pairs.
    pub matched_pair_count: usize,
    /// Count of elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate — NOTE(review): scale (fraction vs. percent) is not visible here; confirm.
    pub match_rate: f64,
}
1083
/// ESG collections plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay-equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Count of emission records.
    pub emission_count: usize,
    /// Count of disclosures.
    pub disclosure_count: usize,
}
1120
/// Treasury collections held together as one snapshot.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash-pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries held with the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1148
/// Project-accounting collections held together as one snapshot.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned-value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1165
/// Aggregate result that bundles every per-domain snapshot together with
/// journal entries, labels, validation output and statistics.
///
/// Derives `Debug` and `Default` only — it is not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph-export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales / KPI / budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Data-quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Summary statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, or `None` when not present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality-gate result, or `None` when not present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains of vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, or `None` when not present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, or `None` when not present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics-metadata snapshot.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, or `None` when not present.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity snapshot.
    pub interconnectivity: InterconnectivitySnapshot,
}
1271
/// Interconnectivity data carried on the generation result.
///
/// NOTE(review): the meaning of each tuple element is not visible in this
/// part of the file; the per-field notes below are assumptions to confirm
/// against the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// Presumably (vendor id, tier number) pairs — TODO confirm.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Presumably (vendor id, cluster label) pairs — TODO confirm.
    pub vendor_clusters: Vec<(String, String)>,
    /// Presumably (customer id, value segment) pairs — TODO confirm.
    pub customer_value_segments: Vec<(String, String)>,
    /// Presumably (customer id, lifecycle stage) pairs — TODO confirm.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata strings.
    pub industry_metadata: Vec<String>,
}
1297
/// Analytics metadata carried on the generation result.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative records.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmark records.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management report records.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift events.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1310
/// Counters and timings describing one generation run.
///
/// The struct is serializable. Fields carrying `#[serde(default)]` fall back to
/// their default value when missing from the input being deserialized; fields
/// without it are required. Fields with
/// `skip_serializing_if = "Option::is_none"` are omitted from the output while
/// they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // ---- Core volume ----
    /// Number of journal entries (set from the final entry list in `generate`).
    pub total_entries: u64,
    /// Sum of the line counts of all journal entries.
    pub total_line_items: u64,
    /// Number of accounts.
    pub accounts_count: usize,
    /// Number of configured companies.
    pub companies_count: usize,
    /// Configured period length in months.
    pub period_months: u32,

    // ---- Master data ----
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // ---- Document flows and subledger invoices ----
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // ---- OCPM ----
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // ---- Audit ----
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // ---- Anomalies and data quality ----
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // ---- Banking ----
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // ---- Graph export ----
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // ---- LLM enrichment (the `_ms` field is a duration in milliseconds) ----
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,

    // ---- Diffusion enhancement ----
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // ---- Neural hybrid settings (left out of serialized output when `None`) ----
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // ---- Causal generation ----
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // ---- Sourcing ----
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // ---- Financial reporting ----
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // ---- HR ----
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // ---- Accounting standards ----
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // ---- Manufacturing ----
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // ---- Sales, KPIs and budgets ----
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // ---- Tax master data ----
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // ---- ESG ----
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // ---- Intercompany ----
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // ---- Additional subledgers ----
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // ---- Treasury instruments ----
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // ---- Project accounting ----
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // ---- Miscellaneous counters ----
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // ---- Cash management ----
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // ---- Evolution and organizational events ----
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // ---- Fraud / counterfactual artefacts ----
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // ---- Entity graph and cross-process links ----
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // ---- Disruption, industry and period close ----
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1580
/// Drives the enhanced generation workflow.
///
/// Built via `new`, `with_defaults` or one of the `from_fingerprint*`
/// constructors, optionally customised with the `with_*` builders, and then run
/// through `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration supplied at construction.
    config: GeneratorConfig,
    /// Phase settings (includes the `show_progress` flag).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as the default value.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when unset.
    seed: u64,
    /// Progress-bar container, created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard assembled by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output location set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; filled by the fingerprint constructors.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Registry of country packs built from `config.country_packs`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives items emitted per phase.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider built from `config.templates`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Shared temporal context; present only when temporal patterns and
    /// business-day handling are both enabled in the config.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
}
1614
1615impl EnhancedOrchestrator {
1616 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1618 datasynth_config::validate_config(&config)?;
1619
1620 let seed = config.global.seed.unwrap_or_else(rand::random);
1621
1622 let resource_guard = Self::build_resource_guard(&config, None);
1624
1625 let country_pack_registry = match &config.country_packs {
1627 Some(cp) => {
1628 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1629 .map_err(|e| SynthError::config(e.to_string()))?
1630 }
1631 None => datasynth_core::CountryPackRegistry::builtin_only()
1632 .map_err(|e| SynthError::config(e.to_string()))?,
1633 };
1634
1635 let template_provider = Self::build_template_provider(&config)?;
1639
1640 let temporal_context = Self::build_temporal_context(&config)?;
1644
1645 Ok(Self {
1646 config,
1647 phase_config,
1648 coa: None,
1649 master_data: MasterDataSnapshot::default(),
1650 seed,
1651 multi_progress: None,
1652 resource_guard,
1653 output_path: None,
1654 copula_generators: Vec::new(),
1655 country_pack_registry,
1656 phase_sink: None,
1657 template_provider,
1658 temporal_context,
1659 })
1660 }
1661
1662 fn build_temporal_context(
1668 config: &GeneratorConfig,
1669 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1670 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1671
1672 let tp = &config.temporal_patterns;
1673 if !tp.enabled || !tp.business_days.enabled {
1674 return Ok(None);
1675 }
1676
1677 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1678 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1679 let end_date = start_date + chrono::Months::new(config.global.period_months);
1680
1681 let region_code = tp
1682 .calendars
1683 .regions
1684 .first()
1685 .cloned()
1686 .unwrap_or_else(|| "US".to_string());
1687 let region = parse_region_code(®ion_code);
1688
1689 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1690 }
1691
1692 fn build_template_provider(
1700 config: &GeneratorConfig,
1701 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1702 use datasynth_core::templates::{
1703 loader::{MergeStrategy, TemplateLoader},
1704 DefaultTemplateProvider,
1705 };
1706 use std::sync::Arc;
1707
1708 let provider = match &config.templates.path {
1709 None => DefaultTemplateProvider::new(),
1710 Some(path) => {
1711 let data = if path.is_dir() {
1712 TemplateLoader::load_from_directory(path)
1713 } else {
1714 TemplateLoader::load_from_file(path)
1715 }
1716 .map_err(|e| {
1717 SynthError::config(format!(
1718 "Failed to load templates from {}: {e}",
1719 path.display()
1720 ))
1721 })?;
1722 let strategy = match config.templates.merge_strategy {
1723 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1724 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1725 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1726 MergeStrategy::MergePreferFile
1727 }
1728 };
1729 DefaultTemplateProvider::with_templates(data, strategy)
1730 }
1731 };
1732 Ok(Arc::new(provider))
1733 }
1734
/// Creates an orchestrator using `PhaseConfig::default()`.
///
/// # Errors
/// Propagates any error returned by `new`.
pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
    Self::new(config, PhaseConfig::default())
}
1739
/// Builder-style setter: installs `sink` as the phase sink and returns `self`.
///
/// Any previously installed sink is replaced.
pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
    self.phase_sink = Some(sink);
    self
}
1745
/// Installs `sink` as the phase sink in place, replacing any earlier one.
pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
    self.phase_sink = Some(sink);
}
1750
1751 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1753 if let Some(ref sink) = self.phase_sink {
1754 for item in items {
1755 if let Ok(value) = serde_json::to_value(item) {
1756 if let Err(e) = sink.emit(phase, type_name, &value) {
1757 warn!(
1758 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1759 );
1760 }
1761 }
1762 }
1763 if let Err(e) = sink.phase_complete(phase) {
1764 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1765 }
1766 }
1767 }
1768
1769 pub fn with_progress(mut self, show: bool) -> Self {
1771 self.phase_config.show_progress = show;
1772 if show {
1773 self.multi_progress = Some(MultiProgress::new());
1774 }
1775 self
1776 }
1777
1778 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1780 let path = path.into();
1781 self.output_path = Some(path.clone());
1782 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1784 self
1785 }
1786
/// Returns a reference to the country-pack registry held by this orchestrator.
pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
    &self.country_pack_registry
}
1791
/// Looks up the country pack for `country` through the registry's
/// `get_by_str`.
///
/// NOTE(review): how unknown codes are resolved is decided by the registry and
/// is not visible here.
pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
    self.country_pack_registry.get_by_str(country)
}
1796
1797 fn primary_country_code(&self) -> &str {
1800 self.config
1801 .companies
1802 .first()
1803 .map(|c| c.country.as_str())
1804 .unwrap_or("US")
1805 }
1806
/// Country pack belonging to the primary country code
/// (see `primary_country_code`).
fn primary_pack(&self) -> &datasynth_core::CountryPack {
    self.country_pack_for(self.primary_country_code())
}
1811
1812 fn resolve_coa_framework(&self) -> CoAFramework {
1814 if self.config.accounting_standards.enabled {
1815 match self.config.accounting_standards.framework {
1816 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1817 return CoAFramework::FrenchPcg;
1818 }
1819 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1820 return CoAFramework::GermanSkr04;
1821 }
1822 _ => {}
1823 }
1824 }
1825 let pack = self.primary_pack();
1827 match pack.accounting.framework.as_str() {
1828 "french_gaap" => CoAFramework::FrenchPcg,
1829 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1830 _ => CoAFramework::UsGaap,
1831 }
1832 }
1833
/// Returns `true` when at least one copula generator spec is registered.
pub fn has_copulas(&self) -> bool {
    !self.copula_generators.is_empty()
}
1841
/// Read-only view of the registered copula generator specs.
pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
    &self.copula_generators
}
1850
/// Mutable view of the registered copula generator specs.
///
/// Returns a slice, so individual specs can be changed but none can be added
/// or removed through it.
pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
    &mut self.copula_generators
}
1857
1858 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1862 self.copula_generators
1863 .iter_mut()
1864 .find(|c| c.name == copula_name)
1865 .map(|c| c.generator.sample())
1866 }
1867
1868 pub fn from_fingerprint(
1891 fingerprint_path: &std::path::Path,
1892 phase_config: PhaseConfig,
1893 scale: f64,
1894 ) -> SynthResult<Self> {
1895 info!("Loading fingerprint from: {}", fingerprint_path.display());
1896
1897 let reader = FingerprintReader::new();
1899 let fingerprint = reader
1900 .read_from_file(fingerprint_path)
1901 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1902
1903 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1904 }
1905
1906 pub fn from_fingerprint_data(
1913 fingerprint: Fingerprint,
1914 phase_config: PhaseConfig,
1915 scale: f64,
1916 ) -> SynthResult<Self> {
1917 info!(
1918 "Synthesizing config from fingerprint (version: {}, tables: {})",
1919 fingerprint.manifest.version,
1920 fingerprint.schema.tables.len()
1921 );
1922
1923 let seed: u64 = rand::random();
1925 info!("Fingerprint synthesis seed: {}", seed);
1926
1927 let options = SynthesisOptions {
1929 scale,
1930 seed: Some(seed),
1931 preserve_correlations: true,
1932 inject_anomalies: true,
1933 };
1934 let synthesizer = ConfigSynthesizer::with_options(options);
1935
1936 let synthesis_result = synthesizer
1938 .synthesize_full(&fingerprint, seed)
1939 .map_err(|e| {
1940 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1941 })?;
1942
1943 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1945 Self::base_config_for_industry(industry)
1946 } else {
1947 Self::base_config_for_industry("manufacturing")
1948 };
1949
1950 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1952
1953 info!(
1955 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1956 fingerprint.schema.tables.len(),
1957 scale,
1958 synthesis_result.copula_generators.len()
1959 );
1960
1961 if !synthesis_result.copula_generators.is_empty() {
1962 for spec in &synthesis_result.copula_generators {
1963 info!(
1964 " Copula '{}' for table '{}': {} columns",
1965 spec.name,
1966 spec.table,
1967 spec.columns.len()
1968 );
1969 }
1970 }
1971
1972 let mut orchestrator = Self::new(config, phase_config)?;
1974
1975 orchestrator.copula_generators = synthesis_result.copula_generators;
1977
1978 Ok(orchestrator)
1979 }
1980
1981 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1983 use datasynth_config::presets::create_preset;
1984 use datasynth_config::TransactionVolume;
1985 use datasynth_core::models::{CoAComplexity, IndustrySector};
1986
1987 let sector = match industry.to_lowercase().as_str() {
1988 "manufacturing" => IndustrySector::Manufacturing,
1989 "retail" => IndustrySector::Retail,
1990 "financial" | "financial_services" => IndustrySector::FinancialServices,
1991 "healthcare" => IndustrySector::Healthcare,
1992 "technology" | "tech" => IndustrySector::Technology,
1993 _ => IndustrySector::Manufacturing,
1994 };
1995
1996 create_preset(
1998 sector,
1999 1, 12, CoAComplexity::Medium,
2002 TransactionVolume::TenK,
2003 )
2004 }
2005
2006 fn apply_config_patch(
2008 mut config: GeneratorConfig,
2009 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2010 ) -> GeneratorConfig {
2011 use datasynth_fingerprint::synthesis::ConfigValue;
2012
2013 for (key, value) in patch.values() {
2014 match (key.as_str(), value) {
2015 ("transactions.count", ConfigValue::Integer(n)) => {
2018 info!(
2019 "Fingerprint suggests {} transactions (apply via company volumes)",
2020 n
2021 );
2022 }
2023 ("global.period_months", ConfigValue::Integer(n)) => {
2024 config.global.period_months = (*n).clamp(1, 120) as u32;
2025 }
2026 ("global.start_date", ConfigValue::String(s)) => {
2027 config.global.start_date = s.clone();
2028 }
2029 ("global.seed", ConfigValue::Integer(n)) => {
2030 config.global.seed = Some(*n as u64);
2031 }
2032 ("fraud.enabled", ConfigValue::Bool(b)) => {
2033 config.fraud.enabled = *b;
2034 }
2035 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2036 config.fraud.fraud_rate = *f;
2037 }
2038 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2039 config.data_quality.enabled = *b;
2040 }
2041 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2043 config.fraud.enabled = *b;
2044 }
2045 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2046 config.fraud.fraud_rate = *f;
2047 }
2048 _ => {
2049 debug!("Ignoring unknown config patch key: {}", key);
2050 }
2051 }
2052 }
2053
2054 config
2055 }
2056
2057 fn build_resource_guard(
2059 config: &GeneratorConfig,
2060 output_path: Option<PathBuf>,
2061 ) -> ResourceGuard {
2062 let mut builder = ResourceGuardBuilder::new();
2063
2064 if config.global.memory_limit_mb > 0 {
2066 builder = builder.memory_limit(config.global.memory_limit_mb);
2067 }
2068
2069 if let Some(path) = output_path {
2071 builder = builder.output_path(path).min_free_disk(100); }
2073
2074 builder = builder.conservative();
2076
2077 builder.build()
2078 }
2079
/// Runs the resource guard's check and returns the reported degradation level.
///
/// # Errors
/// Propagates the error returned by the guard's `check`.
fn check_resources(&self) -> SynthResult<DegradationLevel> {
    self.resource_guard.check()
}
2087
2088 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2090 let level = self.resource_guard.check()?;
2091
2092 if level != DegradationLevel::Normal {
2093 warn!(
2094 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2095 phase,
2096 level,
2097 self.resource_guard.current_memory_mb(),
2098 self.resource_guard.available_disk_mb()
2099 );
2100 }
2101
2102 Ok(level)
2103 }
2104
/// Returns the degradation actions currently reported by the resource guard.
fn get_degradation_actions(&self) -> DegradationActions {
    self.resource_guard.get_actions()
}
2109
2110 fn check_memory_limit(&self) -> SynthResult<()> {
2112 self.check_resources()?;
2113 Ok(())
2114 }
2115
2116 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2118 info!("Starting enhanced generation workflow");
2119 info!(
2120 "Config: industry={:?}, period_months={}, companies={}",
2121 self.config.global.industry,
2122 self.config.global.period_months,
2123 self.config.companies.len()
2124 );
2125
2126 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2129 datasynth_core::serde_decimal::set_numeric_native(is_native);
2130 struct NumericModeGuard;
2131 impl Drop for NumericModeGuard {
2132 fn drop(&mut self) {
2133 datasynth_core::serde_decimal::set_numeric_native(false);
2134 }
2135 }
2136 let _numeric_guard = if is_native {
2137 Some(NumericModeGuard)
2138 } else {
2139 None
2140 };
2141
2142 let initial_level = self.check_resources_with_log("initial")?;
2144 if initial_level == DegradationLevel::Emergency {
2145 return Err(SynthError::resource(
2146 "Insufficient resources to start generation",
2147 ));
2148 }
2149
2150 let mut stats = EnhancedGenerationStatistics {
2151 companies_count: self.config.companies.len(),
2152 period_months: self.config.global.period_months,
2153 ..Default::default()
2154 };
2155
2156 let coa = self.phase_chart_of_accounts(&mut stats)?;
2158
2159 self.phase_master_data(&mut stats)?;
2161
2162 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2164 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2165 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2166
2167 let (mut document_flows, mut subledger, fa_journal_entries) =
2169 self.phase_document_flows(&mut stats)?;
2170
2171 self.emit_phase_items(
2173 "document_flows",
2174 "PurchaseOrder",
2175 &document_flows.purchase_orders,
2176 );
2177 self.emit_phase_items(
2178 "document_flows",
2179 "GoodsReceipt",
2180 &document_flows.goods_receipts,
2181 );
2182 self.emit_phase_items(
2183 "document_flows",
2184 "VendorInvoice",
2185 &document_flows.vendor_invoices,
2186 );
2187 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2188 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2189
2190 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2192
2193 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2198 .iter()
2199 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2200 .collect();
2201 if !opening_balance_jes.is_empty() {
2202 debug!(
2203 "Prepending {} opening balance JEs to entries",
2204 opening_balance_jes.len()
2205 );
2206 }
2207
2208 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2210
2211 if !opening_balance_jes.is_empty() {
2214 let mut combined = opening_balance_jes;
2215 combined.extend(entries);
2216 entries = combined;
2217 }
2218
2219 if !fa_journal_entries.is_empty() {
2221 debug!(
2222 "Appending {} FA acquisition JEs to main entries",
2223 fa_journal_entries.len()
2224 );
2225 entries.extend(fa_journal_entries);
2226 }
2227
2228 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2230
2231 let actions = self.get_degradation_actions();
2233
2234 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2236
2237 if !sourcing.contracts.is_empty() {
2240 let mut linked_count = 0usize;
2241 let po_vendor_pairs: Vec<(String, String)> = document_flows
2243 .p2p_chains
2244 .iter()
2245 .map(|chain| {
2246 (
2247 chain.purchase_order.vendor_id.clone(),
2248 chain.purchase_order.header.document_id.clone(),
2249 )
2250 })
2251 .collect();
2252
2253 for chain in &mut document_flows.p2p_chains {
2254 if chain.purchase_order.contract_id.is_none() {
2255 if let Some(contract) = sourcing
2256 .contracts
2257 .iter()
2258 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2259 {
2260 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2261 linked_count += 1;
2262 }
2263 }
2264 }
2265
2266 for contract in &mut sourcing.contracts {
2268 let po_ids: Vec<String> = po_vendor_pairs
2269 .iter()
2270 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2271 .map(|(_, po_id)| po_id.clone())
2272 .collect();
2273 if !po_ids.is_empty() {
2274 contract.purchase_order_ids = po_ids;
2275 }
2276 }
2277
2278 if linked_count > 0 {
2279 debug!(
2280 "Linked {} purchase orders to S2C contracts by vendor match",
2281 linked_count
2282 );
2283 }
2284 }
2285
2286 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2288
2289 if !intercompany.seller_journal_entries.is_empty()
2291 || !intercompany.buyer_journal_entries.is_empty()
2292 {
2293 let ic_je_count = intercompany.seller_journal_entries.len()
2294 + intercompany.buyer_journal_entries.len();
2295 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2296 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2297 debug!(
2298 "Appended {} IC journal entries to main entries",
2299 ic_je_count
2300 );
2301 }
2302
2303 if !intercompany.elimination_entries.is_empty() {
2305 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2306 &intercompany.elimination_entries,
2307 );
2308 if !elim_jes.is_empty() {
2309 debug!(
2310 "Appended {} elimination journal entries to main entries",
2311 elim_jes.len()
2312 );
2313 let elim_debit: rust_decimal::Decimal =
2315 elim_jes.iter().map(|je| je.total_debit()).sum();
2316 let elim_credit: rust_decimal::Decimal =
2317 elim_jes.iter().map(|je| je.total_credit()).sum();
2318 let elim_diff = (elim_debit - elim_credit).abs();
2319 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2321 return Err(datasynth_core::error::SynthError::generation(format!(
2322 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2323 elim_debit, elim_credit, elim_diff, tolerance
2324 )));
2325 }
2326 debug!(
2327 "IC elimination balance verified: debits={}, credits={} (diff={})",
2328 elim_debit, elim_credit, elim_diff
2329 );
2330 entries.extend(elim_jes);
2331 }
2332 }
2333
2334 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2336 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2337 document_flows
2338 .customer_invoices
2339 .extend(ic_docs.seller_invoices.iter().cloned());
2340 document_flows
2341 .purchase_orders
2342 .extend(ic_docs.buyer_orders.iter().cloned());
2343 document_flows
2344 .goods_receipts
2345 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2346 document_flows
2347 .vendor_invoices
2348 .extend(ic_docs.buyer_invoices.iter().cloned());
2349 debug!(
2350 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2351 ic_docs.seller_invoices.len(),
2352 ic_docs.buyer_orders.len(),
2353 ic_docs.buyer_goods_receipts.len(),
2354 ic_docs.buyer_invoices.len(),
2355 );
2356 }
2357 }
2358
2359 let hr = self.phase_hr_data(&mut stats)?;
2361
2362 if !hr.payroll_runs.is_empty() {
2364 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2365 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2366 entries.extend(payroll_jes);
2367 }
2368
2369 if !hr.pension_journal_entries.is_empty() {
2371 debug!(
2372 "Generated {} JEs from pension plans",
2373 hr.pension_journal_entries.len()
2374 );
2375 entries.extend(hr.pension_journal_entries.iter().cloned());
2376 }
2377
2378 if !hr.stock_comp_journal_entries.is_empty() {
2380 debug!(
2381 "Generated {} JEs from stock-based compensation",
2382 hr.stock_comp_journal_entries.len()
2383 );
2384 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2385 }
2386
2387 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2389
2390 if !manufacturing_snap.production_orders.is_empty() {
2392 let currency = self
2393 .config
2394 .companies
2395 .first()
2396 .map(|c| c.currency.as_str())
2397 .unwrap_or("USD");
2398 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2399 &manufacturing_snap.production_orders,
2400 &manufacturing_snap.quality_inspections,
2401 currency,
2402 );
2403 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2404 entries.extend(mfg_jes);
2405 }
2406
2407 if !manufacturing_snap.quality_inspections.is_empty() {
2409 let framework = match self.config.accounting_standards.framework {
2410 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2411 _ => "US_GAAP",
2412 };
2413 for company in &self.config.companies {
2414 let company_orders: Vec<_> = manufacturing_snap
2415 .production_orders
2416 .iter()
2417 .filter(|o| o.company_code == company.code)
2418 .cloned()
2419 .collect();
2420 let company_inspections: Vec<_> = manufacturing_snap
2421 .quality_inspections
2422 .iter()
2423 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2424 .cloned()
2425 .collect();
2426 if company_inspections.is_empty() {
2427 continue;
2428 }
2429 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2430 let warranty_result = warranty_gen.generate(
2431 &company.code,
2432 &company_orders,
2433 &company_inspections,
2434 &company.currency,
2435 framework,
2436 );
2437 if !warranty_result.journal_entries.is_empty() {
2438 debug!(
2439 "Generated {} warranty provision JEs for {}",
2440 warranty_result.journal_entries.len(),
2441 company.code
2442 );
2443 entries.extend(warranty_result.journal_entries);
2444 }
2445 }
2446 }
2447
2448 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2450 {
2451 let cogs_currency = self
2452 .config
2453 .companies
2454 .first()
2455 .map(|c| c.currency.as_str())
2456 .unwrap_or("USD");
2457 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2458 &document_flows.deliveries,
2459 &manufacturing_snap.production_orders,
2460 cogs_currency,
2461 );
2462 if !cogs_jes.is_empty() {
2463 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2464 entries.extend(cogs_jes);
2465 }
2466 }
2467
2468 if !manufacturing_snap.inventory_movements.is_empty()
2474 && !subledger.inventory_positions.is_empty()
2475 {
2476 use datasynth_core::models::MovementType as MfgMovementType;
2477 let mut receipt_count = 0usize;
2478 let mut issue_count = 0usize;
2479 for movement in &manufacturing_snap.inventory_movements {
2480 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2482 p.material_id == movement.material_code
2483 && p.company_code == movement.entity_code
2484 }) {
2485 match movement.movement_type {
2486 MfgMovementType::GoodsReceipt => {
2487 pos.add_quantity(
2489 movement.quantity,
2490 movement.value,
2491 movement.movement_date,
2492 );
2493 receipt_count += 1;
2494 }
2495 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2496 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2498 issue_count += 1;
2499 }
2500 _ => {}
2501 }
2502 }
2503 }
2504 debug!(
2505 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2506 manufacturing_snap.inventory_movements.len(),
2507 receipt_count,
2508 issue_count,
2509 );
2510 }
2511
2512 if !entries.is_empty() {
2515 stats.total_entries = entries.len() as u64;
2516 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2517 debug!(
2518 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2519 stats.total_entries, stats.total_line_items
2520 );
2521 }
2522
2523 if self.config.internal_controls.enabled && !entries.is_empty() {
2525 info!("Phase 7b: Applying internal controls to journal entries");
2526 let control_config = ControlGeneratorConfig {
2527 exception_rate: self.config.internal_controls.exception_rate,
2528 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2529 enable_sox_marking: true,
2530 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2531 self.config.internal_controls.sox_materiality_threshold,
2532 )
2533 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2534 ..Default::default()
2535 };
2536 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2537 for entry in &mut entries {
2538 control_gen.apply_controls(entry, &coa);
2539 }
2540 let with_controls = entries
2541 .iter()
2542 .filter(|e| !e.header.control_ids.is_empty())
2543 .count();
2544 info!(
2545 "Applied controls to {} entries ({} with control IDs assigned)",
2546 entries.len(),
2547 with_controls
2548 );
2549 }
2550
2551 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2555 .iter()
2556 .filter(|e| e.header.sod_violation)
2557 .filter_map(|e| {
2558 e.header.sod_conflict_type.map(|ct| {
2559 use datasynth_core::models::{RiskLevel, SodViolation};
2560 let severity = match ct {
2561 datasynth_core::models::SodConflictType::PaymentReleaser
2562 | datasynth_core::models::SodConflictType::RequesterApprover => {
2563 RiskLevel::Critical
2564 }
2565 datasynth_core::models::SodConflictType::PreparerApprover
2566 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2567 | datasynth_core::models::SodConflictType::JournalEntryPoster
2568 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2569 RiskLevel::High
2570 }
2571 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2572 RiskLevel::Medium
2573 }
2574 };
2575 let action = format!(
2576 "SoD conflict {:?} on entry {} ({})",
2577 ct, e.header.document_id, e.header.company_code
2578 );
2579 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2580 })
2581 })
2582 .collect();
2583 if !sod_violations.is_empty() {
2584 info!(
2585 "Phase 7c: Extracted {} SoD violations from {} entries",
2586 sod_violations.len(),
2587 entries.len()
2588 );
2589 }
2590
2591 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2593
2594 {
2602 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2603 if self.config.fraud.enabled && doc_rate > 0.0 {
2604 use datasynth_core::fraud_propagation::{
2605 inject_document_fraud, propagate_documents_to_entries,
2606 };
2607 use datasynth_core::utils::weighted_select;
2608 use datasynth_core::FraudType;
2609 use rand_chacha::rand_core::SeedableRng;
2610
2611 let dist = &self.config.fraud.fraud_type_distribution;
2612 let fraud_type_weights: [(FraudType, f64); 8] = [
2613 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2614 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2615 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2616 (
2617 FraudType::ImproperCapitalization,
2618 dist.expense_capitalization,
2619 ),
2620 (FraudType::SplitTransaction, dist.split_transaction),
2621 (FraudType::TimingAnomaly, dist.timing_anomaly),
2622 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2623 (FraudType::DuplicatePayment, dist.duplicate_payment),
2624 ];
2625 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2626 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2627 if weights_sum <= 0.0 {
2628 FraudType::FictitiousEntry
2629 } else {
2630 *weighted_select(rng, &fraud_type_weights)
2631 }
2632 };
2633
2634 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2635 let mut doc_tagged = 0usize;
2636 macro_rules! inject_into {
2637 ($collection:expr) => {{
2638 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2639 $collection.iter_mut().map(|d| &mut d.header).collect();
2640 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2641 }};
2642 }
2643 inject_into!(document_flows.purchase_orders);
2644 inject_into!(document_flows.goods_receipts);
2645 inject_into!(document_flows.vendor_invoices);
2646 inject_into!(document_flows.payments);
2647 inject_into!(document_flows.sales_orders);
2648 inject_into!(document_flows.deliveries);
2649 inject_into!(document_flows.customer_invoices);
2650 if doc_tagged > 0 {
2651 info!(
2652 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2653 );
2654 }
2655
2656 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2657 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2658 Vec::new();
2659 headers.extend(
2660 document_flows
2661 .purchase_orders
2662 .iter()
2663 .map(|d| d.header.clone()),
2664 );
2665 headers.extend(
2666 document_flows
2667 .goods_receipts
2668 .iter()
2669 .map(|d| d.header.clone()),
2670 );
2671 headers.extend(
2672 document_flows
2673 .vendor_invoices
2674 .iter()
2675 .map(|d| d.header.clone()),
2676 );
2677 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2678 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2679 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2680 headers.extend(
2681 document_flows
2682 .customer_invoices
2683 .iter()
2684 .map(|d| d.header.clone()),
2685 );
2686 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2687 if propagated > 0 {
2688 info!(
2689 "Propagated document-level fraud to {propagated} derived journal entries"
2690 );
2691 }
2692 }
2693 }
2694 }
2695
2696 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2698
2699 {
2717 use datasynth_core::fraud_bias::{
2718 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2719 };
2720 use rand_chacha::rand_core::SeedableRng;
2721 let cfg = FraudBehavioralBiasConfig::default();
2722 if cfg.enabled {
2723 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2724 let mut swept = 0usize;
2725 for entry in entries.iter_mut() {
2726 if entry.header.is_fraud && !entry.header.is_anomaly {
2727 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2728 swept += 1;
2729 }
2730 }
2731 if swept > 0 {
2732 info!(
2733 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2734 (doc-propagated + je_generator intrinsic fraud)"
2735 );
2736 }
2737 }
2738 }
2739
2740 self.emit_phase_items(
2742 "anomaly_injection",
2743 "LabeledAnomaly",
2744 &anomaly_labels.labels,
2745 );
2746
2747 if self.config.fraud.propagate_to_document {
2755 use std::collections::HashMap;
2756 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2769 for je in &entries {
2770 if je.header.is_fraud {
2771 if let Some(ref fraud_type) = je.header.fraud_type {
2772 if let Some(ref reference) = je.header.reference {
2773 fraud_map.insert(reference.clone(), *fraud_type);
2775 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2778 if !bare.is_empty() {
2779 fraud_map.insert(bare.to_string(), *fraud_type);
2780 }
2781 }
2782 }
2783 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2785 }
2786 }
2787 }
2788 if !fraud_map.is_empty() {
2789 let mut propagated = 0usize;
2790 macro_rules! propagate_to {
2792 ($collection:expr) => {
2793 for doc in &mut $collection {
2794 if doc.header.propagate_fraud(&fraud_map) {
2795 propagated += 1;
2796 }
2797 }
2798 };
2799 }
2800 propagate_to!(document_flows.purchase_orders);
2801 propagate_to!(document_flows.goods_receipts);
2802 propagate_to!(document_flows.vendor_invoices);
2803 propagate_to!(document_flows.payments);
2804 propagate_to!(document_flows.sales_orders);
2805 propagate_to!(document_flows.deliveries);
2806 propagate_to!(document_flows.customer_invoices);
2807 if propagated > 0 {
2808 info!(
2809 "Propagated fraud labels to {} document flow records",
2810 propagated
2811 );
2812 }
2813 }
2814 }
2815
2816 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2818
2819 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2821
2822 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2824
2825 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2827
2828 let balance_validation = self.phase_balance_validation(&entries)?;
2830
2831 let subledger_reconciliation =
2833 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2834
2835 let (data_quality_stats, quality_issues) =
2837 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2838
2839 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2841
2842 {
2844 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2849 for je in &entries {
2850 if je.header.is_fraud || je.header.is_anomaly {
2851 continue;
2852 }
2853 let diff = (je.total_debit() - je.total_credit()).abs();
2854 if diff > tolerance {
2855 unbalanced_clean += 1;
2856 if unbalanced_clean <= 3 {
2857 warn!(
2858 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2859 je.header.document_id,
2860 je.total_debit(),
2861 je.total_credit(),
2862 diff
2863 );
2864 }
2865 }
2866 }
2867 if unbalanced_clean > 0 {
2868 return Err(datasynth_core::error::SynthError::generation(format!(
2869 "{} non-anomaly JEs are unbalanced (debits != credits). \
2870 First few logged above. Tolerance={}",
2871 unbalanced_clean, tolerance
2872 )));
2873 }
2874 debug!(
2875 "Phase 10c: All {} non-anomaly JEs individually balanced",
2876 entries
2877 .iter()
2878 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2879 .count()
2880 );
2881
2882 let company_codes: Vec<String> = self
2884 .config
2885 .companies
2886 .iter()
2887 .map(|c| c.code.clone())
2888 .collect();
2889 for company_code in &company_codes {
2890 let mut assets = rust_decimal::Decimal::ZERO;
2891 let mut liab_equity = rust_decimal::Decimal::ZERO;
2892
2893 for entry in &entries {
2894 if entry.header.company_code != *company_code {
2895 continue;
2896 }
2897 for line in &entry.lines {
2898 let acct = &line.gl_account;
2899 let net = line.debit_amount - line.credit_amount;
2900 if acct.starts_with('1') {
2902 assets += net;
2903 }
2904 else if acct.starts_with('2') || acct.starts_with('3') {
2906 liab_equity -= net; }
2908 }
2911 }
2912
2913 let bs_diff = (assets - liab_equity).abs();
2914 if bs_diff > tolerance {
2915 warn!(
2916 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2917 revenue/expense closing entries may not fully offset",
2918 company_code, assets, liab_equity, bs_diff
2919 );
2920 } else {
2924 debug!(
2925 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2926 company_code, assets, liab_equity, bs_diff
2927 );
2928 }
2929 }
2930
2931 info!("Phase 10c: All generation-time accounting assertions passed");
2932 }
2933
2934 let audit = self.phase_audit_data(&entries, &mut stats)?;
2936
2937 let mut banking = self.phase_banking_data(&mut stats)?;
2939
2940 if self.phase_config.generate_banking
2945 && !document_flows.payments.is_empty()
2946 && !banking.accounts.is_empty()
2947 {
2948 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2949 if bridge_rate > 0.0 {
2950 let mut bridge =
2951 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2952 self.seed,
2953 );
2954 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2955 &document_flows.payments,
2956 &banking.customers,
2957 &banking.accounts,
2958 bridge_rate,
2959 );
2960 info!(
2961 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2962 bridge_stats.bridged_count,
2963 bridge_stats.transactions_emitted,
2964 bridge_stats.fraud_propagated,
2965 );
2966 let bridged_count = bridged_txns.len();
2967 banking.transactions.extend(bridged_txns);
2968
2969 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2972 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2973 &mut banking.transactions,
2974 );
2975 }
2976
2977 banking.suspicious_count = banking
2979 .transactions
2980 .iter()
2981 .filter(|t| t.is_suspicious)
2982 .count();
2983 stats.banking_transaction_count = banking.transactions.len();
2984 stats.banking_suspicious_count = banking.suspicious_count;
2985 }
2986 }
2987
2988 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2990
2991 self.phase_llm_enrichment(&mut stats);
2993
2994 self.phase_diffusion_enhancement(&entries, &mut stats);
2996
2997 self.phase_causal_overlay(&mut stats);
2999
3000 let mut financial_reporting = self.phase_financial_reporting(
3004 &document_flows,
3005 &entries,
3006 &coa,
3007 &hr,
3008 &audit,
3009 &mut stats,
3010 )?;
3011
3012 {
3014 use datasynth_core::models::StatementType;
3015 for stmt in &financial_reporting.consolidated_statements {
3016 if stmt.statement_type == StatementType::BalanceSheet {
3017 let total_assets: rust_decimal::Decimal = stmt
3018 .line_items
3019 .iter()
3020 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3021 .map(|li| li.amount)
3022 .sum();
3023 let total_le: rust_decimal::Decimal = stmt
3024 .line_items
3025 .iter()
3026 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3027 .map(|li| li.amount)
3028 .sum();
3029 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3030 warn!(
3031 "BS equation imbalance: assets={}, L+E={}",
3032 total_assets, total_le
3033 );
3034 }
3035 }
3036 }
3037 }
3038
3039 let accounting_standards =
3041 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3042
3043 if !accounting_standards.ecl_journal_entries.is_empty() {
3045 debug!(
3046 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3047 accounting_standards.ecl_journal_entries.len()
3048 );
3049 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3050 }
3051
3052 if !accounting_standards.provision_journal_entries.is_empty() {
3054 debug!(
3055 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3056 accounting_standards.provision_journal_entries.len()
3057 );
3058 entries.extend(
3059 accounting_standards
3060 .provision_journal_entries
3061 .iter()
3062 .cloned(),
3063 );
3064 }
3065
3066 let mut ocpm = self.phase_ocpm_events(
3068 &document_flows,
3069 &sourcing,
3070 &hr,
3071 &manufacturing_snap,
3072 &banking,
3073 &audit,
3074 &financial_reporting,
3075 &mut stats,
3076 )?;
3077
3078 if let Some(ref event_log) = ocpm.event_log {
3080 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3081 }
3082
3083 if let Some(ref event_log) = ocpm.event_log {
3085 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3087 std::collections::HashMap::new();
3088 for (idx, event) in event_log.events.iter().enumerate() {
3089 if let Some(ref doc_ref) = event.document_ref {
3090 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3091 }
3092 }
3093
3094 if !doc_index.is_empty() {
3095 let mut annotated = 0usize;
3096 for entry in &mut entries {
3097 let doc_id_str = entry.header.document_id.to_string();
3098 let mut matched_indices: Vec<usize> = Vec::new();
3100 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3101 matched_indices.extend(indices);
3102 }
3103 if let Some(ref reference) = entry.header.reference {
3104 let bare_ref = reference
3105 .find(':')
3106 .map(|i| &reference[i + 1..])
3107 .unwrap_or(reference.as_str());
3108 if let Some(indices) = doc_index.get(bare_ref) {
3109 for &idx in indices {
3110 if !matched_indices.contains(&idx) {
3111 matched_indices.push(idx);
3112 }
3113 }
3114 }
3115 }
3116 if !matched_indices.is_empty() {
3118 for &idx in &matched_indices {
3119 let event = &event_log.events[idx];
3120 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3121 entry.header.ocpm_event_ids.push(event.event_id);
3122 }
3123 for obj_ref in &event.object_refs {
3124 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3125 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3126 }
3127 }
3128 if entry.header.ocpm_case_id.is_none() {
3129 entry.header.ocpm_case_id = event.case_id;
3130 }
3131 }
3132 annotated += 1;
3133 }
3134 }
3135 debug!(
3136 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3137 annotated
3138 );
3139 }
3140 }
3141
3142 if let Some(ref mut event_log) = ocpm.event_log {
3146 let synthesized =
3147 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3148 if synthesized > 0 {
3149 info!(
3150 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3151 );
3152 }
3153
3154 let anomaly_events =
3159 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3160 if anomaly_events > 0 {
3161 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3162 }
3163
3164 let p2p_cfg = &self.config.ocpm.p2p_process;
3169 let any_imperfection = p2p_cfg.rework_probability > 0.0
3170 || p2p_cfg.skip_step_probability > 0.0
3171 || p2p_cfg.out_of_order_probability > 0.0;
3172 if any_imperfection {
3173 use rand_chacha::rand_core::SeedableRng;
3174 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3175 rework_rate: p2p_cfg.rework_probability,
3176 skip_rate: p2p_cfg.skip_step_probability,
3177 out_of_order_rate: p2p_cfg.out_of_order_probability,
3178 };
3179 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3180 let stats =
3181 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3182 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3183 info!(
3184 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3185 stats.rework, stats.skipped, stats.out_of_order
3186 );
3187 }
3188 }
3189 }
3190
3191 let sales_kpi_budgets =
3193 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3194
3195 let treasury =
3199 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3200
3201 if !treasury.journal_entries.is_empty() {
3203 debug!(
3204 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3205 treasury.journal_entries.len()
3206 );
3207 entries.extend(treasury.journal_entries.iter().cloned());
3208 }
3209
3210 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3212
3213 if !tax.tax_posting_journal_entries.is_empty() {
3215 debug!(
3216 "Merging {} tax posting JEs into GL",
3217 tax.tax_posting_journal_entries.len()
3218 );
3219 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3220 }
3221
3222 {
3240 use datasynth_core::fraud_bias::{
3241 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3242 };
3243 use rand_chacha::rand_core::SeedableRng;
3244 let cfg = FraudBehavioralBiasConfig::default();
3245 if cfg.enabled {
3246 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3247 let mut swept = 0usize;
3248 for entry in entries.iter_mut() {
3249 if entry.header.is_fraud && !entry.header.is_anomaly {
3250 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3251 swept += 1;
3252 }
3253 }
3254 if swept > 0 {
3255 info!(
3256 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3257 non-anomaly fraud entries (covers late-added JEs from \
3258 ECL / provisions / treasury / tax / period-close)"
3259 );
3260 }
3261 }
3262 }
3263
3264 {
3268 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3269
3270 let framework_str = {
3271 use datasynth_config::schema::AccountingFrameworkConfig;
3272 match self
3273 .config
3274 .accounting_standards
3275 .framework
3276 .unwrap_or_default()
3277 {
3278 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3279 "IFRS"
3280 }
3281 _ => "US_GAAP",
3282 }
3283 };
3284
3285 let depreciation_total: rust_decimal::Decimal = entries
3287 .iter()
3288 .filter(|je| je.header.document_type == "CL")
3289 .flat_map(|je| je.lines.iter())
3290 .filter(|l| l.gl_account.starts_with("6000"))
3291 .map(|l| l.debit_amount)
3292 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3293
3294 let interest_paid: rust_decimal::Decimal = entries
3296 .iter()
3297 .flat_map(|je| je.lines.iter())
3298 .filter(|l| l.gl_account.starts_with("7100"))
3299 .map(|l| l.debit_amount)
3300 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3301
3302 let tax_paid: rust_decimal::Decimal = entries
3304 .iter()
3305 .flat_map(|je| je.lines.iter())
3306 .filter(|l| l.gl_account.starts_with("8000"))
3307 .map(|l| l.debit_amount)
3308 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3309
3310 let capex: rust_decimal::Decimal = entries
3312 .iter()
3313 .flat_map(|je| je.lines.iter())
3314 .filter(|l| l.gl_account.starts_with("1500"))
3315 .map(|l| l.debit_amount)
3316 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3317
3318 let dividends_paid: rust_decimal::Decimal = entries
3320 .iter()
3321 .flat_map(|je| je.lines.iter())
3322 .filter(|l| l.gl_account == "2170")
3323 .map(|l| l.debit_amount)
3324 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3325
3326 let cf_data = CashFlowSourceData {
3327 depreciation_total,
3328 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3330 delta_ap: rust_decimal::Decimal::ZERO,
3331 delta_inventory: rust_decimal::Decimal::ZERO,
3332 capex,
3333 debt_issuance: rust_decimal::Decimal::ZERO,
3334 debt_repayment: rust_decimal::Decimal::ZERO,
3335 interest_paid,
3336 tax_paid,
3337 dividends_paid,
3338 framework: framework_str.to_string(),
3339 };
3340
3341 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3342 if !enhanced_cf_items.is_empty() {
3343 use datasynth_core::models::StatementType;
3345 let merge_count = enhanced_cf_items.len();
3346 for stmt in financial_reporting
3347 .financial_statements
3348 .iter_mut()
3349 .chain(financial_reporting.consolidated_statements.iter_mut())
3350 .chain(
3351 financial_reporting
3352 .standalone_statements
3353 .values_mut()
3354 .flat_map(|v| v.iter_mut()),
3355 )
3356 {
3357 if stmt.statement_type == StatementType::CashFlowStatement {
3358 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3359 }
3360 }
3361 info!(
3362 "Enhanced cash flow: {} supplementary items merged into CF statements",
3363 merge_count
3364 );
3365 }
3366 }
3367
3368 self.generate_notes_to_financial_statements(
3371 &mut financial_reporting,
3372 &accounting_standards,
3373 &tax,
3374 &hr,
3375 &audit,
3376 &treasury,
3377 );
3378
3379 if self.config.companies.len() >= 2 && !entries.is_empty() {
3383 let companies: Vec<(String, String)> = self
3384 .config
3385 .companies
3386 .iter()
3387 .map(|c| (c.code.clone(), c.name.clone()))
3388 .collect();
3389 let ic_elim: rust_decimal::Decimal =
3390 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3391 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3392 .unwrap_or(NaiveDate::MIN);
3393 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3394 let period_label = format!(
3395 "{}-{:02}",
3396 end_date.year(),
3397 (end_date - chrono::Days::new(1)).month()
3398 );
3399
3400 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3401 let (je_segments, je_recon) =
3402 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3403 if !je_segments.is_empty() {
3404 info!(
3405 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3406 je_segments.len(),
3407 ic_elim,
3408 );
3409 if financial_reporting.segment_reports.is_empty() {
3411 financial_reporting.segment_reports = je_segments;
3412 financial_reporting.segment_reconciliations = vec![je_recon];
3413 } else {
3414 financial_reporting.segment_reports.extend(je_segments);
3415 financial_reporting.segment_reconciliations.push(je_recon);
3416 }
3417 }
3418 }
3419
3420 let esg_snap =
3422 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3423
3424 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3426
3427 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3429
3430 let disruption_events = self.phase_disruption_events(&mut stats)?;
3432
3433 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3435
3436 let (entity_relationship_graph, cross_process_links) =
3438 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3439
3440 let industry_output = self.phase_industry_data(&mut stats);
3442
3443 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3445
3446 if self.config.diffusion.enabled
3464 && (self.config.diffusion.backend == "neural"
3465 || self.config.diffusion.backend == "hybrid")
3466 {
3467 let neural = &self.config.diffusion.neural;
3468 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3469 stats.neural_hybrid_weight = Some(weight);
3470 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3471 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3472 warn!(
3473 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3474 the neural/hybrid training path is not yet shipped. Config \
3475 is captured in stats (weight={weight:.2}, strategy={}, \
3476 columns={}) but no neural training runs. Statistical \
3477 diffusion (backend='statistical') continues to work.",
3478 self.config.diffusion.backend,
3479 neural.hybrid_strategy,
3480 neural.neural_columns.len(),
3481 );
3482 }
3483
3484 self.phase_hypergraph_export(
3486 &coa,
3487 &entries,
3488 &document_flows,
3489 &sourcing,
3490 &hr,
3491 &manufacturing_snap,
3492 &banking,
3493 &audit,
3494 &financial_reporting,
3495 &ocpm,
3496 &compliance_regulations,
3497 &mut stats,
3498 )?;
3499
3500 if self.phase_config.generate_graph_export {
3503 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3504 }
3505
3506 if self.config.streaming.enabled {
3508 info!("Note: streaming config is enabled but batch mode does not use it");
3509 }
3510 if self.config.vendor_network.enabled {
3511 debug!("Vendor network config available; relationship graph generation is partial");
3512 }
3513 if self.config.customer_segmentation.enabled {
3514 debug!("Customer segmentation config available; segment-aware generation is partial");
3515 }
3516
3517 let resource_stats = self.resource_guard.stats();
3519 info!(
3520 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3521 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3522 resource_stats.disk.estimated_bytes_written,
3523 resource_stats.degradation_level
3524 );
3525
3526 if let Some(ref sink) = self.phase_sink {
3528 if let Err(e) = sink.flush() {
3529 warn!("Stream sink flush failed: {e}");
3530 }
3531 }
3532
3533 let lineage = self.build_lineage_graph();
3535
3536 let gate_result = if self.config.quality_gates.enabled {
3538 let profile_name = &self.config.quality_gates.profile;
3539 match datasynth_eval::gates::get_profile(profile_name) {
3540 Some(profile) => {
3541 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3543
3544 if balance_validation.validated {
3546 eval.coherence.balance =
3547 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3548 equation_balanced: balance_validation.is_balanced,
3549 max_imbalance: (balance_validation.total_debits
3550 - balance_validation.total_credits)
3551 .abs(),
3552 periods_evaluated: 1,
3553 periods_imbalanced: if balance_validation.is_balanced {
3554 0
3555 } else {
3556 1
3557 },
3558 period_results: Vec::new(),
3559 companies_evaluated: self.config.companies.len(),
3560 });
3561 }
3562
3563 eval.coherence.passes = balance_validation.is_balanced;
3565 if !balance_validation.is_balanced {
3566 eval.coherence
3567 .failures
3568 .push("Balance sheet equation not satisfied".to_string());
3569 }
3570
3571 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3573 eval.statistical.passes = !entries.is_empty();
3574
3575 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3578
3579 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3580 info!(
3581 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3582 profile_name, result.gates_passed, result.gates_total, result.summary
3583 );
3584 Some(result)
3585 }
3586 None => {
3587 warn!(
3588 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3589 profile_name
3590 );
3591 None
3592 }
3593 }
3594 } else {
3595 None
3596 };
3597
3598 let internal_controls = if self.config.internal_controls.enabled {
3600 InternalControl::standard_controls()
3601 } else {
3602 Vec::new()
3603 };
3604
3605 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3609
3610 let statistical_validation = self.phase_statistical_validation(&entries)?;
3615
3616 let interconnectivity = self.phase_interconnectivity();
3620
3621 Ok(EnhancedGenerationResult {
3622 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3623 master_data: std::mem::take(&mut self.master_data),
3624 document_flows,
3625 subledger,
3626 ocpm,
3627 audit,
3628 banking,
3629 graph_export,
3630 sourcing,
3631 financial_reporting,
3632 hr,
3633 accounting_standards,
3634 manufacturing: manufacturing_snap,
3635 sales_kpi_budgets,
3636 tax,
3637 esg: esg_snap,
3638 treasury,
3639 project_accounting,
3640 process_evolution,
3641 organizational_events,
3642 disruption_events,
3643 intercompany,
3644 journal_entries: entries,
3645 anomaly_labels,
3646 balance_validation,
3647 data_quality_stats,
3648 quality_issues,
3649 statistics: stats,
3650 lineage: Some(lineage),
3651 gate_result,
3652 internal_controls,
3653 sod_violations,
3654 opening_balances,
3655 subledger_reconciliation,
3656 counterfactual_pairs,
3657 red_flags,
3658 collusion_rings,
3659 temporal_vendor_chains,
3660 entity_relationship_graph,
3661 cross_process_links,
3662 industry_output,
3663 compliance_regulations,
3664 analytics_metadata,
3665 statistical_validation,
3666 interconnectivity,
3667 })
3668 }
3669
3670 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3674 use rand::{RngExt, SeedableRng};
3675 use rand_chacha::ChaCha8Rng;
3676
3677 let mut snap = InterconnectivitySnapshot::default();
3678 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3679
3680 let vn = &self.config.vendor_network;
3682 if vn.enabled {
3683 let total = self.master_data.vendors.len();
3684 if total > 0 {
3685 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3686 let remaining_after_t1 = total.saturating_sub(tier1_count);
3687 let depth = vn.depth.clamp(1, 3);
3688 let tier2_count = if depth >= 2 {
3689 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3690 (tier1_count * avg).min(remaining_after_t1)
3691 } else {
3692 0
3693 };
3694 let tier3_count = total
3695 .saturating_sub(tier1_count)
3696 .saturating_sub(tier2_count);
3697
3698 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3699 let tier = if idx < tier1_count {
3700 1
3701 } else if idx < tier1_count + tier2_count {
3702 2
3703 } else {
3704 3
3705 };
3706 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3707
3708 let cl = &vn.clusters;
3710 let roll: f64 = rng.random();
3711 let cluster = if roll < cl.reliable_strategic {
3712 "reliable_strategic"
3713 } else if roll < cl.reliable_strategic + cl.standard_operational {
3714 "standard_operational"
3715 } else if roll
3716 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3717 {
3718 "transactional"
3719 } else {
3720 "problematic"
3721 };
3722 snap.vendor_clusters
3723 .push((vendor.vendor_id.clone(), cluster.to_string()));
3724 }
3725 let _ = tier3_count; }
3727 }
3728
3729 let cs = &self.config.customer_segmentation;
3731 if cs.enabled {
3732 let seg = &cs.value_segments;
3733 for customer in &self.master_data.customers {
3734 let roll: f64 = rng.random();
3735 let value_segment = if roll < seg.enterprise.customer_share {
3736 "enterprise"
3737 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3738 "mid_market"
3739 } else if roll
3740 < seg.enterprise.customer_share
3741 + seg.mid_market.customer_share
3742 + seg.smb.customer_share
3743 {
3744 "smb"
3745 } else {
3746 "consumer"
3747 };
3748 snap.customer_value_segments
3749 .push((customer.customer_id.clone(), value_segment.to_string()));
3750
3751 let roll2: f64 = rng.random();
3752 let life = &cs.lifecycle;
3753 let lifecycle = if roll2 < life.prospect_rate {
3754 "prospect"
3755 } else if roll2 < life.prospect_rate + life.new_rate {
3756 "new"
3757 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3758 "growth"
3759 } else if roll2
3760 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3761 {
3762 "mature"
3763 } else if roll2
3764 < life.prospect_rate
3765 + life.new_rate
3766 + life.growth_rate
3767 + life.mature_rate
3768 + life.at_risk_rate
3769 {
3770 "at_risk"
3771 } else if roll2
3772 < life.prospect_rate
3773 + life.new_rate
3774 + life.growth_rate
3775 + life.mature_rate
3776 + life.at_risk_rate
3777 + life.churned_rate
3778 {
3779 "churned"
3780 } else {
3781 "won_back"
3782 };
3783 snap.customer_lifecycle_stages
3784 .push((customer.customer_id.clone(), lifecycle.to_string()));
3785 }
3786 }
3787
3788 let is = &self.config.industry_specific;
3790 if is.enabled {
3791 snap.industry_metadata.push(format!(
3792 "industry_specific.enabled=true (industry={:?})",
3793 self.config.global.industry
3794 ));
3795 }
3796
3797 snap
3798 }
3799
3800 fn phase_chart_of_accounts(
3806 &mut self,
3807 stats: &mut EnhancedGenerationStatistics,
3808 ) -> SynthResult<Arc<ChartOfAccounts>> {
3809 info!("Phase 1: Generating Chart of Accounts");
3810 let coa = self.generate_coa()?;
3811 stats.accounts_count = coa.account_count();
3812 info!(
3813 "Chart of Accounts generated: {} accounts",
3814 stats.accounts_count
3815 );
3816 self.check_resources_with_log("post-coa")?;
3817 Ok(coa)
3818 }
3819
3820 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3822 if self.phase_config.generate_master_data {
3823 info!("Phase 2: Generating Master Data");
3824 self.generate_master_data()?;
3825 stats.vendor_count = self.master_data.vendors.len();
3826 stats.customer_count = self.master_data.customers.len();
3827 stats.material_count = self.master_data.materials.len();
3828 stats.asset_count = self.master_data.assets.len();
3829 stats.employee_count = self.master_data.employees.len();
3830 info!(
3831 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3832 stats.vendor_count, stats.customer_count, stats.material_count,
3833 stats.asset_count, stats.employee_count
3834 );
3835 self.check_resources_with_log("post-master-data")?;
3836 } else {
3837 debug!("Phase 2: Skipped (master data generation disabled)");
3838 }
3839 Ok(())
3840 }
3841
3842 fn phase_document_flows(
3844 &mut self,
3845 stats: &mut EnhancedGenerationStatistics,
3846 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3847 let mut document_flows = DocumentFlowSnapshot::default();
3848 let mut subledger = SubledgerSnapshot::default();
3849 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3852
3853 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3854 info!("Phase 3: Generating Document Flows");
3855 self.generate_document_flows(&mut document_flows)?;
3856 stats.p2p_chain_count = document_flows.p2p_chains.len();
3857 stats.o2c_chain_count = document_flows.o2c_chains.len();
3858 info!(
3859 "Document flows generated: {} P2P chains, {} O2C chains",
3860 stats.p2p_chain_count, stats.o2c_chain_count
3861 );
3862
3863 debug!("Phase 3b: Linking document flows to subledgers");
3865 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3866 stats.ap_invoice_count = subledger.ap_invoices.len();
3867 stats.ar_invoice_count = subledger.ar_invoices.len();
3868 debug!(
3869 "Subledgers linked: {} AP invoices, {} AR invoices",
3870 stats.ap_invoice_count, stats.ar_invoice_count
3871 );
3872
3873 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3878 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3879 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3880 debug!("Payment settlements applied to AP and AR subledgers");
3881
3882 if let Ok(start_date) =
3885 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3886 {
3887 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3888 - chrono::Days::new(1);
3889 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3890 for company in &self.config.companies {
3897 let ar_report = ARAgingReport::from_invoices(
3898 company.code.clone(),
3899 &subledger.ar_invoices,
3900 as_of_date,
3901 );
3902 subledger.ar_aging_reports.push(ar_report);
3903
3904 let ap_report = APAgingReport::from_invoices(
3905 company.code.clone(),
3906 &subledger.ap_invoices,
3907 as_of_date,
3908 );
3909 subledger.ap_aging_reports.push(ap_report);
3910 }
3911 debug!(
3912 "AR/AP aging reports built: {} AR, {} AP",
3913 subledger.ar_aging_reports.len(),
3914 subledger.ap_aging_reports.len()
3915 );
3916
3917 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3919 {
3920 use datasynth_generators::DunningGenerator;
3921 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3922 for company in &self.config.companies {
3923 let currency = company.currency.as_str();
3924 let mut company_invoices: Vec<
3927 datasynth_core::models::subledger::ar::ARInvoice,
3928 > = subledger
3929 .ar_invoices
3930 .iter()
3931 .filter(|inv| inv.company_code == company.code)
3932 .cloned()
3933 .collect();
3934
3935 if company_invoices.is_empty() {
3936 continue;
3937 }
3938
3939 let result = dunning_gen.execute_dunning_run(
3940 &company.code,
3941 as_of_date,
3942 &mut company_invoices,
3943 currency,
3944 );
3945
3946 for updated in &company_invoices {
3948 if let Some(orig) = subledger
3949 .ar_invoices
3950 .iter_mut()
3951 .find(|i| i.invoice_number == updated.invoice_number)
3952 {
3953 orig.dunning_info = updated.dunning_info.clone();
3954 }
3955 }
3956
3957 subledger.dunning_runs.push(result.dunning_run);
3958 subledger.dunning_letters.extend(result.letters);
3959 dunning_journal_entries.extend(result.journal_entries);
3961 }
3962 debug!(
3963 "Dunning runs complete: {} runs, {} letters",
3964 subledger.dunning_runs.len(),
3965 subledger.dunning_letters.len()
3966 );
3967 }
3968 }
3969
3970 self.check_resources_with_log("post-document-flows")?;
3971 } else {
3972 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3973 }
3974
3975 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3977 if !self.master_data.assets.is_empty() {
3978 debug!("Generating FA subledger records");
3979 let company_code = self
3980 .config
3981 .companies
3982 .first()
3983 .map(|c| c.code.as_str())
3984 .unwrap_or("1000");
3985 let currency = self
3986 .config
3987 .companies
3988 .first()
3989 .map(|c| c.currency.as_str())
3990 .unwrap_or("USD");
3991
3992 let mut fa_gen = datasynth_generators::FAGenerator::new(
3993 datasynth_generators::FAGeneratorConfig::default(),
3994 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3995 );
3996
3997 for asset in &self.master_data.assets {
3998 let (record, je) = fa_gen.generate_asset_acquisition(
3999 company_code,
4000 &format!("{:?}", asset.asset_class),
4001 &asset.description,
4002 asset.acquisition_date,
4003 currency,
4004 asset.cost_center.as_deref(),
4005 );
4006 subledger.fa_records.push(record);
4007 fa_journal_entries.push(je);
4008 }
4009
4010 stats.fa_subledger_count = subledger.fa_records.len();
4011 debug!(
4012 "FA subledger records generated: {} (with {} acquisition JEs)",
4013 stats.fa_subledger_count,
4014 fa_journal_entries.len()
4015 );
4016 }
4017
4018 if !self.master_data.materials.is_empty() {
4020 debug!("Generating Inventory subledger records");
4021 let first_company = self.config.companies.first();
4022 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4023 let inv_currency = first_company
4024 .map(|c| c.currency.clone())
4025 .unwrap_or_else(|| "USD".to_string());
4026
4027 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4028 datasynth_generators::InventoryGeneratorConfig::default(),
4029 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4030 inv_currency.clone(),
4031 );
4032
4033 for (i, material) in self.master_data.materials.iter().enumerate() {
4034 let plant = format!("PLANT{:02}", (i % 3) + 1);
4035 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4036 let initial_qty = rust_decimal::Decimal::from(
4037 material
4038 .safety_stock
4039 .to_string()
4040 .parse::<i64>()
4041 .unwrap_or(100),
4042 );
4043
4044 let position = inv_gen.generate_position(
4045 company_code,
4046 &plant,
4047 &storage_loc,
4048 &material.material_id,
4049 &material.description,
4050 initial_qty,
4051 Some(material.standard_cost),
4052 &inv_currency,
4053 );
4054 subledger.inventory_positions.push(position);
4055 }
4056
4057 stats.inventory_subledger_count = subledger.inventory_positions.len();
4058 debug!(
4059 "Inventory subledger records generated: {}",
4060 stats.inventory_subledger_count
4061 );
4062 }
4063
4064 if !subledger.fa_records.is_empty() {
4066 if let Ok(start_date) =
4067 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4068 {
4069 let company_code = self
4070 .config
4071 .companies
4072 .first()
4073 .map(|c| c.code.as_str())
4074 .unwrap_or("1000");
4075 let fiscal_year = start_date.year();
4076 let start_period = start_date.month();
4077 let end_period =
4078 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4079
4080 let depr_cfg = FaDepreciationScheduleConfig {
4081 fiscal_year,
4082 start_period,
4083 end_period,
4084 seed_offset: 800,
4085 };
4086 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4087 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4088 let run_count = runs.len();
4089 subledger.depreciation_runs = runs;
4090 debug!(
4091 "Depreciation runs generated: {} runs for {} periods",
4092 run_count, self.config.global.period_months
4093 );
4094 }
4095 }
4096
4097 if !subledger.inventory_positions.is_empty() {
4099 if let Ok(start_date) =
4100 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4101 {
4102 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4103 - chrono::Days::new(1);
4104
4105 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4106 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4107
4108 for company in &self.config.companies {
4109 let result = inv_val_gen.generate(
4110 &company.code,
4111 &subledger.inventory_positions,
4112 as_of_date,
4113 );
4114 subledger.inventory_valuations.push(result);
4115 }
4116 debug!(
4117 "Inventory valuations generated: {} company reports",
4118 subledger.inventory_valuations.len()
4119 );
4120 }
4121 }
4122
4123 Ok((document_flows, subledger, fa_journal_entries))
4124 }
4125
4126 #[allow(clippy::too_many_arguments)]
4128 fn phase_ocpm_events(
4129 &mut self,
4130 document_flows: &DocumentFlowSnapshot,
4131 sourcing: &SourcingSnapshot,
4132 hr: &HrSnapshot,
4133 manufacturing: &ManufacturingSnapshot,
4134 banking: &BankingSnapshot,
4135 audit: &AuditSnapshot,
4136 financial_reporting: &FinancialReportingSnapshot,
4137 stats: &mut EnhancedGenerationStatistics,
4138 ) -> SynthResult<OcpmSnapshot> {
4139 let degradation = self.check_resources()?;
4140 if degradation >= DegradationLevel::Reduced {
4141 debug!(
4142 "Phase skipped due to resource pressure (degradation: {:?})",
4143 degradation
4144 );
4145 return Ok(OcpmSnapshot::default());
4146 }
4147 if self.phase_config.generate_ocpm_events {
4148 info!("Phase 3c: Generating OCPM Events");
4149 let ocpm_snapshot = self.generate_ocpm_events(
4150 document_flows,
4151 sourcing,
4152 hr,
4153 manufacturing,
4154 banking,
4155 audit,
4156 financial_reporting,
4157 )?;
4158 stats.ocpm_event_count = ocpm_snapshot.event_count;
4159 stats.ocpm_object_count = ocpm_snapshot.object_count;
4160 stats.ocpm_case_count = ocpm_snapshot.case_count;
4161 info!(
4162 "OCPM events generated: {} events, {} objects, {} cases",
4163 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4164 );
4165 self.check_resources_with_log("post-ocpm")?;
4166 Ok(ocpm_snapshot)
4167 } else {
4168 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4169 Ok(OcpmSnapshot::default())
4170 }
4171 }
4172
4173 fn phase_journal_entries(
4175 &mut self,
4176 coa: &Arc<ChartOfAccounts>,
4177 document_flows: &DocumentFlowSnapshot,
4178 _stats: &mut EnhancedGenerationStatistics,
4179 ) -> SynthResult<Vec<JournalEntry>> {
4180 let mut entries = Vec::new();
4181
4182 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4184 debug!("Phase 4a: Generating JEs from document flows");
4185 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4186 debug!("Generated {} JEs from document flows", flow_entries.len());
4187 entries.extend(flow_entries);
4188 }
4189
4190 if self.phase_config.generate_journal_entries {
4192 info!("Phase 4: Generating Journal Entries");
4193 let je_entries = self.generate_journal_entries(coa)?;
4194 info!("Generated {} standalone journal entries", je_entries.len());
4195 entries.extend(je_entries);
4196 } else {
4197 debug!("Phase 4: Skipped (journal entry generation disabled)");
4198 }
4199
4200 if !entries.is_empty() {
4201 self.check_resources_with_log("post-journal-entries")?;
4204 }
4205
4206 Ok(entries)
4207 }
4208
4209 fn phase_anomaly_injection(
4211 &mut self,
4212 entries: &mut [JournalEntry],
4213 actions: &DegradationActions,
4214 stats: &mut EnhancedGenerationStatistics,
4215 ) -> SynthResult<AnomalyLabels> {
4216 if self.phase_config.inject_anomalies
4217 && !entries.is_empty()
4218 && !actions.skip_anomaly_injection
4219 {
4220 info!("Phase 5: Injecting Anomalies");
4221 let result = self.inject_anomalies(entries)?;
4222 stats.anomalies_injected = result.labels.len();
4223 info!("Injected {} anomalies", stats.anomalies_injected);
4224 self.check_resources_with_log("post-anomaly-injection")?;
4225 Ok(result)
4226 } else if actions.skip_anomaly_injection {
4227 warn!("Phase 5: Skipped due to resource degradation");
4228 Ok(AnomalyLabels::default())
4229 } else {
4230 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4231 Ok(AnomalyLabels::default())
4232 }
4233 }
4234
4235 fn phase_balance_validation(
4237 &mut self,
4238 entries: &[JournalEntry],
4239 ) -> SynthResult<BalanceValidationResult> {
4240 if self.phase_config.validate_balances && !entries.is_empty() {
4241 debug!("Phase 6: Validating Balances");
4242 let balance_validation = self.validate_journal_entries(entries)?;
4243 if balance_validation.is_balanced {
4244 debug!("Balance validation passed");
4245 } else {
4246 warn!(
4247 "Balance validation found {} errors",
4248 balance_validation.validation_errors.len()
4249 );
4250 }
4251 Ok(balance_validation)
4252 } else {
4253 Ok(BalanceValidationResult::default())
4254 }
4255 }
4256
4257 fn phase_data_quality_injection(
4259 &mut self,
4260 entries: &mut [JournalEntry],
4261 actions: &DegradationActions,
4262 stats: &mut EnhancedGenerationStatistics,
4263 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4264 if self.phase_config.inject_data_quality
4265 && !entries.is_empty()
4266 && !actions.skip_data_quality
4267 {
4268 info!("Phase 7: Injecting Data Quality Variations");
4269 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4270 stats.data_quality_issues = dq_stats.records_with_issues;
4271 info!("Injected {} data quality issues", stats.data_quality_issues);
4272 self.check_resources_with_log("post-data-quality")?;
4273 Ok((dq_stats, quality_issues))
4274 } else if actions.skip_data_quality {
4275 warn!("Phase 7: Skipped due to resource degradation");
4276 Ok((stats_with_denominator(entries.len()), Vec::new()))
4280 } else {
4281 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4282 Ok((stats_with_denominator(entries.len()), Vec::new()))
4283 }
4284 }
4285
4286 fn phase_period_close(
4296 &mut self,
4297 entries: &mut Vec<JournalEntry>,
4298 subledger: &SubledgerSnapshot,
4299 stats: &mut EnhancedGenerationStatistics,
4300 ) -> SynthResult<()> {
4301 if !self.phase_config.generate_period_close || entries.is_empty() {
4302 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4303 return Ok(());
4304 }
4305
4306 info!("Phase 10b: Generating period-close journal entries");
4307
4308 use datasynth_core::accounts::{
4309 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4310 };
4311 use rust_decimal::Decimal;
4312
4313 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4314 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4315 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4316 let close_date = end_date - chrono::Days::new(1);
4318
4319 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4324 .config
4325 .companies
4326 .iter()
4327 .map(|c| c.code.clone())
4328 .collect();
4329
4330 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4332 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4333
4334 let period_months = self.config.global.period_months;
4338 for asset in &subledger.fa_records {
4339 use datasynth_core::models::subledger::fa::AssetStatus;
4341 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4342 continue;
4343 }
4344 let useful_life_months = asset.useful_life_months();
4345 if useful_life_months == 0 {
4346 continue;
4348 }
4349 let salvage_value = asset.salvage_value();
4350 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4351 if depreciable_base == Decimal::ZERO {
4352 continue;
4353 }
4354 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4355 * Decimal::from(period_months))
4356 .round_dp(2);
4357 if period_depr <= Decimal::ZERO {
4358 continue;
4359 }
4360
4361 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4362 depr_header.document_type = "CL".to_string();
4363 depr_header.header_text = Some(format!(
4364 "Depreciation - {} {}",
4365 asset.asset_number, asset.description
4366 ));
4367 depr_header.created_by = "CLOSE_ENGINE".to_string();
4368 depr_header.source = TransactionSource::Automated;
4369 depr_header.business_process = Some(BusinessProcess::R2R);
4370
4371 let doc_id = depr_header.document_id;
4372 let mut depr_je = JournalEntry::new(depr_header);
4373
4374 depr_je.add_line(JournalEntryLine::debit(
4376 doc_id,
4377 1,
4378 expense_accounts::DEPRECIATION.to_string(),
4379 period_depr,
4380 ));
4381 depr_je.add_line(JournalEntryLine::credit(
4383 doc_id,
4384 2,
4385 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4386 period_depr,
4387 ));
4388
4389 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4390 close_jes.push(depr_je);
4391 }
4392
4393 if !subledger.fa_records.is_empty() {
4394 debug!(
4395 "Generated {} depreciation JEs from {} FA records",
4396 close_jes.len(),
4397 subledger.fa_records.len()
4398 );
4399 }
4400
4401 {
4405 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4406 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4407 if let Some(ctx) = &self.temporal_context {
4410 accrual_gen.set_temporal_context(Arc::clone(ctx));
4411 }
4412
4413 let accrual_items: &[(&str, &str, &str)] = &[
4415 ("Accrued Utilities", "6200", "2100"),
4416 ("Accrued Rent", "6300", "2100"),
4417 ("Accrued Interest", "6100", "2150"),
4418 ];
4419
4420 for company_code in &company_codes {
4421 let company_revenue: Decimal = entries
4423 .iter()
4424 .filter(|e| e.header.company_code == *company_code)
4425 .flat_map(|e| e.lines.iter())
4426 .filter(|l| l.gl_account.starts_with('4'))
4427 .map(|l| l.credit_amount - l.debit_amount)
4428 .fold(Decimal::ZERO, |acc, v| acc + v);
4429
4430 if company_revenue <= Decimal::ZERO {
4431 continue;
4432 }
4433
4434 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4436 if accrual_base <= Decimal::ZERO {
4437 continue;
4438 }
4439
4440 for (description, expense_acct, liability_acct) in accrual_items {
4441 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4442 company_code,
4443 description,
4444 accrual_base,
4445 expense_acct,
4446 liability_acct,
4447 close_date,
4448 None,
4449 );
4450 close_jes.push(accrual_je);
4451 if let Some(rev_je) = reversal_je {
4452 close_jes.push(rev_je);
4453 }
4454 }
4455 }
4456
4457 debug!(
4458 "Generated accrual entries for {} companies",
4459 company_codes.len()
4460 );
4461 }
4462
4463 for company_code in &company_codes {
4464 let mut total_revenue = Decimal::ZERO;
4469 let mut total_expenses = Decimal::ZERO;
4470
4471 for entry in entries.iter() {
4472 if entry.header.company_code != *company_code {
4473 continue;
4474 }
4475 for line in &entry.lines {
4476 let category = AccountCategory::from_account(&line.gl_account);
4477 match category {
4478 AccountCategory::Revenue => {
4479 total_revenue += line.credit_amount - line.debit_amount;
4481 }
4482 AccountCategory::Cogs
4483 | AccountCategory::OperatingExpense
4484 | AccountCategory::OtherIncomeExpense
4485 | AccountCategory::Tax => {
4486 total_expenses += line.debit_amount - line.credit_amount;
4488 }
4489 _ => {}
4490 }
4491 }
4492 }
4493
4494 let pre_tax_income = total_revenue - total_expenses;
4495
4496 if pre_tax_income == Decimal::ZERO {
4498 debug!(
4499 "Company {}: no pre-tax income, skipping period close",
4500 company_code
4501 );
4502 continue;
4503 }
4504
4505 if pre_tax_income > Decimal::ZERO {
4507 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4509
4510 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4511 tax_header.document_type = "CL".to_string();
4512 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4513 tax_header.created_by = "CLOSE_ENGINE".to_string();
4514 tax_header.source = TransactionSource::Automated;
4515 tax_header.business_process = Some(BusinessProcess::R2R);
4516
4517 let doc_id = tax_header.document_id;
4518 let mut tax_je = JournalEntry::new(tax_header);
4519
4520 tax_je.add_line(JournalEntryLine::debit(
4522 doc_id,
4523 1,
4524 tax_accounts::TAX_EXPENSE.to_string(),
4525 tax_amount,
4526 ));
4527 tax_je.add_line(JournalEntryLine::credit(
4529 doc_id,
4530 2,
4531 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4532 tax_amount,
4533 ));
4534
4535 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4536 close_jes.push(tax_je);
4537 } else {
4538 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4541 if dta_amount > Decimal::ZERO {
4542 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4543 dta_header.document_type = "CL".to_string();
4544 dta_header.header_text =
4545 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4546 dta_header.created_by = "CLOSE_ENGINE".to_string();
4547 dta_header.source = TransactionSource::Automated;
4548 dta_header.business_process = Some(BusinessProcess::R2R);
4549
4550 let doc_id = dta_header.document_id;
4551 let mut dta_je = JournalEntry::new(dta_header);
4552
4553 dta_je.add_line(JournalEntryLine::debit(
4555 doc_id,
4556 1,
4557 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4558 dta_amount,
4559 ));
4560 dta_je.add_line(JournalEntryLine::credit(
4563 doc_id,
4564 2,
4565 tax_accounts::TAX_EXPENSE.to_string(),
4566 dta_amount,
4567 ));
4568
4569 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4570 close_jes.push(dta_je);
4571 debug!(
4572 "Company {}: loss year — recognised DTA of {}",
4573 company_code, dta_amount
4574 );
4575 }
4576 }
4577
4578 let tax_provision = if pre_tax_income > Decimal::ZERO {
4584 (pre_tax_income * tax_rate).round_dp(2)
4585 } else {
4586 Decimal::ZERO
4587 };
4588 let net_income = pre_tax_income - tax_provision;
4589
4590 if net_income > Decimal::ZERO {
4591 use datasynth_generators::DividendGenerator;
4592 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4594 let currency_str = self
4595 .config
4596 .companies
4597 .iter()
4598 .find(|c| c.code == *company_code)
4599 .map(|c| c.currency.as_str())
4600 .unwrap_or("USD");
4601 let div_result = div_gen.generate(
4602 company_code,
4603 close_date,
4604 Decimal::new(1, 0), dividend_amount,
4606 currency_str,
4607 );
4608 let div_je_count = div_result.journal_entries.len();
4609 close_jes.extend(div_result.journal_entries);
4610 debug!(
4611 "Company {}: declared dividend of {} ({} JEs)",
4612 company_code, dividend_amount, div_je_count
4613 );
4614 }
4615
4616 if net_income != Decimal::ZERO {
4621 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4622 close_header.document_type = "CL".to_string();
4623 close_header.header_text =
4624 Some(format!("Income statement close - {}", company_code));
4625 close_header.created_by = "CLOSE_ENGINE".to_string();
4626 close_header.source = TransactionSource::Automated;
4627 close_header.business_process = Some(BusinessProcess::R2R);
4628
4629 let doc_id = close_header.document_id;
4630 let mut close_je = JournalEntry::new(close_header);
4631
4632 let abs_net_income = net_income.abs();
4633
4634 if net_income > Decimal::ZERO {
4635 close_je.add_line(JournalEntryLine::debit(
4637 doc_id,
4638 1,
4639 equity_accounts::INCOME_SUMMARY.to_string(),
4640 abs_net_income,
4641 ));
4642 close_je.add_line(JournalEntryLine::credit(
4643 doc_id,
4644 2,
4645 equity_accounts::RETAINED_EARNINGS.to_string(),
4646 abs_net_income,
4647 ));
4648 } else {
4649 close_je.add_line(JournalEntryLine::debit(
4651 doc_id,
4652 1,
4653 equity_accounts::RETAINED_EARNINGS.to_string(),
4654 abs_net_income,
4655 ));
4656 close_je.add_line(JournalEntryLine::credit(
4657 doc_id,
4658 2,
4659 equity_accounts::INCOME_SUMMARY.to_string(),
4660 abs_net_income,
4661 ));
4662 }
4663
4664 debug_assert!(
4665 close_je.is_balanced(),
4666 "Income statement closing JE must be balanced"
4667 );
4668 close_jes.push(close_je);
4669 }
4670 }
4671
4672 let close_count = close_jes.len();
4673 if close_count > 0 {
4674 info!("Generated {} period-close journal entries", close_count);
4675 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4676 entries.extend(close_jes);
4677 stats.period_close_je_count = close_count;
4678
4679 stats.total_entries = entries.len() as u64;
4681 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4682 } else {
4683 debug!("No period-close entries generated (no income statement activity)");
4684 }
4685
4686 Ok(())
4687 }
4688
4689 fn phase_audit_data(
4691 &mut self,
4692 entries: &[JournalEntry],
4693 stats: &mut EnhancedGenerationStatistics,
4694 ) -> SynthResult<AuditSnapshot> {
4695 if self.phase_config.generate_audit {
4696 info!("Phase 8: Generating Audit Data");
4697 let audit_snapshot = self.generate_audit_data(entries)?;
4698 stats.audit_engagement_count = audit_snapshot.engagements.len();
4699 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4700 stats.audit_evidence_count = audit_snapshot.evidence.len();
4701 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4702 stats.audit_finding_count = audit_snapshot.findings.len();
4703 stats.audit_judgment_count = audit_snapshot.judgments.len();
4704 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4705 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4706 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4707 stats.audit_sample_count = audit_snapshot.samples.len();
4708 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4709 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4710 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4711 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4712 stats.audit_related_party_transaction_count =
4713 audit_snapshot.related_party_transactions.len();
4714 info!(
4715 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4716 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4717 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4718 {} RP transactions",
4719 stats.audit_engagement_count,
4720 stats.audit_workpaper_count,
4721 stats.audit_evidence_count,
4722 stats.audit_risk_count,
4723 stats.audit_finding_count,
4724 stats.audit_judgment_count,
4725 stats.audit_confirmation_count,
4726 stats.audit_procedure_step_count,
4727 stats.audit_sample_count,
4728 stats.audit_analytical_result_count,
4729 stats.audit_ia_function_count,
4730 stats.audit_ia_report_count,
4731 stats.audit_related_party_count,
4732 stats.audit_related_party_transaction_count,
4733 );
4734 self.check_resources_with_log("post-audit")?;
4735 Ok(audit_snapshot)
4736 } else {
4737 debug!("Phase 8: Skipped (audit generation disabled)");
4738 Ok(AuditSnapshot::default())
4739 }
4740 }
4741
4742 fn phase_banking_data(
4744 &mut self,
4745 stats: &mut EnhancedGenerationStatistics,
4746 ) -> SynthResult<BankingSnapshot> {
4747 if self.phase_config.generate_banking {
4748 info!("Phase 9: Generating Banking KYC/AML Data");
4749 let banking_snapshot = self.generate_banking_data()?;
4750 stats.banking_customer_count = banking_snapshot.customers.len();
4751 stats.banking_account_count = banking_snapshot.accounts.len();
4752 stats.banking_transaction_count = banking_snapshot.transactions.len();
4753 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4754 info!(
4755 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4756 stats.banking_customer_count, stats.banking_account_count,
4757 stats.banking_transaction_count, stats.banking_suspicious_count
4758 );
4759 self.check_resources_with_log("post-banking")?;
4760 Ok(banking_snapshot)
4761 } else {
4762 debug!("Phase 9: Skipped (banking generation disabled)");
4763 Ok(BankingSnapshot::default())
4764 }
4765 }
4766
4767 fn phase_graph_export(
4769 &mut self,
4770 entries: &[JournalEntry],
4771 coa: &Arc<ChartOfAccounts>,
4772 stats: &mut EnhancedGenerationStatistics,
4773 ) -> SynthResult<GraphExportSnapshot> {
4774 if self.phase_config.generate_graph_export && !entries.is_empty() {
4775 info!("Phase 10: Exporting Accounting Network Graphs");
4776 match self.export_graphs(entries, coa, stats) {
4777 Ok(snapshot) => {
4778 info!(
4779 "Graph export complete: {} graphs ({} nodes, {} edges)",
4780 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4781 );
4782 Ok(snapshot)
4783 }
4784 Err(e) => {
4785 warn!("Phase 10: Graph export failed: {}", e);
4786 Ok(GraphExportSnapshot::default())
4787 }
4788 }
4789 } else {
4790 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4791 Ok(GraphExportSnapshot::default())
4792 }
4793 }
4794
4795 #[allow(clippy::too_many_arguments)]
4797 fn phase_hypergraph_export(
4798 &self,
4799 coa: &Arc<ChartOfAccounts>,
4800 entries: &[JournalEntry],
4801 document_flows: &DocumentFlowSnapshot,
4802 sourcing: &SourcingSnapshot,
4803 hr: &HrSnapshot,
4804 manufacturing: &ManufacturingSnapshot,
4805 banking: &BankingSnapshot,
4806 audit: &AuditSnapshot,
4807 financial_reporting: &FinancialReportingSnapshot,
4808 ocpm: &OcpmSnapshot,
4809 compliance: &ComplianceRegulationsSnapshot,
4810 stats: &mut EnhancedGenerationStatistics,
4811 ) -> SynthResult<()> {
4812 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4813 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4814 match self.export_hypergraph(
4815 coa,
4816 entries,
4817 document_flows,
4818 sourcing,
4819 hr,
4820 manufacturing,
4821 banking,
4822 audit,
4823 financial_reporting,
4824 ocpm,
4825 compliance,
4826 stats,
4827 ) {
4828 Ok(info) => {
4829 info!(
4830 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4831 info.node_count, info.edge_count, info.hyperedge_count
4832 );
4833 }
4834 Err(e) => {
4835 warn!("Phase 10b: Hypergraph export failed: {}", e);
4836 }
4837 }
4838 } else {
4839 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4840 }
4841 Ok(())
4842 }
4843
4844 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4850 if !self.config.llm.enabled {
4851 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4852 return;
4853 }
4854
4855 info!("Phase 11: Starting LLM Enrichment");
4856 let start = std::time::Instant::now();
4857
4858 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4859 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4862 let schema_provider = &self.config.llm.provider;
4863 let api_key_env = match schema_provider.as_str() {
4864 "openai" => Some("OPENAI_API_KEY"),
4865 "anthropic" => Some("ANTHROPIC_API_KEY"),
4866 "custom" => Some("LLM_API_KEY"),
4867 _ => None,
4868 };
4869 if let Some(key_env) = api_key_env {
4870 if std::env::var(key_env).is_ok() {
4871 let llm_config = datasynth_core::llm::LlmConfig {
4872 model: self.config.llm.model.clone(),
4873 api_key_env: key_env.to_string(),
4874 ..datasynth_core::llm::LlmConfig::default()
4875 };
4876 match HttpLlmProvider::new(llm_config) {
4877 Ok(p) => Arc::new(p),
4878 Err(e) => {
4879 warn!(
4880 "Failed to create HttpLlmProvider: {}; falling back to mock",
4881 e
4882 );
4883 Arc::new(MockLlmProvider::new(self.seed))
4884 }
4885 }
4886 } else {
4887 Arc::new(MockLlmProvider::new(self.seed))
4888 }
4889 } else {
4890 Arc::new(MockLlmProvider::new(self.seed))
4891 }
4892 };
4893 let industry = format!("{:?}", self.config.global.industry);
4897
4898 let vendor_enricher =
4899 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
4900 let max_vendors = self
4901 .config
4902 .llm
4903 .max_vendor_enrichments
4904 .min(self.master_data.vendors.len());
4905 let mut vendors_enriched = 0usize;
4906 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
4907 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4908 Ok(name) => {
4909 vendor.name = name;
4910 vendors_enriched += 1;
4911 }
4912 Err(e) => warn!(
4913 "LLM vendor enrichment failed for {}: {}",
4914 vendor.vendor_id, e
4915 ),
4916 }
4917 }
4918
4919 let mut customers_enriched = 0usize;
4920 if self.config.llm.enrich_customers {
4921 let customer_enricher =
4922 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
4923 &provider,
4924 ));
4925 let max_customers = self
4926 .config
4927 .llm
4928 .max_customer_enrichments
4929 .min(self.master_data.customers.len());
4930 for customer in self.master_data.customers.iter_mut().take(max_customers) {
4931 match customer_enricher.enrich_customer_name(
4932 &industry,
4933 "general",
4934 &customer.country,
4935 ) {
4936 Ok(name) => {
4937 customer.name = name;
4938 customers_enriched += 1;
4939 }
4940 Err(e) => warn!(
4941 "LLM customer enrichment failed for {}: {}",
4942 customer.customer_id, e
4943 ),
4944 }
4945 }
4946 }
4947
4948 let mut materials_enriched = 0usize;
4949 if self.config.llm.enrich_materials {
4950 let material_enricher =
4951 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
4952 &provider,
4953 ));
4954 let max_materials = self
4955 .config
4956 .llm
4957 .max_material_enrichments
4958 .min(self.master_data.materials.len());
4959 for material in self.master_data.materials.iter_mut().take(max_materials) {
4960 let material_type = format!("{:?}", material.material_type);
4961 match material_enricher.enrich_material_description(&material_type, &industry) {
4962 Ok(desc) => {
4963 material.description = desc;
4964 materials_enriched += 1;
4965 }
4966 Err(e) => warn!(
4967 "LLM material enrichment failed for {}: {}",
4968 material.material_id, e
4969 ),
4970 }
4971 }
4972 }
4973
4974 (vendors_enriched, customers_enriched, materials_enriched)
4975 }));
4976
4977 match result {
4978 Ok((v, c, m)) => {
4979 stats.llm_vendors_enriched = v;
4980 stats.llm_customers_enriched = c;
4981 stats.llm_materials_enriched = m;
4982 let elapsed = start.elapsed();
4983 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4984 info!(
4985 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
4986 v, c, m, stats.llm_enrichment_ms
4987 );
4988 }
4989 Err(_) => {
4990 let elapsed = start.elapsed();
4991 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4992 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4993 }
4994 }
4995 }
4996
4997 fn phase_diffusion_enhancement(
5009 &self,
5010 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5011 stats: &mut EnhancedGenerationStatistics,
5012 ) {
5013 if !self.config.diffusion.enabled {
5014 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5015 return;
5016 }
5017
5018 info!("Phase 12: Starting Diffusion Enhancement");
5019 let start = std::time::Instant::now();
5020
5021 let backend_choice = self.config.diffusion.backend.as_str();
5022 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5023
5024 if use_neural {
5025 #[cfg(feature = "neural")]
5026 {
5027 match self.run_neural_diffusion_phase(entries) {
5028 Ok(sample_count) => {
5029 stats.diffusion_samples_generated = sample_count;
5030 let elapsed = start.elapsed();
5031 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5032 info!(
5033 "Phase 12 complete ({}): {} samples in {}ms",
5034 backend_choice, sample_count, stats.diffusion_enhancement_ms
5035 );
5036 return;
5037 }
5038 Err(e) => {
5039 warn!(
5040 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5041 );
5042 }
5044 }
5045 }
5046 #[cfg(not(feature = "neural"))]
5047 {
5048 warn!(
5049 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5050 not compiled in — falling back to statistical. Rebuild with \
5051 `--features neural` (or `neural-cuda` for GPU) to enable.",
5052 backend_choice
5053 );
5054 }
5055 } else if !matches!(backend_choice, "statistical" | "") {
5056 warn!(
5057 "Phase 12: unknown backend '{}', falling back to statistical",
5058 backend_choice
5059 );
5060 }
5061
5062 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5064 let means = vec![5000.0, 3.0, 2.0];
5065 let stds = vec![2000.0, 1.5, 1.0];
5066
5067 let diffusion_config = DiffusionConfig {
5068 n_steps: self.config.diffusion.n_steps,
5069 seed: self.seed,
5070 ..Default::default()
5071 };
5072
5073 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5074 let n_samples = self.config.diffusion.sample_size;
5075 let n_features = 3;
5076 backend.generate(n_samples, n_features, self.seed).len()
5077 }));
5078
5079 match result {
5080 Ok(sample_count) => {
5081 stats.diffusion_samples_generated = sample_count;
5082 let elapsed = start.elapsed();
5083 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5084 info!(
5085 "Phase 12 complete (statistical): {} samples in {}ms",
5086 sample_count, stats.diffusion_enhancement_ms
5087 );
5088 }
5089 Err(_) => {
5090 let elapsed = start.elapsed();
5091 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5092 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5093 }
5094 }
5095 }
5096
#[cfg(feature = "neural")]
/// Runs the neural diffusion backend and returns the number of samples it
/// generated.
///
/// Up to 5000 journal entries are turned into three-feature training rows
/// (sum of positive debit amounts, line count, required approval levels —
/// defaulting to 1.0 when no approval workflow is attached). If a checkpoint
/// path is configured the backend is loaded from it; otherwise a score
/// network is trained on those rows.
///
/// # Errors
///
/// Returns a generation error when `entries` is empty, when the checkpoint
/// cannot be loaded, or when training fails.
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{
        DiffusionBackend, NeuralDiffusionBackend, NeuralDiffusionTrainer, NeuralTrainingConfig,
    };
    use rust_decimal::prelude::ToPrimitive;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // One feature row per journal entry: [total debit, line count, approval level].
    let to_feature_row = |je: &JournalEntry| -> Vec<f64> {
        let total_amount: f64 = je
            .lines
            .iter()
            .filter(|line| line.debit_amount > rust_decimal::Decimal::ZERO)
            .map(|line| line.debit_amount.to_f64().unwrap_or(0.0))
            .sum();
        let line_count = je.lines.len() as f64;
        let approval_level = match je.header.approval_workflow.as_ref() {
            Some(workflow) => workflow.required_levels as f64,
            None => 1.0,
        };
        vec![total_amount, line_count, approval_level]
    };
    let training_data: Vec<Vec<f64>> = entries.iter().take(5000).map(to_feature_row).collect();

    let n_features = training_data.first().map_or(3, |row| row.len());

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        Some(ckpt_path) => {
            let path = std::path::Path::new(ckpt_path);
            info!(
                " Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        None => {
            info!(
                " Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                " Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    let generated = backend.generate(cfg.sample_size, n_features, self.seed);
    Ok(generated.len())
}
5184
5185 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5192 if !self.config.causal.enabled {
5193 debug!("Phase 13: Skipped (causal generation disabled)");
5194 return;
5195 }
5196
5197 info!("Phase 13: Starting Causal Overlay");
5198 let start = std::time::Instant::now();
5199
5200 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5201 let graph = match self.config.causal.template.as_str() {
5203 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5204 _ => CausalGraph::fraud_detection_template(),
5205 };
5206
5207 let scm = StructuralCausalModel::new(graph.clone())
5208 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5209
5210 let n_samples = self.config.causal.sample_size;
5211 let samples = scm
5212 .generate(n_samples, self.seed)
5213 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5214
5215 let validation_passed = if self.config.causal.validate {
5217 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5218 if report.valid {
5219 info!(
5220 "Causal validation passed: all {} checks OK",
5221 report.checks.len()
5222 );
5223 } else {
5224 warn!(
5225 "Causal validation: {} violations detected: {:?}",
5226 report.violations.len(),
5227 report.violations
5228 );
5229 }
5230 Some(report.valid)
5231 } else {
5232 None
5233 };
5234
5235 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5236 }));
5237
5238 match result {
5239 Ok(Ok((sample_count, validation_passed))) => {
5240 stats.causal_samples_generated = sample_count;
5241 stats.causal_validation_passed = validation_passed;
5242 let elapsed = start.elapsed();
5243 stats.causal_generation_ms = elapsed.as_millis() as u64;
5244 info!(
5245 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5246 sample_count, stats.causal_generation_ms, validation_passed,
5247 );
5248 }
5249 Ok(Err(e)) => {
5250 let elapsed = start.elapsed();
5251 stats.causal_generation_ms = elapsed.as_millis() as u64;
5252 warn!("Phase 13: Causal generation failed: {}", e);
5253 }
5254 Err(_) => {
5255 let elapsed = start.elapsed();
5256 stats.causal_generation_ms = elapsed.as_millis() as u64;
5257 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5258 }
5259 }
5260 }
5261
5262 fn phase_sourcing_data(
5264 &mut self,
5265 stats: &mut EnhancedGenerationStatistics,
5266 ) -> SynthResult<SourcingSnapshot> {
5267 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5268 debug!("Phase 14: Skipped (sourcing generation disabled)");
5269 return Ok(SourcingSnapshot::default());
5270 }
5271 let degradation = self.check_resources()?;
5272 if degradation >= DegradationLevel::Reduced {
5273 debug!(
5274 "Phase skipped due to resource pressure (degradation: {:?})",
5275 degradation
5276 );
5277 return Ok(SourcingSnapshot::default());
5278 }
5279
5280 info!("Phase 14: Generating S2C Sourcing Data");
5281 let seed = self.seed;
5282
5283 let vendor_ids: Vec<String> = self
5285 .master_data
5286 .vendors
5287 .iter()
5288 .map(|v| v.vendor_id.clone())
5289 .collect();
5290 if vendor_ids.is_empty() {
5291 debug!("Phase 14: Skipped (no vendors available)");
5292 return Ok(SourcingSnapshot::default());
5293 }
5294
5295 let categories: Vec<(String, String)> = vec![
5296 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5297 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5298 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5299 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5300 ("CAT-LOG".to_string(), "Logistics".to_string()),
5301 ];
5302 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5303 .iter()
5304 .map(|(id, name)| {
5305 (
5306 id.clone(),
5307 name.clone(),
5308 rust_decimal::Decimal::from(100_000),
5309 )
5310 })
5311 .collect();
5312
5313 let company_code = self
5314 .config
5315 .companies
5316 .first()
5317 .map(|c| c.code.as_str())
5318 .unwrap_or("1000");
5319 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5320 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5321 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5322 let fiscal_year = start_date.year() as u16;
5323 let owner_ids: Vec<String> = self
5324 .master_data
5325 .employees
5326 .iter()
5327 .take(5)
5328 .map(|e| e.employee_id.clone())
5329 .collect();
5330 let owner_id = owner_ids
5331 .first()
5332 .map(std::string::String::as_str)
5333 .unwrap_or("BUYER-001");
5334
5335 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5337 let spend_analyses =
5338 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5339
5340 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5342 let sourcing_projects = if owner_ids.is_empty() {
5343 Vec::new()
5344 } else {
5345 project_gen.generate(
5346 company_code,
5347 &categories_with_spend,
5348 &owner_ids,
5349 start_date,
5350 self.config.global.period_months,
5351 )
5352 };
5353 stats.sourcing_project_count = sourcing_projects.len();
5354
5355 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5357 let mut qual_gen = QualificationGenerator::new(seed + 2);
5358 let qualifications = qual_gen.generate(
5359 company_code,
5360 &qual_vendor_ids,
5361 sourcing_projects.first().map(|p| p.project_id.as_str()),
5362 owner_id,
5363 start_date,
5364 );
5365
5366 let mut rfx_gen = RfxGenerator::new(seed + 3);
5368 let rfx_events: Vec<RfxEvent> = sourcing_projects
5369 .iter()
5370 .map(|proj| {
5371 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5372 rfx_gen.generate(
5373 company_code,
5374 &proj.project_id,
5375 &proj.category_id,
5376 &qualified_vids,
5377 owner_id,
5378 start_date,
5379 50000.0,
5380 )
5381 })
5382 .collect();
5383 stats.rfx_event_count = rfx_events.len();
5384
5385 let mut bid_gen = BidGenerator::new(seed + 4);
5387 let mut all_bids = Vec::new();
5388 for rfx in &rfx_events {
5389 let bidder_count = vendor_ids.len().clamp(2, 5);
5390 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5391 let bids = bid_gen.generate(rfx, &responding, start_date);
5392 all_bids.extend(bids);
5393 }
5394 stats.bid_count = all_bids.len();
5395
5396 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5398 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5399 .iter()
5400 .map(|rfx| {
5401 let rfx_bids: Vec<SupplierBid> = all_bids
5402 .iter()
5403 .filter(|b| b.rfx_id == rfx.rfx_id)
5404 .cloned()
5405 .collect();
5406 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5407 })
5408 .collect();
5409
5410 let mut contract_gen = ContractGenerator::new(seed + 6);
5412 let contracts: Vec<ProcurementContract> = bid_evaluations
5413 .iter()
5414 .zip(rfx_events.iter())
5415 .filter_map(|(eval, rfx)| {
5416 eval.ranked_bids.first().and_then(|winner| {
5417 all_bids
5418 .iter()
5419 .find(|b| b.bid_id == winner.bid_id)
5420 .map(|winning_bid| {
5421 contract_gen.generate_from_bid(
5422 winning_bid,
5423 Some(&rfx.sourcing_project_id),
5424 &rfx.category_id,
5425 owner_id,
5426 start_date,
5427 )
5428 })
5429 })
5430 })
5431 .collect();
5432 stats.contract_count = contracts.len();
5433
5434 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5436 let catalog_items = catalog_gen.generate(&contracts);
5437 stats.catalog_item_count = catalog_items.len();
5438
5439 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5441 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5442 .iter()
5443 .fold(
5444 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5445 |mut acc, c| {
5446 acc.entry(c.vendor_id.clone()).or_default().push(c);
5447 acc
5448 },
5449 )
5450 .into_iter()
5451 .collect();
5452 let scorecards = scorecard_gen.generate(
5453 company_code,
5454 &vendor_contracts,
5455 start_date,
5456 end_date,
5457 owner_id,
5458 );
5459 stats.scorecard_count = scorecards.len();
5460
5461 let mut sourcing_projects = sourcing_projects;
5464 for project in &mut sourcing_projects {
5465 project.rfx_ids = rfx_events
5467 .iter()
5468 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5469 .map(|rfx| rfx.rfx_id.clone())
5470 .collect();
5471
5472 project.contract_id = contracts
5474 .iter()
5475 .find(|c| {
5476 c.sourcing_project_id
5477 .as_deref()
5478 .is_some_and(|sp| sp == project.project_id)
5479 })
5480 .map(|c| c.contract_id.clone());
5481
5482 project.spend_analysis_id = spend_analyses
5484 .iter()
5485 .find(|sa| sa.category_id == project.category_id)
5486 .map(|sa| sa.category_id.clone());
5487 }
5488
5489 info!(
5490 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5491 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5492 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5493 );
5494 self.check_resources_with_log("post-sourcing")?;
5495
5496 Ok(SourcingSnapshot {
5497 spend_analyses,
5498 sourcing_projects,
5499 qualifications,
5500 rfx_events,
5501 bids: all_bids,
5502 bid_evaluations,
5503 contracts,
5504 catalog_items,
5505 scorecards,
5506 })
5507 }
5508
5509 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5515 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5516
5517 let parent_code = self
5518 .config
5519 .companies
5520 .first()
5521 .map(|c| c.code.clone())
5522 .unwrap_or_else(|| "PARENT".to_string());
5523
5524 let mut group = GroupStructure::new(parent_code);
5525
5526 for company in self.config.companies.iter().skip(1) {
5527 let sub =
5528 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5529 group.add_subsidiary(sub);
5530 }
5531
5532 group
5533 }
5534
5535 fn phase_intercompany(
5537 &mut self,
5538 journal_entries: &[JournalEntry],
5539 stats: &mut EnhancedGenerationStatistics,
5540 ) -> SynthResult<IntercompanySnapshot> {
5541 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5543 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5544 return Ok(IntercompanySnapshot::default());
5545 }
5546
5547 if self.config.companies.len() < 2 {
5549 debug!(
5550 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5551 self.config.companies.len()
5552 );
5553 return Ok(IntercompanySnapshot::default());
5554 }
5555
5556 info!("Phase 14b: Generating Intercompany Transactions");
5557
5558 let group_structure = self.build_group_structure();
5561 debug!(
5562 "Group structure built: parent={}, subsidiaries={}",
5563 group_structure.parent_entity,
5564 group_structure.subsidiaries.len()
5565 );
5566
5567 let seed = self.seed;
5568 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5569 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5570 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5571
5572 let parent_code = self.config.companies[0].code.clone();
5575 let mut ownership_structure =
5576 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5577
5578 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5579 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5580 format!("REL{:03}", i + 1),
5581 parent_code.clone(),
5582 company.code.clone(),
5583 rust_decimal::Decimal::from(100), start_date,
5585 );
5586 ownership_structure.add_relationship(relationship);
5587 }
5588
5589 let tp_method = match self.config.intercompany.transfer_pricing_method {
5591 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5592 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5593 }
5594 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5595 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5596 }
5597 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5598 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5599 }
5600 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5601 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5602 }
5603 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5604 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5605 }
5606 };
5607
5608 let ic_currency = self
5610 .config
5611 .companies
5612 .first()
5613 .map(|c| c.currency.clone())
5614 .unwrap_or_else(|| "USD".to_string());
5615 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5616 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5617 transfer_pricing_method: tp_method,
5618 markup_percent: rust_decimal::Decimal::from_f64_retain(
5619 self.config.intercompany.markup_percent,
5620 )
5621 .unwrap_or(rust_decimal::Decimal::from(5)),
5622 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5623 default_currency: ic_currency,
5624 ..Default::default()
5625 };
5626
5627 let mut ic_generator = datasynth_generators::ICGenerator::new(
5629 ic_gen_config,
5630 ownership_structure.clone(),
5631 seed + 50,
5632 );
5633
5634 let transactions_per_day = 3;
5637 let matched_pairs = ic_generator.generate_transactions_for_period(
5638 start_date,
5639 end_date,
5640 transactions_per_day,
5641 );
5642
5643 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5645 debug!(
5646 "Generated {} IC seller invoices, {} IC buyer POs",
5647 ic_doc_chains.seller_invoices.len(),
5648 ic_doc_chains.buyer_orders.len()
5649 );
5650
5651 let mut seller_entries = Vec::new();
5653 let mut buyer_entries = Vec::new();
5654 let fiscal_year = start_date.year();
5655
5656 for pair in &matched_pairs {
5657 let fiscal_period = pair.posting_date.month();
5658 let (seller_je, buyer_je) =
5659 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5660 seller_entries.push(seller_je);
5661 buyer_entries.push(buyer_je);
5662 }
5663
5664 let matching_config = datasynth_generators::ICMatchingConfig {
5666 base_currency: self
5667 .config
5668 .companies
5669 .first()
5670 .map(|c| c.currency.clone())
5671 .unwrap_or_else(|| "USD".to_string()),
5672 ..Default::default()
5673 };
5674 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5675 matching_engine.load_matched_pairs(&matched_pairs);
5676 let matching_result = matching_engine.run_matching(end_date);
5677
5678 let mut elimination_entries = Vec::new();
5680 if self.config.intercompany.generate_eliminations {
5681 let elim_config = datasynth_generators::EliminationConfig {
5682 consolidation_entity: "GROUP".to_string(),
5683 base_currency: self
5684 .config
5685 .companies
5686 .first()
5687 .map(|c| c.currency.clone())
5688 .unwrap_or_else(|| "USD".to_string()),
5689 ..Default::default()
5690 };
5691
5692 let mut elim_generator =
5693 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5694
5695 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5696 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5697 matching_result
5698 .matched_balances
5699 .iter()
5700 .chain(matching_result.unmatched_balances.iter())
5701 .cloned()
5702 .collect();
5703
5704 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5716 std::collections::HashMap::new();
5717 let mut equity_amounts: std::collections::HashMap<
5718 String,
5719 std::collections::HashMap<String, rust_decimal::Decimal>,
5720 > = std::collections::HashMap::new();
5721 {
5722 use rust_decimal::Decimal;
5723 let hundred = Decimal::from(100u32);
5724 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5728 for sub in &group_structure.subsidiaries {
5729 let net_assets = {
5730 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5731 if na > Decimal::ZERO {
5732 na
5733 } else {
5734 Decimal::from(1_000_000u64)
5735 }
5736 };
5737 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5739 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5740
5741 let mut eq_map = std::collections::HashMap::new();
5744 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5745 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5746 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5747 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5748 }
5749 }
5750
5751 let journal = elim_generator.generate_eliminations(
5752 &fiscal_period,
5753 end_date,
5754 &all_balances,
5755 &matched_pairs,
5756 &investment_amounts,
5757 &equity_amounts,
5758 );
5759
5760 elimination_entries = journal.entries.clone();
5761 }
5762
5763 let matched_pair_count = matched_pairs.len();
5764 let elimination_entry_count = elimination_entries.len();
5765 let match_rate = matching_result.match_rate;
5766
5767 stats.ic_matched_pair_count = matched_pair_count;
5768 stats.ic_elimination_count = elimination_entry_count;
5769 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5770
5771 info!(
5772 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5773 matched_pair_count,
5774 stats.ic_transaction_count,
5775 seller_entries.len(),
5776 buyer_entries.len(),
5777 elimination_entry_count,
5778 match_rate * 100.0
5779 );
5780 self.check_resources_with_log("post-intercompany")?;
5781
5782 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5786 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5787 use rust_decimal::Decimal;
5788
5789 let eight_pct = Decimal::new(8, 2); group_structure
5792 .subsidiaries
5793 .iter()
5794 .filter(|sub| {
5795 sub.nci_percentage > Decimal::ZERO
5796 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5797 })
5798 .map(|sub| {
5799 let net_assets_from_jes =
5803 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5804
5805 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5806 net_assets_from_jes.round_dp(2)
5807 } else {
5808 Decimal::from(1_000_000u64)
5810 };
5811
5812 let net_income = (net_assets * eight_pct).round_dp(2);
5814
5815 NciMeasurement::compute(
5816 sub.entity_code.clone(),
5817 sub.nci_percentage,
5818 net_assets,
5819 net_income,
5820 )
5821 })
5822 .collect()
5823 };
5824
5825 if !nci_measurements.is_empty() {
5826 info!(
5827 "NCI measurements: {} subsidiaries with non-controlling interests",
5828 nci_measurements.len()
5829 );
5830 }
5831
5832 Ok(IntercompanySnapshot {
5833 group_structure: Some(group_structure),
5834 matched_pairs,
5835 seller_journal_entries: seller_entries,
5836 buyer_journal_entries: buyer_entries,
5837 elimination_entries,
5838 nci_measurements,
5839 ic_document_chains: Some(ic_doc_chains),
5840 matched_pair_count,
5841 elimination_entry_count,
5842 match_rate,
5843 })
5844 }
5845
5846 fn phase_financial_reporting(
5848 &mut self,
5849 document_flows: &DocumentFlowSnapshot,
5850 journal_entries: &[JournalEntry],
5851 coa: &Arc<ChartOfAccounts>,
5852 _hr: &HrSnapshot,
5853 _audit: &AuditSnapshot,
5854 stats: &mut EnhancedGenerationStatistics,
5855 ) -> SynthResult<FinancialReportingSnapshot> {
5856 let fs_enabled = self.phase_config.generate_financial_statements
5857 || self.config.financial_reporting.enabled;
5858 let br_enabled = self.phase_config.generate_bank_reconciliation;
5859
5860 if !fs_enabled && !br_enabled {
5861 debug!("Phase 15: Skipped (financial reporting disabled)");
5862 return Ok(FinancialReportingSnapshot::default());
5863 }
5864
5865 info!("Phase 15: Generating Financial Reporting Data");
5866
5867 let seed = self.seed;
5868 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5869 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5870
5871 let mut financial_statements = Vec::new();
5872 let mut bank_reconciliations = Vec::new();
5873 let mut trial_balances = Vec::new();
5874 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5875 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5876 Vec::new();
5877 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5879 std::collections::HashMap::new();
5880 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5882 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5884
5885 if fs_enabled {
5893 let has_journal_entries = !journal_entries.is_empty();
5894
5895 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5898 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5900
5901 let elimination_entries: Vec<&JournalEntry> = journal_entries
5903 .iter()
5904 .filter(|je| je.header.is_elimination)
5905 .collect();
5906
5907 for period in 0..self.config.global.period_months {
5909 let period_start = start_date + chrono::Months::new(period);
5910 let period_end =
5911 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5912 let fiscal_year = period_end.year() as u16;
5913 let fiscal_period = period_end.month() as u8;
5914 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5915
5916 let mut entity_tb_map: std::collections::HashMap<
5919 String,
5920 std::collections::HashMap<String, rust_decimal::Decimal>,
5921 > = std::collections::HashMap::new();
5922
5923 for (company_idx, company) in self.config.companies.iter().enumerate() {
5925 let company_code = company.code.as_str();
5926 let currency = company.currency.as_str();
5927 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5930 let mut company_fs_gen =
5931 FinancialStatementGenerator::new(seed + company_seed_offset);
5932
5933 if has_journal_entries {
5934 let tb_entries = Self::build_cumulative_trial_balance(
5935 journal_entries,
5936 coa,
5937 company_code,
5938 start_date,
5939 period_end,
5940 fiscal_year,
5941 fiscal_period,
5942 );
5943
5944 let entity_cat_map =
5946 entity_tb_map.entry(company_code.to_string()).or_default();
5947 for tb_entry in &tb_entries {
5948 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5949 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5950 }
5951
5952 let stmts = company_fs_gen.generate(
5953 company_code,
5954 currency,
5955 &tb_entries,
5956 period_start,
5957 period_end,
5958 fiscal_year,
5959 fiscal_period,
5960 None,
5961 "SYS-AUTOCLOSE",
5962 );
5963
5964 let mut entity_stmts = Vec::new();
5965 for stmt in stmts {
5966 if stmt.statement_type == StatementType::CashFlowStatement {
5967 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5968 let cf_items = Self::build_cash_flow_from_trial_balances(
5969 &tb_entries,
5970 None,
5971 net_income,
5972 );
5973 entity_stmts.push(FinancialStatement {
5974 cash_flow_items: cf_items,
5975 ..stmt
5976 });
5977 } else {
5978 entity_stmts.push(stmt);
5979 }
5980 }
5981
5982 financial_statements.extend(entity_stmts.clone());
5984
5985 standalone_statements
5987 .entry(company_code.to_string())
5988 .or_default()
5989 .extend(entity_stmts);
5990
5991 if company_idx == 0 {
5994 trial_balances.push(PeriodTrialBalance {
5995 fiscal_year,
5996 fiscal_period,
5997 period_start,
5998 period_end,
5999 entries: tb_entries,
6000 });
6001 }
6002 } else {
6003 let tb_entries = Self::build_trial_balance_from_entries(
6005 journal_entries,
6006 coa,
6007 company_code,
6008 fiscal_year,
6009 fiscal_period,
6010 );
6011
6012 let stmts = company_fs_gen.generate(
6013 company_code,
6014 currency,
6015 &tb_entries,
6016 period_start,
6017 period_end,
6018 fiscal_year,
6019 fiscal_period,
6020 None,
6021 "SYS-AUTOCLOSE",
6022 );
6023 financial_statements.extend(stmts.clone());
6024 standalone_statements
6025 .entry(company_code.to_string())
6026 .or_default()
6027 .extend(stmts);
6028
6029 if company_idx == 0 && !tb_entries.is_empty() {
6030 trial_balances.push(PeriodTrialBalance {
6031 fiscal_year,
6032 fiscal_period,
6033 period_start,
6034 period_end,
6035 entries: tb_entries,
6036 });
6037 }
6038 }
6039 }
6040
6041 let group_currency = self
6044 .config
6045 .companies
6046 .first()
6047 .map(|c| c.currency.as_str())
6048 .unwrap_or("USD");
6049
6050 let period_eliminations: Vec<JournalEntry> = elimination_entries
6052 .iter()
6053 .filter(|je| {
6054 je.header.fiscal_year == fiscal_year
6055 && je.header.fiscal_period == fiscal_period
6056 })
6057 .map(|je| (*je).clone())
6058 .collect();
6059
6060 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6061 &entity_tb_map,
6062 &period_eliminations,
6063 &period_label,
6064 );
6065
6066 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6069 .line_items
6070 .iter()
6071 .map(|li| {
6072 let net = li.post_elimination_total;
6073 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6074 (net, rust_decimal::Decimal::ZERO)
6075 } else {
6076 (rust_decimal::Decimal::ZERO, -net)
6077 };
6078 datasynth_generators::TrialBalanceEntry {
6079 account_code: li.account_category.clone(),
6080 account_name: li.account_category.clone(),
6081 category: li.account_category.clone(),
6082 debit_balance: debit,
6083 credit_balance: credit,
6084 }
6085 })
6086 .collect();
6087
6088 let mut cons_stmts = cons_gen.generate(
6089 "GROUP",
6090 group_currency,
6091 &cons_tb,
6092 period_start,
6093 period_end,
6094 fiscal_year,
6095 fiscal_period,
6096 None,
6097 "SYS-AUTOCLOSE",
6098 );
6099
6100 let bs_categories: &[&str] = &[
6104 "CASH",
6105 "RECEIVABLES",
6106 "INVENTORY",
6107 "FIXEDASSETS",
6108 "PAYABLES",
6109 "ACCRUEDLIABILITIES",
6110 "LONGTERMDEBT",
6111 "EQUITY",
6112 ];
6113 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6114 cons_line_items.into_iter().partition(|li| {
6115 let upper = li.label.to_uppercase();
6116 bs_categories.iter().any(|c| upper == *c)
6117 });
6118
6119 for stmt in &mut cons_stmts {
6120 stmt.is_consolidated = true;
6121 match stmt.statement_type {
6122 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6123 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6124 _ => {} }
6126 }
6127
6128 consolidated_statements.extend(cons_stmts);
6129 consolidation_schedules.push(schedule);
6130 }
6131
6132 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6138 info!(
6139 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6140 stats.financial_statement_count,
6141 consolidated_statements.len(),
6142 has_journal_entries
6143 );
6144
6145 let entity_seeds: Vec<SegmentSeed> = self
6150 .config
6151 .companies
6152 .iter()
6153 .map(|c| SegmentSeed {
6154 code: c.code.clone(),
6155 name: c.name.clone(),
6156 currency: c.currency.clone(),
6157 })
6158 .collect();
6159
6160 let mut seg_gen = SegmentGenerator::new(seed + 30);
6161
6162 for period in 0..self.config.global.period_months {
6167 let period_end =
6168 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6169 let fiscal_year = period_end.year() as u16;
6170 let fiscal_period = period_end.month() as u8;
6171 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6172
6173 use datasynth_core::models::StatementType;
6174
6175 let cons_is = consolidated_statements.iter().find(|s| {
6177 s.fiscal_year == fiscal_year
6178 && s.fiscal_period == fiscal_period
6179 && s.statement_type == StatementType::IncomeStatement
6180 });
6181 let cons_bs = consolidated_statements.iter().find(|s| {
6182 s.fiscal_year == fiscal_year
6183 && s.fiscal_period == fiscal_period
6184 && s.statement_type == StatementType::BalanceSheet
6185 });
6186
6187 let is_stmt = cons_is.or_else(|| {
6189 financial_statements.iter().find(|s| {
6190 s.fiscal_year == fiscal_year
6191 && s.fiscal_period == fiscal_period
6192 && s.statement_type == StatementType::IncomeStatement
6193 })
6194 });
6195 let bs_stmt = cons_bs.or_else(|| {
6196 financial_statements.iter().find(|s| {
6197 s.fiscal_year == fiscal_year
6198 && s.fiscal_period == fiscal_period
6199 && s.statement_type == StatementType::BalanceSheet
6200 })
6201 });
6202
6203 let consolidated_revenue = is_stmt
6204 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6205 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6207
6208 let consolidated_profit = is_stmt
6209 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6210 .map(|li| li.amount)
6211 .unwrap_or(rust_decimal::Decimal::ZERO);
6212
6213 let consolidated_assets = bs_stmt
6214 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6215 .map(|li| li.amount)
6216 .unwrap_or(rust_decimal::Decimal::ZERO);
6217
6218 if consolidated_revenue == rust_decimal::Decimal::ZERO
6220 && consolidated_assets == rust_decimal::Decimal::ZERO
6221 {
6222 continue;
6223 }
6224
6225 let group_code = self
6226 .config
6227 .companies
6228 .first()
6229 .map(|c| c.code.as_str())
6230 .unwrap_or("GROUP");
6231
6232 let total_depr: rust_decimal::Decimal = journal_entries
6235 .iter()
6236 .filter(|je| je.header.document_type == "CL")
6237 .flat_map(|je| je.lines.iter())
6238 .filter(|l| l.gl_account.starts_with("6000"))
6239 .map(|l| l.debit_amount)
6240 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6241 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6242 Some(total_depr)
6243 } else {
6244 None
6245 };
6246
6247 let (segs, recon) = seg_gen.generate(
6248 group_code,
6249 &period_label,
6250 consolidated_revenue,
6251 consolidated_profit,
6252 consolidated_assets,
6253 &entity_seeds,
6254 depr_param,
6255 );
6256 segment_reports.extend(segs);
6257 segment_reconciliations.push(recon);
6258 }
6259
6260 info!(
6261 "Segment reports generated: {} segments, {} reconciliations",
6262 segment_reports.len(),
6263 segment_reconciliations.len()
6264 );
6265 }
6266
6267 if br_enabled && !document_flows.payments.is_empty() {
6269 let employee_ids: Vec<String> = self
6270 .master_data
6271 .employees
6272 .iter()
6273 .map(|e| e.employee_id.clone())
6274 .collect();
6275 let mut br_gen =
6276 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6277
6278 for company in &self.config.companies {
6280 let company_payments: Vec<PaymentReference> = document_flows
6281 .payments
6282 .iter()
6283 .filter(|p| p.header.company_code == company.code)
6284 .map(|p| PaymentReference {
6285 id: p.header.document_id.clone(),
6286 amount: if p.is_vendor { p.amount } else { -p.amount },
6287 date: p.header.document_date,
6288 reference: p
6289 .check_number
6290 .clone()
6291 .or_else(|| p.wire_reference.clone())
6292 .unwrap_or_else(|| p.header.document_id.clone()),
6293 })
6294 .collect();
6295
6296 if company_payments.is_empty() {
6297 continue;
6298 }
6299
6300 let bank_account_id = format!("{}-MAIN", company.code);
6301
6302 for period in 0..self.config.global.period_months {
6304 let period_start = start_date + chrono::Months::new(period);
6305 let period_end =
6306 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6307
6308 let period_payments: Vec<PaymentReference> = company_payments
6309 .iter()
6310 .filter(|p| p.date >= period_start && p.date <= period_end)
6311 .cloned()
6312 .collect();
6313
6314 let recon = br_gen.generate(
6315 &company.code,
6316 &bank_account_id,
6317 period_start,
6318 period_end,
6319 &company.currency,
6320 &period_payments,
6321 );
6322 bank_reconciliations.push(recon);
6323 }
6324 }
6325 info!(
6326 "Bank reconciliations generated: {} reconciliations",
6327 bank_reconciliations.len()
6328 );
6329 }
6330
6331 stats.bank_reconciliation_count = bank_reconciliations.len();
6332 self.check_resources_with_log("post-financial-reporting")?;
6333
6334 if !trial_balances.is_empty() {
6335 info!(
6336 "Period-close trial balances captured: {} periods",
6337 trial_balances.len()
6338 );
6339 }
6340
6341 let notes_to_financial_statements = Vec::new();
6345
6346 Ok(FinancialReportingSnapshot {
6347 financial_statements,
6348 standalone_statements,
6349 consolidated_statements,
6350 consolidation_schedules,
6351 bank_reconciliations,
6352 trial_balances,
6353 segment_reports,
6354 segment_reconciliations,
6355 notes_to_financial_statements,
6356 })
6357 }
6358
6359 fn generate_notes_to_financial_statements(
6366 &self,
6367 financial_reporting: &mut FinancialReportingSnapshot,
6368 accounting_standards: &AccountingStandardsSnapshot,
6369 tax: &TaxSnapshot,
6370 hr: &HrSnapshot,
6371 audit: &AuditSnapshot,
6372 treasury: &TreasurySnapshot,
6373 ) {
6374 use datasynth_config::schema::AccountingFrameworkConfig;
6375 use datasynth_core::models::StatementType;
6376 use datasynth_generators::period_close::notes_generator::{
6377 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6378 };
6379
6380 let seed = self.seed;
6381 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6382 {
6383 Ok(d) => d,
6384 Err(_) => return,
6385 };
6386
6387 let mut notes_gen = NotesGenerator::new(seed + 4235);
6388
6389 for company in &self.config.companies {
6390 let last_period_end = start_date
6391 + chrono::Months::new(self.config.global.period_months)
6392 - chrono::Days::new(1);
6393 let fiscal_year = last_period_end.year() as u16;
6394
6395 let entity_is = financial_reporting
6397 .standalone_statements
6398 .get(&company.code)
6399 .and_then(|stmts| {
6400 stmts.iter().find(|s| {
6401 s.fiscal_year == fiscal_year
6402 && s.statement_type == StatementType::IncomeStatement
6403 })
6404 });
6405 let entity_bs = financial_reporting
6406 .standalone_statements
6407 .get(&company.code)
6408 .and_then(|stmts| {
6409 stmts.iter().find(|s| {
6410 s.fiscal_year == fiscal_year
6411 && s.statement_type == StatementType::BalanceSheet
6412 })
6413 });
6414
6415 let revenue_amount = entity_is
6417 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6418 .map(|li| li.amount);
6419 let ppe_gross = entity_bs
6420 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6421 .map(|li| li.amount);
6422
6423 let framework = match self
6424 .config
6425 .accounting_standards
6426 .framework
6427 .unwrap_or_default()
6428 {
6429 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6430 "IFRS".to_string()
6431 }
6432 _ => "US GAAP".to_string(),
6433 };
6434
6435 let (entity_dta, entity_dtl) = {
6438 let mut dta = rust_decimal::Decimal::ZERO;
6439 let mut dtl = rust_decimal::Decimal::ZERO;
6440 for rf in &tax.deferred_tax.rollforwards {
6441 if rf.entity_code == company.code {
6442 dta += rf.closing_dta;
6443 dtl += rf.closing_dtl;
6444 }
6445 }
6446 (
6447 if dta > rust_decimal::Decimal::ZERO {
6448 Some(dta)
6449 } else {
6450 None
6451 },
6452 if dtl > rust_decimal::Decimal::ZERO {
6453 Some(dtl)
6454 } else {
6455 None
6456 },
6457 )
6458 };
6459
6460 let entity_provisions: Vec<_> = accounting_standards
6463 .provisions
6464 .iter()
6465 .filter(|p| p.entity_code == company.code)
6466 .collect();
6467 let provision_count = entity_provisions.len();
6468 let total_provisions = if provision_count > 0 {
6469 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6470 } else {
6471 None
6472 };
6473
6474 let entity_pension_plan_count = hr
6476 .pension_plans
6477 .iter()
6478 .filter(|p| p.entity_code == company.code)
6479 .count();
6480 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6481 let sum: rust_decimal::Decimal = hr
6482 .pension_disclosures
6483 .iter()
6484 .filter(|d| {
6485 hr.pension_plans
6486 .iter()
6487 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6488 })
6489 .map(|d| d.net_pension_liability)
6490 .sum();
6491 let plan_assets_sum: rust_decimal::Decimal = hr
6492 .pension_plan_assets
6493 .iter()
6494 .filter(|a| {
6495 hr.pension_plans
6496 .iter()
6497 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6498 })
6499 .map(|a| a.fair_value_closing)
6500 .sum();
6501 if entity_pension_plan_count > 0 {
6502 Some(sum + plan_assets_sum)
6503 } else {
6504 None
6505 }
6506 };
6507 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6508 let sum: rust_decimal::Decimal = hr
6509 .pension_plan_assets
6510 .iter()
6511 .filter(|a| {
6512 hr.pension_plans
6513 .iter()
6514 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6515 })
6516 .map(|a| a.fair_value_closing)
6517 .sum();
6518 if entity_pension_plan_count > 0 {
6519 Some(sum)
6520 } else {
6521 None
6522 }
6523 };
6524
6525 let rp_count = audit.related_party_transactions.len();
6528 let se_count = audit.subsequent_events.len();
6529 let adjusting_count = audit
6530 .subsequent_events
6531 .iter()
6532 .filter(|e| {
6533 matches!(
6534 e.classification,
6535 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6536 )
6537 })
6538 .count();
6539
6540 let ctx = NotesGeneratorContext {
6541 entity_code: company.code.clone(),
6542 framework,
6543 period: format!("FY{}", fiscal_year),
6544 period_end: last_period_end,
6545 currency: company.currency.clone(),
6546 revenue_amount,
6547 total_ppe_gross: ppe_gross,
6548 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6549 deferred_tax_asset: entity_dta,
6551 deferred_tax_liability: entity_dtl,
6552 provision_count,
6554 total_provisions,
6555 pension_plan_count: entity_pension_plan_count,
6557 total_dbo: entity_total_dbo,
6558 total_plan_assets: entity_total_plan_assets,
6559 related_party_transaction_count: rp_count,
6561 subsequent_event_count: se_count,
6562 adjusting_event_count: adjusting_count,
6563 ..NotesGeneratorContext::default()
6564 };
6565
6566 let entity_notes = notes_gen.generate(&ctx);
6567 let standard_note_count = entity_notes.len() as u32;
6568 info!(
6569 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6570 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6571 );
6572 financial_reporting
6573 .notes_to_financial_statements
6574 .extend(entity_notes);
6575
6576 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6578 .debt_instruments
6579 .iter()
6580 .filter(|d| d.entity_id == company.code)
6581 .map(|d| {
6582 (
6583 format!("{:?}", d.instrument_type),
6584 d.principal,
6585 d.maturity_date.to_string(),
6586 )
6587 })
6588 .collect();
6589
6590 let hedge_count = treasury.hedge_relationships.len();
6591 let effective_hedges = treasury
6592 .hedge_relationships
6593 .iter()
6594 .filter(|h| h.is_effective)
6595 .count();
6596 let total_notional: rust_decimal::Decimal = treasury
6597 .hedging_instruments
6598 .iter()
6599 .map(|h| h.notional_amount)
6600 .sum();
6601 let total_fair_value: rust_decimal::Decimal = treasury
6602 .hedging_instruments
6603 .iter()
6604 .map(|h| h.fair_value)
6605 .sum();
6606
6607 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6609 .provisions
6610 .iter()
6611 .filter(|p| p.entity_code == company.code)
6612 .map(|p| p.id.as_str())
6613 .collect();
6614 let provision_movements: Vec<(
6615 String,
6616 rust_decimal::Decimal,
6617 rust_decimal::Decimal,
6618 rust_decimal::Decimal,
6619 )> = accounting_standards
6620 .provision_movements
6621 .iter()
6622 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6623 .map(|m| {
6624 let prov_type = accounting_standards
6625 .provisions
6626 .iter()
6627 .find(|p| p.id == m.provision_id)
6628 .map(|p| format!("{:?}", p.provision_type))
6629 .unwrap_or_else(|| "Unknown".to_string());
6630 (prov_type, m.opening, m.additions, m.closing)
6631 })
6632 .collect();
6633
6634 let enhanced_ctx = EnhancedNotesContext {
6635 entity_code: company.code.clone(),
6636 period: format!("FY{}", fiscal_year),
6637 currency: company.currency.clone(),
6638 finished_goods_value: rust_decimal::Decimal::ZERO,
6640 wip_value: rust_decimal::Decimal::ZERO,
6641 raw_materials_value: rust_decimal::Decimal::ZERO,
6642 debt_instruments,
6643 hedge_count,
6644 effective_hedges,
6645 total_notional,
6646 total_fair_value,
6647 provision_movements,
6648 };
6649
6650 let enhanced_notes =
6651 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6652 if !enhanced_notes.is_empty() {
6653 info!(
6654 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6655 company.code,
6656 enhanced_notes.len(),
6657 enhanced_ctx.debt_instruments.len(),
6658 hedge_count,
6659 enhanced_ctx.provision_movements.len(),
6660 );
6661 financial_reporting
6662 .notes_to_financial_statements
6663 .extend(enhanced_notes);
6664 }
6665 }
6666 }
6667
6668 fn build_trial_balance_from_entries(
6674 journal_entries: &[JournalEntry],
6675 coa: &ChartOfAccounts,
6676 company_code: &str,
6677 fiscal_year: u16,
6678 fiscal_period: u8,
6679 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6680 use rust_decimal::Decimal;
6681
6682 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6684 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6685
6686 for je in journal_entries {
6687 if je.header.company_code != company_code
6689 || je.header.fiscal_year != fiscal_year
6690 || je.header.fiscal_period != fiscal_period
6691 {
6692 continue;
6693 }
6694
6695 for line in &je.lines {
6696 let acct = &line.gl_account;
6697 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6698 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6699 }
6700 }
6701
6702 let mut all_accounts: Vec<&String> = account_debits
6704 .keys()
6705 .chain(account_credits.keys())
6706 .collect::<std::collections::HashSet<_>>()
6707 .into_iter()
6708 .collect();
6709 all_accounts.sort();
6710
6711 let mut entries = Vec::new();
6712
6713 for acct_number in all_accounts {
6714 let debit = account_debits
6715 .get(acct_number)
6716 .copied()
6717 .unwrap_or(Decimal::ZERO);
6718 let credit = account_credits
6719 .get(acct_number)
6720 .copied()
6721 .unwrap_or(Decimal::ZERO);
6722
6723 if debit.is_zero() && credit.is_zero() {
6724 continue;
6725 }
6726
6727 let account_name = coa
6729 .get_account(acct_number)
6730 .map(|gl| gl.short_description.clone())
6731 .unwrap_or_else(|| format!("Account {acct_number}"));
6732
6733 let category = Self::category_from_account_code(acct_number);
6738
6739 entries.push(datasynth_generators::TrialBalanceEntry {
6740 account_code: acct_number.clone(),
6741 account_name,
6742 category,
6743 debit_balance: debit,
6744 credit_balance: credit,
6745 });
6746 }
6747
6748 entries
6749 }
6750
6751 fn build_cumulative_trial_balance(
6758 journal_entries: &[JournalEntry],
6759 coa: &ChartOfAccounts,
6760 company_code: &str,
6761 start_date: NaiveDate,
6762 period_end: NaiveDate,
6763 fiscal_year: u16,
6764 fiscal_period: u8,
6765 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6766 use rust_decimal::Decimal;
6767
6768 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6770 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6771
6772 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6774 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6775
6776 for je in journal_entries {
6777 if je.header.company_code != company_code {
6778 continue;
6779 }
6780
6781 for line in &je.lines {
6782 let acct = &line.gl_account;
6783 let category = Self::category_from_account_code(acct);
6784 let is_bs_account = matches!(
6785 category.as_str(),
6786 "Cash"
6787 | "Receivables"
6788 | "Inventory"
6789 | "FixedAssets"
6790 | "Payables"
6791 | "AccruedLiabilities"
6792 | "LongTermDebt"
6793 | "Equity"
6794 );
6795
6796 if is_bs_account {
6797 if je.header.document_date <= period_end
6799 && je.header.document_date >= start_date
6800 {
6801 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6802 line.debit_amount;
6803 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6804 line.credit_amount;
6805 }
6806 } else {
6807 if je.header.fiscal_year == fiscal_year
6809 && je.header.fiscal_period == fiscal_period
6810 {
6811 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6812 line.debit_amount;
6813 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6814 line.credit_amount;
6815 }
6816 }
6817 }
6818 }
6819
6820 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6822 all_accounts.extend(bs_debits.keys().cloned());
6823 all_accounts.extend(bs_credits.keys().cloned());
6824 all_accounts.extend(is_debits.keys().cloned());
6825 all_accounts.extend(is_credits.keys().cloned());
6826
6827 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6828 sorted_accounts.sort();
6829
6830 let mut entries = Vec::new();
6831
6832 for acct_number in &sorted_accounts {
6833 let category = Self::category_from_account_code(acct_number);
6834 let is_bs_account = matches!(
6835 category.as_str(),
6836 "Cash"
6837 | "Receivables"
6838 | "Inventory"
6839 | "FixedAssets"
6840 | "Payables"
6841 | "AccruedLiabilities"
6842 | "LongTermDebt"
6843 | "Equity"
6844 );
6845
6846 let (debit, credit) = if is_bs_account {
6847 (
6848 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6849 bs_credits
6850 .get(acct_number)
6851 .copied()
6852 .unwrap_or(Decimal::ZERO),
6853 )
6854 } else {
6855 (
6856 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6857 is_credits
6858 .get(acct_number)
6859 .copied()
6860 .unwrap_or(Decimal::ZERO),
6861 )
6862 };
6863
6864 if debit.is_zero() && credit.is_zero() {
6865 continue;
6866 }
6867
6868 let account_name = coa
6869 .get_account(acct_number)
6870 .map(|gl| gl.short_description.clone())
6871 .unwrap_or_else(|| format!("Account {acct_number}"));
6872
6873 entries.push(datasynth_generators::TrialBalanceEntry {
6874 account_code: acct_number.clone(),
6875 account_name,
6876 category,
6877 debit_balance: debit,
6878 credit_balance: credit,
6879 });
6880 }
6881
6882 entries
6883 }
6884
6885 fn build_cash_flow_from_trial_balances(
6890 current_tb: &[datasynth_generators::TrialBalanceEntry],
6891 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6892 net_income: rust_decimal::Decimal,
6893 ) -> Vec<CashFlowItem> {
6894 use rust_decimal::Decimal;
6895
6896 let aggregate =
6898 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6899 let mut map: HashMap<String, Decimal> = HashMap::new();
6900 for entry in tb {
6901 let net = entry.debit_balance - entry.credit_balance;
6902 *map.entry(entry.category.clone()).or_default() += net;
6903 }
6904 map
6905 };
6906
6907 let current = aggregate(current_tb);
6908 let prior = prior_tb.map(aggregate);
6909
6910 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6912 *map.get(key).unwrap_or(&Decimal::ZERO)
6913 };
6914
6915 let change = |key: &str| -> Decimal {
6917 let curr = get(¤t, key);
6918 match &prior {
6919 Some(p) => curr - get(p, key),
6920 None => curr,
6921 }
6922 };
6923
6924 let fixed_asset_change = change("FixedAssets");
6927 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6928 -fixed_asset_change
6929 } else {
6930 Decimal::ZERO
6931 };
6932
6933 let ar_change = change("Receivables");
6935 let inventory_change = change("Inventory");
6936 let ap_change = change("Payables");
6938 let accrued_change = change("AccruedLiabilities");
6939
6940 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6941 + (-ap_change)
6942 + (-accrued_change);
6943
6944 let capex = if fixed_asset_change > Decimal::ZERO {
6946 -fixed_asset_change
6947 } else {
6948 Decimal::ZERO
6949 };
6950 let investing_cf = capex;
6951
6952 let debt_change = -change("LongTermDebt");
6954 let equity_change = -change("Equity");
6955 let financing_cf = debt_change + equity_change;
6956
6957 let net_change = operating_cf + investing_cf + financing_cf;
6958
6959 vec![
6960 CashFlowItem {
6961 item_code: "CF-NI".to_string(),
6962 label: "Net Income".to_string(),
6963 category: CashFlowCategory::Operating,
6964 amount: net_income,
6965 amount_prior: None,
6966 sort_order: 1,
6967 is_total: false,
6968 },
6969 CashFlowItem {
6970 item_code: "CF-DEP".to_string(),
6971 label: "Depreciation & Amortization".to_string(),
6972 category: CashFlowCategory::Operating,
6973 amount: depreciation_addback,
6974 amount_prior: None,
6975 sort_order: 2,
6976 is_total: false,
6977 },
6978 CashFlowItem {
6979 item_code: "CF-AR".to_string(),
6980 label: "Change in Accounts Receivable".to_string(),
6981 category: CashFlowCategory::Operating,
6982 amount: -ar_change,
6983 amount_prior: None,
6984 sort_order: 3,
6985 is_total: false,
6986 },
6987 CashFlowItem {
6988 item_code: "CF-AP".to_string(),
6989 label: "Change in Accounts Payable".to_string(),
6990 category: CashFlowCategory::Operating,
6991 amount: -ap_change,
6992 amount_prior: None,
6993 sort_order: 4,
6994 is_total: false,
6995 },
6996 CashFlowItem {
6997 item_code: "CF-INV".to_string(),
6998 label: "Change in Inventory".to_string(),
6999 category: CashFlowCategory::Operating,
7000 amount: -inventory_change,
7001 amount_prior: None,
7002 sort_order: 5,
7003 is_total: false,
7004 },
7005 CashFlowItem {
7006 item_code: "CF-OP".to_string(),
7007 label: "Net Cash from Operating Activities".to_string(),
7008 category: CashFlowCategory::Operating,
7009 amount: operating_cf,
7010 amount_prior: None,
7011 sort_order: 6,
7012 is_total: true,
7013 },
7014 CashFlowItem {
7015 item_code: "CF-CAPEX".to_string(),
7016 label: "Capital Expenditures".to_string(),
7017 category: CashFlowCategory::Investing,
7018 amount: capex,
7019 amount_prior: None,
7020 sort_order: 7,
7021 is_total: false,
7022 },
7023 CashFlowItem {
7024 item_code: "CF-INV-T".to_string(),
7025 label: "Net Cash from Investing Activities".to_string(),
7026 category: CashFlowCategory::Investing,
7027 amount: investing_cf,
7028 amount_prior: None,
7029 sort_order: 8,
7030 is_total: true,
7031 },
7032 CashFlowItem {
7033 item_code: "CF-DEBT".to_string(),
7034 label: "Net Borrowings / (Repayments)".to_string(),
7035 category: CashFlowCategory::Financing,
7036 amount: debt_change,
7037 amount_prior: None,
7038 sort_order: 9,
7039 is_total: false,
7040 },
7041 CashFlowItem {
7042 item_code: "CF-EQ".to_string(),
7043 label: "Equity Changes".to_string(),
7044 category: CashFlowCategory::Financing,
7045 amount: equity_change,
7046 amount_prior: None,
7047 sort_order: 10,
7048 is_total: false,
7049 },
7050 CashFlowItem {
7051 item_code: "CF-FIN-T".to_string(),
7052 label: "Net Cash from Financing Activities".to_string(),
7053 category: CashFlowCategory::Financing,
7054 amount: financing_cf,
7055 amount_prior: None,
7056 sort_order: 11,
7057 is_total: true,
7058 },
7059 CashFlowItem {
7060 item_code: "CF-NET".to_string(),
7061 label: "Net Change in Cash".to_string(),
7062 category: CashFlowCategory::Operating,
7063 amount: net_change,
7064 amount_prior: None,
7065 sort_order: 12,
7066 is_total: true,
7067 },
7068 ]
7069 }
7070
7071 fn calculate_net_income_from_tb(
7075 tb: &[datasynth_generators::TrialBalanceEntry],
7076 ) -> rust_decimal::Decimal {
7077 use rust_decimal::Decimal;
7078
7079 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7080 for entry in tb {
7081 let net = entry.debit_balance - entry.credit_balance;
7082 *aggregated.entry(entry.category.clone()).or_default() += net;
7083 }
7084
7085 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7086 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7087 let opex = *aggregated
7088 .get("OperatingExpenses")
7089 .unwrap_or(&Decimal::ZERO);
7090 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7091 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7092
7093 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7096 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7098 operating_income - tax
7099 }
7100
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12–14   | `Inventory`          |
/// | 15–19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21–24   | `AccruedLiabilities` |
/// | 25–29   | `LongTermDebt`       |
/// | 30–39   | `Equity`             |
/// | 40–44   | `Revenue`            |
/// | 50–52   | `CostOfSales`        |
/// | 60–69   | `OperatingExpenses`  |
/// | 70–74   | `OtherIncome`        |
/// | 80–89   | `OtherExpenses`      |
///
/// Any other prefix — including codes shorter than two characters — falls back
/// to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes and too-short codes.
        _ => "OperatingExpenses",
    };
    label.to_owned()
}
7129
7130 fn phase_hr_data(
7132 &mut self,
7133 stats: &mut EnhancedGenerationStatistics,
7134 ) -> SynthResult<HrSnapshot> {
7135 if !self.phase_config.generate_hr {
7136 debug!("Phase 16: Skipped (HR generation disabled)");
7137 return Ok(HrSnapshot::default());
7138 }
7139
7140 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7141
7142 let seed = self.seed;
7143 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7144 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7145 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7146 let company_code = self
7147 .config
7148 .companies
7149 .first()
7150 .map(|c| c.code.as_str())
7151 .unwrap_or("1000");
7152 let currency = self
7153 .config
7154 .companies
7155 .first()
7156 .map(|c| c.currency.as_str())
7157 .unwrap_or("USD");
7158
7159 let employee_ids: Vec<String> = self
7160 .master_data
7161 .employees
7162 .iter()
7163 .map(|e| e.employee_id.clone())
7164 .collect();
7165
7166 if employee_ids.is_empty() {
7167 debug!("Phase 16: Skipped (no employees available)");
7168 return Ok(HrSnapshot::default());
7169 }
7170
7171 let cost_center_ids: Vec<String> = self
7174 .master_data
7175 .employees
7176 .iter()
7177 .filter_map(|e| e.cost_center.clone())
7178 .collect::<std::collections::HashSet<_>>()
7179 .into_iter()
7180 .collect();
7181
7182 let mut snapshot = HrSnapshot::default();
7183
7184 if self.config.hr.payroll.enabled {
7186 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7187 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7188
7189 let payroll_pack = self.primary_pack();
7191
7192 payroll_gen.set_country_pack(payroll_pack.clone());
7195
7196 let employees_with_salary: Vec<(
7197 String,
7198 rust_decimal::Decimal,
7199 Option<String>,
7200 Option<String>,
7201 )> = self
7202 .master_data
7203 .employees
7204 .iter()
7205 .map(|e| {
7206 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7209 e.base_salary
7210 } else {
7211 rust_decimal::Decimal::from(60_000)
7212 };
7213 (
7214 e.employee_id.clone(),
7215 annual, e.cost_center.clone(),
7217 e.department_id.clone(),
7218 )
7219 })
7220 .collect();
7221
7222 let change_history = &self.master_data.employee_change_history;
7225 let has_changes = !change_history.is_empty();
7226 if has_changes {
7227 debug!(
7228 "Payroll will incorporate {} employee change events",
7229 change_history.len()
7230 );
7231 }
7232
7233 for month in 0..self.config.global.period_months {
7234 let period_start = start_date + chrono::Months::new(month);
7235 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7236 let (run, items) = if has_changes {
7237 payroll_gen.generate_with_changes(
7238 company_code,
7239 &employees_with_salary,
7240 period_start,
7241 period_end,
7242 currency,
7243 change_history,
7244 )
7245 } else {
7246 payroll_gen.generate(
7247 company_code,
7248 &employees_with_salary,
7249 period_start,
7250 period_end,
7251 currency,
7252 )
7253 };
7254 snapshot.payroll_runs.push(run);
7255 snapshot.payroll_run_count += 1;
7256 snapshot.payroll_line_item_count += items.len();
7257 snapshot.payroll_line_items.extend(items);
7258 }
7259 }
7260
7261 if self.config.hr.time_attendance.enabled {
7263 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7264 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7265 if let Some(ctx) = &self.temporal_context {
7269 time_gen.set_temporal_context(Arc::clone(ctx));
7270 }
7271 let entries = time_gen.generate(
7272 &employee_ids,
7273 start_date,
7274 end_date,
7275 &self.config.hr.time_attendance,
7276 );
7277 snapshot.time_entry_count = entries.len();
7278 snapshot.time_entries = entries;
7279 }
7280
7281 if self.config.hr.expenses.enabled {
7283 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7284 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7285 expense_gen.set_country_pack(self.primary_pack().clone());
7286 if let Some(ctx) = &self.temporal_context {
7289 expense_gen.set_temporal_context(Arc::clone(ctx));
7290 }
7291 let company_currency = self
7292 .config
7293 .companies
7294 .first()
7295 .map(|c| c.currency.as_str())
7296 .unwrap_or("USD");
7297 let reports = expense_gen.generate_with_currency(
7298 &employee_ids,
7299 start_date,
7300 end_date,
7301 &self.config.hr.expenses,
7302 company_currency,
7303 );
7304 snapshot.expense_report_count = reports.len();
7305 snapshot.expense_reports = reports;
7306 }
7307
7308 if self.config.hr.payroll.enabled {
7310 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7311 let employee_pairs: Vec<(String, String)> = self
7312 .master_data
7313 .employees
7314 .iter()
7315 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7316 .collect();
7317 let enrollments =
7318 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7319 snapshot.benefit_enrollment_count = enrollments.len();
7320 snapshot.benefit_enrollments = enrollments;
7321 }
7322
7323 if self.phase_config.generate_hr {
7325 let entity_name = self
7326 .config
7327 .companies
7328 .first()
7329 .map(|c| c.name.as_str())
7330 .unwrap_or("Entity");
7331 let period_months = self.config.global.period_months;
7332 let period_label = {
7333 let y = start_date.year();
7334 let m = start_date.month();
7335 if period_months >= 12 {
7336 format!("FY{y}")
7337 } else {
7338 format!("{y}-{m:02}")
7339 }
7340 };
7341 let reporting_date =
7342 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7343
7344 let avg_salary: Option<rust_decimal::Decimal> = {
7349 let employee_count = employee_ids.len();
7350 if self.config.hr.payroll.enabled
7351 && employee_count > 0
7352 && !snapshot.payroll_runs.is_empty()
7353 {
7354 let total_gross: rust_decimal::Decimal = snapshot
7356 .payroll_runs
7357 .iter()
7358 .filter(|r| r.company_code == company_code)
7359 .map(|r| r.total_gross)
7360 .sum();
7361 if total_gross > rust_decimal::Decimal::ZERO {
7362 let annual_total = if period_months > 0 && period_months < 12 {
7364 total_gross * rust_decimal::Decimal::from(12u32)
7365 / rust_decimal::Decimal::from(period_months)
7366 } else {
7367 total_gross
7368 };
7369 Some(
7370 (annual_total / rust_decimal::Decimal::from(employee_count))
7371 .round_dp(2),
7372 )
7373 } else {
7374 None
7375 }
7376 } else {
7377 None
7378 }
7379 };
7380
7381 let mut pension_gen =
7382 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7383 let pension_snap = pension_gen.generate(
7384 company_code,
7385 entity_name,
7386 &period_label,
7387 reporting_date,
7388 employee_ids.len(),
7389 currency,
7390 avg_salary,
7391 period_months,
7392 );
7393 snapshot.pension_plan_count = pension_snap.plans.len();
7394 snapshot.pension_plans = pension_snap.plans;
7395 snapshot.pension_obligations = pension_snap.obligations;
7396 snapshot.pension_plan_assets = pension_snap.plan_assets;
7397 snapshot.pension_disclosures = pension_snap.disclosures;
7398 snapshot.pension_journal_entries = pension_snap.journal_entries;
7403 }
7404
7405 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7407 let period_months = self.config.global.period_months;
7408 let period_label = {
7409 let y = start_date.year();
7410 let m = start_date.month();
7411 if period_months >= 12 {
7412 format!("FY{y}")
7413 } else {
7414 format!("{y}-{m:02}")
7415 }
7416 };
7417 let reporting_date =
7418 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7419
7420 let mut stock_comp_gen =
7421 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7422 let stock_snap = stock_comp_gen.generate(
7423 company_code,
7424 &employee_ids,
7425 start_date,
7426 &period_label,
7427 reporting_date,
7428 currency,
7429 );
7430 snapshot.stock_grant_count = stock_snap.grants.len();
7431 snapshot.stock_grants = stock_snap.grants;
7432 snapshot.stock_comp_expenses = stock_snap.expenses;
7433 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7434 }
7435
7436 stats.payroll_run_count = snapshot.payroll_run_count;
7437 stats.time_entry_count = snapshot.time_entry_count;
7438 stats.expense_report_count = snapshot.expense_report_count;
7439 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7440 stats.pension_plan_count = snapshot.pension_plan_count;
7441 stats.stock_grant_count = snapshot.stock_grant_count;
7442
7443 info!(
7444 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7445 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7446 snapshot.time_entry_count, snapshot.expense_report_count,
7447 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7448 snapshot.stock_grant_count
7449 );
7450 self.check_resources_with_log("post-hr")?;
7451
7452 Ok(snapshot)
7453 }
7454
7455 fn phase_accounting_standards(
7457 &mut self,
7458 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7459 journal_entries: &[JournalEntry],
7460 stats: &mut EnhancedGenerationStatistics,
7461 ) -> SynthResult<AccountingStandardsSnapshot> {
7462 if !self.phase_config.generate_accounting_standards {
7463 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7464 return Ok(AccountingStandardsSnapshot::default());
7465 }
7466 info!("Phase 17: Generating Accounting Standards Data");
7467
7468 let seed = self.seed;
7469 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7470 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7471 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7472 let company_code = self
7473 .config
7474 .companies
7475 .first()
7476 .map(|c| c.code.as_str())
7477 .unwrap_or("1000");
7478 let currency = self
7479 .config
7480 .companies
7481 .first()
7482 .map(|c| c.currency.as_str())
7483 .unwrap_or("USD");
7484
7485 let framework = match self.config.accounting_standards.framework {
7490 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7491 datasynth_standards::framework::AccountingFramework::UsGaap
7492 }
7493 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7494 datasynth_standards::framework::AccountingFramework::Ifrs
7495 }
7496 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7497 datasynth_standards::framework::AccountingFramework::DualReporting
7498 }
7499 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7500 datasynth_standards::framework::AccountingFramework::FrenchGaap
7501 }
7502 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7503 datasynth_standards::framework::AccountingFramework::GermanGaap
7504 }
7505 None => {
7506 let pack = self.primary_pack();
7508 let pack_fw = pack.accounting.framework.as_str();
7509 match pack_fw {
7510 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7511 "dual_reporting" => {
7512 datasynth_standards::framework::AccountingFramework::DualReporting
7513 }
7514 "french_gaap" => {
7515 datasynth_standards::framework::AccountingFramework::FrenchGaap
7516 }
7517 "german_gaap" | "hgb" => {
7518 datasynth_standards::framework::AccountingFramework::GermanGaap
7519 }
7520 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7522 }
7523 }
7524 };
7525
7526 let mut snapshot = AccountingStandardsSnapshot::default();
7527
7528 if self.config.accounting_standards.revenue_recognition.enabled {
7530 let customer_ids: Vec<String> = self
7531 .master_data
7532 .customers
7533 .iter()
7534 .map(|c| c.customer_id.clone())
7535 .collect();
7536
7537 if !customer_ids.is_empty() {
7538 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7539 let contracts = rev_gen.generate(
7540 company_code,
7541 &customer_ids,
7542 start_date,
7543 end_date,
7544 currency,
7545 &self.config.accounting_standards.revenue_recognition,
7546 framework,
7547 );
7548 snapshot.revenue_contract_count = contracts.len();
7549 snapshot.contracts = contracts;
7550 }
7551 }
7552
7553 if self.config.accounting_standards.impairment.enabled {
7555 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7556 .master_data
7557 .assets
7558 .iter()
7559 .map(|a| {
7560 (
7561 a.asset_id.clone(),
7562 a.description.clone(),
7563 a.acquisition_cost,
7564 )
7565 })
7566 .collect();
7567
7568 if !asset_data.is_empty() {
7569 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7570 let tests = imp_gen.generate(
7571 company_code,
7572 &asset_data,
7573 end_date,
7574 &self.config.accounting_standards.impairment,
7575 framework,
7576 );
7577 snapshot.impairment_test_count = tests.len();
7578 snapshot.impairment_tests = tests;
7579 }
7580 }
7581
7582 if self
7584 .config
7585 .accounting_standards
7586 .business_combinations
7587 .enabled
7588 {
7589 let bc_config = &self.config.accounting_standards.business_combinations;
7590 let framework_str = match framework {
7591 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7592 _ => "US_GAAP",
7593 };
7594 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7595 let bc_snap = bc_gen.generate(
7596 company_code,
7597 currency,
7598 start_date,
7599 end_date,
7600 bc_config.acquisition_count,
7601 framework_str,
7602 );
7603 snapshot.business_combination_count = bc_snap.combinations.len();
7604 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7605 snapshot.business_combinations = bc_snap.combinations;
7606 }
7607
7608 if self
7610 .config
7611 .accounting_standards
7612 .expected_credit_loss
7613 .enabled
7614 {
7615 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7616 let framework_str = match framework {
7617 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7618 _ => "ASC_326",
7619 };
7620
7621 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7624
7625 let mut ecl_gen = EclGenerator::new(seed + 43);
7626
7627 let bucket_exposures: Vec<(
7629 datasynth_core::models::subledger::ar::AgingBucket,
7630 rust_decimal::Decimal,
7631 )> = if ar_aging_reports.is_empty() {
7632 use datasynth_core::models::subledger::ar::AgingBucket;
7634 vec![
7635 (
7636 AgingBucket::Current,
7637 rust_decimal::Decimal::from(500_000_u32),
7638 ),
7639 (
7640 AgingBucket::Days1To30,
7641 rust_decimal::Decimal::from(120_000_u32),
7642 ),
7643 (
7644 AgingBucket::Days31To60,
7645 rust_decimal::Decimal::from(45_000_u32),
7646 ),
7647 (
7648 AgingBucket::Days61To90,
7649 rust_decimal::Decimal::from(15_000_u32),
7650 ),
7651 (
7652 AgingBucket::Over90Days,
7653 rust_decimal::Decimal::from(8_000_u32),
7654 ),
7655 ]
7656 } else {
7657 use datasynth_core::models::subledger::ar::AgingBucket;
7658 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7660 std::collections::HashMap::new();
7661 for report in ar_aging_reports {
7662 for (bucket, amount) in &report.bucket_totals {
7663 *totals.entry(*bucket).or_default() += amount;
7664 }
7665 }
7666 AgingBucket::all()
7667 .into_iter()
7668 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7669 .collect()
7670 };
7671
7672 let ecl_snap = ecl_gen.generate(
7673 company_code,
7674 end_date,
7675 &bucket_exposures,
7676 ecl_config,
7677 &period_label,
7678 framework_str,
7679 );
7680
7681 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7682 snapshot.ecl_models = ecl_snap.ecl_models;
7683 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7684 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7685 }
7686
7687 {
7689 let framework_str = match framework {
7690 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7691 _ => "US_GAAP",
7692 };
7693
7694 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7699 .max(rust_decimal::Decimal::from(100_000_u32));
7700
7701 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7702
7703 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7704 let prov_snap = prov_gen.generate(
7705 company_code,
7706 currency,
7707 revenue_proxy,
7708 end_date,
7709 &period_label,
7710 framework_str,
7711 None, );
7713
7714 snapshot.provision_count = prov_snap.provisions.len();
7715 snapshot.provisions = prov_snap.provisions;
7716 snapshot.provision_movements = prov_snap.movements;
7717 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7718 snapshot.provision_journal_entries = prov_snap.journal_entries;
7719 }
7720
7721 {
7725 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7726
7727 let presentation_currency = self
7728 .config
7729 .global
7730 .presentation_currency
7731 .clone()
7732 .unwrap_or_else(|| self.config.global.group_currency.clone());
7733
7734 let mut rate_table = FxRateTable::new(&presentation_currency);
7737
7738 let base_rates = base_rates_usd();
7742 for (ccy, rate) in &base_rates {
7743 rate_table.add_rate(FxRate::new(
7744 ccy,
7745 "USD",
7746 RateType::Closing,
7747 end_date,
7748 *rate,
7749 "SYNTHETIC",
7750 ));
7751 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7754 rate_table.add_rate(FxRate::new(
7755 ccy,
7756 "USD",
7757 RateType::Average,
7758 end_date,
7759 avg,
7760 "SYNTHETIC",
7761 ));
7762 }
7763
7764 let mut translation_results = Vec::new();
7765 for company in &self.config.companies {
7766 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7769 .max(rust_decimal::Decimal::from(100_000_u32));
7770
7771 let func_ccy = company
7772 .functional_currency
7773 .clone()
7774 .unwrap_or_else(|| company.currency.clone());
7775
7776 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7777 &company.code,
7778 &func_ccy,
7779 &presentation_currency,
7780 &ias21_period_label,
7781 end_date,
7782 company_revenue,
7783 &rate_table,
7784 );
7785 translation_results.push(result);
7786 }
7787
7788 snapshot.currency_translation_count = translation_results.len();
7789 snapshot.currency_translation_results = translation_results;
7790 }
7791
7792 stats.revenue_contract_count = snapshot.revenue_contract_count;
7793 stats.impairment_test_count = snapshot.impairment_test_count;
7794 stats.business_combination_count = snapshot.business_combination_count;
7795 stats.ecl_model_count = snapshot.ecl_model_count;
7796 stats.provision_count = snapshot.provision_count;
7797
7798 if self.config.accounting_standards.leases.enabled {
7802 use datasynth_generators::standards::LeaseGenerator;
7803 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7804 .unwrap_or_else(|_| {
7805 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7806 });
7807 let framework =
7808 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7809 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7810 for company in &self.config.companies {
7811 let leases = lease_gen.generate(
7812 &company.code,
7813 start_date,
7814 &self.config.accounting_standards.leases,
7815 framework,
7816 );
7817 snapshot.lease_count += leases.len();
7818 snapshot.leases.extend(leases);
7819 }
7820 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7821 }
7822
7823 if self.config.accounting_standards.fair_value.enabled {
7827 use datasynth_generators::standards::FairValueGenerator;
7828 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7829 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7830 + chrono::Months::new(self.config.global.period_months);
7831 let framework =
7832 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7833 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7834 for company in &self.config.companies {
7835 let measurements = fv_gen.generate(
7836 &company.code,
7837 end_date,
7838 &company.currency,
7839 &self.config.accounting_standards.fair_value,
7840 framework,
7841 );
7842 snapshot.fair_value_measurement_count += measurements.len();
7843 snapshot.fair_value_measurements.extend(measurements);
7844 }
7845 info!(
7846 "v3.3.1 fair value measurements: {}",
7847 snapshot.fair_value_measurement_count
7848 );
7849 }
7850
7851 if self.config.accounting_standards.generate_differences
7855 && matches!(
7856 self.config.accounting_standards.framework,
7857 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7858 )
7859 {
7860 use datasynth_generators::standards::FrameworkReconciliationGenerator;
7861 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7862 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7863 + chrono::Months::new(self.config.global.period_months);
7864 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7865 for company in &self.config.companies {
7866 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7867 snapshot.framework_difference_count += records.len();
7868 snapshot.framework_differences.extend(records);
7869 snapshot.framework_reconciliations.push(reconciliation);
7870 }
7871 info!(
7872 "v3.3.1 framework reconciliation: {} differences across {} entities",
7873 snapshot.framework_difference_count,
7874 snapshot.framework_reconciliations.len()
7875 );
7876 }
7877
7878 info!(
7879 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7880 snapshot.revenue_contract_count,
7881 snapshot.impairment_test_count,
7882 snapshot.business_combination_count,
7883 snapshot.ecl_model_count,
7884 snapshot.provision_count,
7885 snapshot.currency_translation_count,
7886 snapshot.lease_count,
7887 snapshot.fair_value_measurement_count,
7888 snapshot.framework_difference_count,
7889 );
7890 self.check_resources_with_log("post-accounting-standards")?;
7891
7892 Ok(snapshot)
7893 }
7894
7895 fn resolve_accounting_framework(
7899 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
7900 ) -> datasynth_standards::framework::AccountingFramework {
7901 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
7902 use datasynth_standards::framework::AccountingFramework as Fw;
7903 match cfg {
7904 Some(Cfg::Ifrs) => Fw::Ifrs,
7905 Some(Cfg::DualReporting) => Fw::DualReporting,
7906 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
7907 Some(Cfg::GermanGaap) => Fw::GermanGaap,
7908 _ => Fw::UsGaap,
7909 }
7910 }
7911
7912 fn phase_manufacturing(
7914 &mut self,
7915 stats: &mut EnhancedGenerationStatistics,
7916 ) -> SynthResult<ManufacturingSnapshot> {
7917 if !self.phase_config.generate_manufacturing {
7918 debug!("Phase 18: Skipped (manufacturing generation disabled)");
7919 return Ok(ManufacturingSnapshot::default());
7920 }
7921 info!("Phase 18: Generating Manufacturing Data");
7922
7923 let seed = self.seed;
7924 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7925 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7926 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7927 let company_code = self
7928 .config
7929 .companies
7930 .first()
7931 .map(|c| c.code.as_str())
7932 .unwrap_or("1000");
7933
7934 let material_data: Vec<(String, String)> = self
7935 .master_data
7936 .materials
7937 .iter()
7938 .map(|m| (m.material_id.clone(), m.description.clone()))
7939 .collect();
7940
7941 if material_data.is_empty() {
7942 debug!("Phase 18: Skipped (no materials available)");
7943 return Ok(ManufacturingSnapshot::default());
7944 }
7945
7946 let mut snapshot = ManufacturingSnapshot::default();
7947
7948 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7950 if let Some(ctx) = &self.temporal_context {
7952 prod_gen.set_temporal_context(Arc::clone(ctx));
7953 }
7954 let production_orders = prod_gen.generate(
7955 company_code,
7956 &material_data,
7957 start_date,
7958 end_date,
7959 &self.config.manufacturing.production_orders,
7960 &self.config.manufacturing.costing,
7961 &self.config.manufacturing.routing,
7962 );
7963 snapshot.production_order_count = production_orders.len();
7964
7965 let inspection_data: Vec<(String, String, String)> = production_orders
7967 .iter()
7968 .map(|po| {
7969 (
7970 po.order_id.clone(),
7971 po.material_id.clone(),
7972 po.material_description.clone(),
7973 )
7974 })
7975 .collect();
7976
7977 snapshot.production_orders = production_orders;
7978
7979 if !inspection_data.is_empty() {
7980 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7981 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7982 snapshot.quality_inspection_count = inspections.len();
7983 snapshot.quality_inspections = inspections;
7984 }
7985
7986 let storage_locations: Vec<(String, String)> = material_data
7988 .iter()
7989 .enumerate()
7990 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7991 .collect();
7992
7993 let employee_ids: Vec<String> = self
7994 .master_data
7995 .employees
7996 .iter()
7997 .map(|e| e.employee_id.clone())
7998 .collect();
7999 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8000 .with_employee_pool(employee_ids);
8001 let mut cycle_count_total = 0usize;
8002 for month in 0..self.config.global.period_months {
8003 let count_date = start_date + chrono::Months::new(month);
8004 let items_per_count = storage_locations.len().clamp(10, 50);
8005 let cc = cc_gen.generate(
8006 company_code,
8007 &storage_locations,
8008 count_date,
8009 items_per_count,
8010 );
8011 snapshot.cycle_counts.push(cc);
8012 cycle_count_total += 1;
8013 }
8014 snapshot.cycle_count_count = cycle_count_total;
8015
8016 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8018 let bom_components = bom_gen.generate(company_code, &material_data);
8019 snapshot.bom_component_count = bom_components.len();
8020 snapshot.bom_components = bom_components;
8021
8022 let currency = self
8024 .config
8025 .companies
8026 .first()
8027 .map(|c| c.currency.as_str())
8028 .unwrap_or("USD");
8029 let production_order_ids: Vec<String> = snapshot
8030 .production_orders
8031 .iter()
8032 .map(|po| po.order_id.clone())
8033 .collect();
8034 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8035 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8036 company_code,
8037 &material_data,
8038 start_date,
8039 end_date,
8040 2,
8041 currency,
8042 &production_order_ids,
8043 );
8044 snapshot.inventory_movement_count = inventory_movements.len();
8045 snapshot.inventory_movements = inventory_movements;
8046
8047 stats.production_order_count = snapshot.production_order_count;
8048 stats.quality_inspection_count = snapshot.quality_inspection_count;
8049 stats.cycle_count_count = snapshot.cycle_count_count;
8050 stats.bom_component_count = snapshot.bom_component_count;
8051 stats.inventory_movement_count = snapshot.inventory_movement_count;
8052
8053 info!(
8054 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8055 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8056 snapshot.bom_component_count, snapshot.inventory_movement_count
8057 );
8058 self.check_resources_with_log("post-manufacturing")?;
8059
8060 Ok(snapshot)
8061 }
8062
8063 fn phase_sales_kpi_budgets(
8065 &mut self,
8066 coa: &Arc<ChartOfAccounts>,
8067 financial_reporting: &FinancialReportingSnapshot,
8068 stats: &mut EnhancedGenerationStatistics,
8069 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8070 if !self.phase_config.generate_sales_kpi_budgets {
8071 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8072 return Ok(SalesKpiBudgetsSnapshot::default());
8073 }
8074 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8075
8076 let seed = self.seed;
8077 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8078 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8079 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8080 let company_code = self
8081 .config
8082 .companies
8083 .first()
8084 .map(|c| c.code.as_str())
8085 .unwrap_or("1000");
8086
8087 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8088
8089 if self.config.sales_quotes.enabled {
8091 let customer_data: Vec<(String, String)> = self
8092 .master_data
8093 .customers
8094 .iter()
8095 .map(|c| (c.customer_id.clone(), c.name.clone()))
8096 .collect();
8097 let material_data: Vec<(String, String)> = self
8098 .master_data
8099 .materials
8100 .iter()
8101 .map(|m| (m.material_id.clone(), m.description.clone()))
8102 .collect();
8103
8104 if !customer_data.is_empty() && !material_data.is_empty() {
8105 let employee_ids: Vec<String> = self
8106 .master_data
8107 .employees
8108 .iter()
8109 .map(|e| e.employee_id.clone())
8110 .collect();
8111 let customer_ids: Vec<String> = self
8112 .master_data
8113 .customers
8114 .iter()
8115 .map(|c| c.customer_id.clone())
8116 .collect();
8117 let company_currency = self
8118 .config
8119 .companies
8120 .first()
8121 .map(|c| c.currency.as_str())
8122 .unwrap_or("USD");
8123
8124 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8125 .with_pools(employee_ids, customer_ids);
8126 let quotes = quote_gen.generate_with_currency(
8127 company_code,
8128 &customer_data,
8129 &material_data,
8130 start_date,
8131 end_date,
8132 &self.config.sales_quotes,
8133 company_currency,
8134 );
8135 snapshot.sales_quote_count = quotes.len();
8136 snapshot.sales_quotes = quotes;
8137 }
8138 }
8139
8140 if self.config.financial_reporting.management_kpis.enabled {
8142 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8143 let mut kpis = kpi_gen.generate(
8144 company_code,
8145 start_date,
8146 end_date,
8147 &self.config.financial_reporting.management_kpis,
8148 );
8149
8150 {
8152 use rust_decimal::Decimal;
8153
8154 if let Some(income_stmt) =
8155 financial_reporting.financial_statements.iter().find(|fs| {
8156 fs.statement_type == StatementType::IncomeStatement
8157 && fs.company_code == company_code
8158 })
8159 {
8160 let total_revenue: Decimal = income_stmt
8162 .line_items
8163 .iter()
8164 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8165 .map(|li| li.amount)
8166 .sum();
8167 let total_cogs: Decimal = income_stmt
8168 .line_items
8169 .iter()
8170 .filter(|li| {
8171 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8172 && !li.is_total
8173 })
8174 .map(|li| li.amount.abs())
8175 .sum();
8176 let total_opex: Decimal = income_stmt
8177 .line_items
8178 .iter()
8179 .filter(|li| {
8180 li.section.contains("Expense")
8181 && !li.is_total
8182 && !li.section.contains("Cost")
8183 })
8184 .map(|li| li.amount.abs())
8185 .sum();
8186
8187 if total_revenue > Decimal::ZERO {
8188 let hundred = Decimal::from(100);
8189 let gross_margin_pct =
8190 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8191 let operating_income = total_revenue - total_cogs - total_opex;
8192 let op_margin_pct =
8193 (operating_income * hundred / total_revenue).round_dp(2);
8194
8195 for kpi in &mut kpis {
8197 if kpi.name == "Gross Margin" {
8198 kpi.value = gross_margin_pct;
8199 } else if kpi.name == "Operating Margin" {
8200 kpi.value = op_margin_pct;
8201 }
8202 }
8203 }
8204 }
8205
8206 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8208 fs.statement_type == StatementType::BalanceSheet
8209 && fs.company_code == company_code
8210 }) {
8211 let current_assets: Decimal = bs
8212 .line_items
8213 .iter()
8214 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8215 .map(|li| li.amount)
8216 .sum();
8217 let current_liabilities: Decimal = bs
8218 .line_items
8219 .iter()
8220 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8221 .map(|li| li.amount.abs())
8222 .sum();
8223
8224 if current_liabilities > Decimal::ZERO {
8225 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8226 for kpi in &mut kpis {
8227 if kpi.name == "Current Ratio" {
8228 kpi.value = current_ratio;
8229 }
8230 }
8231 }
8232 }
8233 }
8234
8235 snapshot.kpi_count = kpis.len();
8236 snapshot.kpis = kpis;
8237 }
8238
8239 if self.config.financial_reporting.budgets.enabled {
8241 let account_data: Vec<(String, String)> = coa
8242 .accounts
8243 .iter()
8244 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8245 .collect();
8246
8247 if !account_data.is_empty() {
8248 let fiscal_year = start_date.year() as u32;
8249 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8250 let budget = budget_gen.generate(
8251 company_code,
8252 fiscal_year,
8253 &account_data,
8254 &self.config.financial_reporting.budgets,
8255 );
8256 snapshot.budget_line_count = budget.line_items.len();
8257 snapshot.budgets.push(budget);
8258 }
8259 }
8260
8261 stats.sales_quote_count = snapshot.sales_quote_count;
8262 stats.kpi_count = snapshot.kpi_count;
8263 stats.budget_line_count = snapshot.budget_line_count;
8264
8265 info!(
8266 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8267 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8268 );
8269 self.check_resources_with_log("post-sales-kpi-budgets")?;
8270
8271 Ok(snapshot)
8272 }
8273
8274 fn compute_pre_tax_income(
8281 company_code: &str,
8282 journal_entries: &[JournalEntry],
8283 ) -> rust_decimal::Decimal {
8284 use datasynth_core::accounts::AccountCategory;
8285 use rust_decimal::Decimal;
8286
8287 let mut total_revenue = Decimal::ZERO;
8288 let mut total_expenses = Decimal::ZERO;
8289
8290 for je in journal_entries {
8291 if je.header.company_code != company_code {
8292 continue;
8293 }
8294 for line in &je.lines {
8295 let cat = AccountCategory::from_account(&line.gl_account);
8296 match cat {
8297 AccountCategory::Revenue => {
8298 total_revenue += line.credit_amount - line.debit_amount;
8299 }
8300 AccountCategory::Cogs
8301 | AccountCategory::OperatingExpense
8302 | AccountCategory::OtherIncomeExpense => {
8303 total_expenses += line.debit_amount - line.credit_amount;
8304 }
8305 _ => {}
8306 }
8307 }
8308 }
8309
8310 let pti = (total_revenue - total_expenses).round_dp(2);
8311 if pti == rust_decimal::Decimal::ZERO {
8312 rust_decimal::Decimal::from(1_000_000u32)
8315 } else {
8316 pti
8317 }
8318 }
8319
8320 fn phase_tax_generation(
8322 &mut self,
8323 document_flows: &DocumentFlowSnapshot,
8324 journal_entries: &[JournalEntry],
8325 stats: &mut EnhancedGenerationStatistics,
8326 ) -> SynthResult<TaxSnapshot> {
8327 if !self.phase_config.generate_tax {
8328 debug!("Phase 20: Skipped (tax generation disabled)");
8329 return Ok(TaxSnapshot::default());
8330 }
8331 info!("Phase 20: Generating Tax Data");
8332
8333 let seed = self.seed;
8334 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8335 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8336 let fiscal_year = start_date.year();
8337 let company_code = self
8338 .config
8339 .companies
8340 .first()
8341 .map(|c| c.code.as_str())
8342 .unwrap_or("1000");
8343
8344 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8345 seed + 370,
8346 self.config.tax.clone(),
8347 );
8348
8349 let pack = self.primary_pack().clone();
8350 let (jurisdictions, codes) =
8351 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8352
8353 let mut provisions = Vec::new();
8355 if self.config.tax.provisions.enabled {
8356 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8357 for company in &self.config.companies {
8358 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8359 let statutory_rate = rust_decimal::Decimal::new(
8360 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8361 2,
8362 );
8363 let provision = provision_gen.generate(
8364 &company.code,
8365 start_date,
8366 pre_tax_income,
8367 statutory_rate,
8368 );
8369 provisions.push(provision);
8370 }
8371 }
8372
8373 let mut tax_lines = Vec::new();
8375 if !codes.is_empty() {
8376 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8377 datasynth_generators::TaxLineGeneratorConfig::default(),
8378 codes.clone(),
8379 seed + 372,
8380 );
8381
8382 let buyer_country = self
8385 .config
8386 .companies
8387 .first()
8388 .map(|c| c.country.as_str())
8389 .unwrap_or("US");
8390 for vi in &document_flows.vendor_invoices {
8391 let lines = tax_line_gen.generate_for_document(
8392 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8393 &vi.header.document_id,
8394 buyer_country, buyer_country,
8396 vi.payable_amount,
8397 vi.header.document_date,
8398 None,
8399 );
8400 tax_lines.extend(lines);
8401 }
8402
8403 for ci in &document_flows.customer_invoices {
8405 let lines = tax_line_gen.generate_for_document(
8406 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8407 &ci.header.document_id,
8408 buyer_country, buyer_country,
8410 ci.total_gross_amount,
8411 ci.header.document_date,
8412 None,
8413 );
8414 tax_lines.extend(lines);
8415 }
8416 }
8417
8418 let deferred_tax = {
8420 let companies: Vec<(&str, &str)> = self
8421 .config
8422 .companies
8423 .iter()
8424 .map(|c| (c.code.as_str(), c.country.as_str()))
8425 .collect();
8426 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8427 deferred_gen.generate(&companies, start_date, journal_entries)
8428 };
8429
8430 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8433 std::collections::HashMap::new();
8434 for vi in &document_flows.vendor_invoices {
8435 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8436 }
8437 for ci in &document_flows.customer_invoices {
8438 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8439 }
8440
8441 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8443 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8444 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8445 &tax_lines,
8446 company_code,
8447 &doc_dates,
8448 end_date,
8449 );
8450 debug!("Generated {} tax posting JEs", jes.len());
8451 jes
8452 } else {
8453 Vec::new()
8454 };
8455
8456 let snapshot = TaxSnapshot {
8457 jurisdiction_count: jurisdictions.len(),
8458 code_count: codes.len(),
8459 jurisdictions,
8460 codes,
8461 tax_provisions: provisions,
8462 tax_lines,
8463 tax_returns: Vec::new(),
8464 withholding_records: Vec::new(),
8465 tax_anomaly_labels: Vec::new(),
8466 deferred_tax,
8467 tax_posting_journal_entries,
8468 };
8469
8470 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8471 stats.tax_code_count = snapshot.code_count;
8472 stats.tax_provision_count = snapshot.tax_provisions.len();
8473 stats.tax_line_count = snapshot.tax_lines.len();
8474
8475 info!(
8476 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8477 snapshot.jurisdiction_count,
8478 snapshot.code_count,
8479 snapshot.tax_provisions.len(),
8480 snapshot.deferred_tax.temporary_differences.len(),
8481 snapshot.deferred_tax.journal_entries.len(),
8482 snapshot.tax_posting_journal_entries.len(),
8483 );
8484 self.check_resources_with_log("post-tax")?;
8485
8486 Ok(snapshot)
8487 }
8488
8489 fn phase_esg_generation(
8491 &mut self,
8492 document_flows: &DocumentFlowSnapshot,
8493 manufacturing: &ManufacturingSnapshot,
8494 stats: &mut EnhancedGenerationStatistics,
8495 ) -> SynthResult<EsgSnapshot> {
8496 if !self.phase_config.generate_esg {
8497 debug!("Phase 21: Skipped (ESG generation disabled)");
8498 return Ok(EsgSnapshot::default());
8499 }
8500 let degradation = self.check_resources()?;
8501 if degradation >= DegradationLevel::Reduced {
8502 debug!(
8503 "Phase skipped due to resource pressure (degradation: {:?})",
8504 degradation
8505 );
8506 return Ok(EsgSnapshot::default());
8507 }
8508 info!("Phase 21: Generating ESG Data");
8509
8510 let seed = self.seed;
8511 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8512 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8513 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8514 let entity_id = self
8515 .config
8516 .companies
8517 .first()
8518 .map(|c| c.code.as_str())
8519 .unwrap_or("1000");
8520
8521 let esg_cfg = &self.config.esg;
8522 let mut snapshot = EsgSnapshot::default();
8523
8524 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8526 esg_cfg.environmental.energy.clone(),
8527 seed + 80,
8528 );
8529 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8530
8531 let facility_count = esg_cfg.environmental.energy.facility_count;
8533 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8534 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8535
8536 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8538 seed + 82,
8539 esg_cfg.environmental.waste.diversion_target,
8540 facility_count,
8541 );
8542 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8543
8544 let mut emission_gen =
8546 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8547
8548 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8550 .iter()
8551 .map(|e| datasynth_generators::EnergyInput {
8552 facility_id: e.facility_id.clone(),
8553 energy_type: match e.energy_source {
8554 EnergySourceType::NaturalGas => {
8555 datasynth_generators::EnergyInputType::NaturalGas
8556 }
8557 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8558 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8559 _ => datasynth_generators::EnergyInputType::Electricity,
8560 },
8561 consumption_kwh: e.consumption_kwh,
8562 period: e.period,
8563 })
8564 .collect();
8565
8566 if !manufacturing.production_orders.is_empty() {
8568 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8569 &manufacturing.production_orders,
8570 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8573 if !mfg_energy.is_empty() {
8574 info!(
8575 "ESG: {} energy inputs derived from {} production orders",
8576 mfg_energy.len(),
8577 manufacturing.production_orders.len(),
8578 );
8579 energy_inputs.extend(mfg_energy);
8580 }
8581 }
8582
8583 let mut emissions = Vec::new();
8584 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8585 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8586
8587 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8589 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8590 for payment in &document_flows.payments {
8591 if payment.is_vendor {
8592 *totals
8593 .entry(payment.business_partner_id.clone())
8594 .or_default() += payment.amount;
8595 }
8596 }
8597 totals
8598 };
8599 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8600 .master_data
8601 .vendors
8602 .iter()
8603 .map(|v| {
8604 let spend = vendor_payment_totals
8605 .get(&v.vendor_id)
8606 .copied()
8607 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8608 datasynth_generators::VendorSpendInput {
8609 vendor_id: v.vendor_id.clone(),
8610 category: format!("{:?}", v.vendor_type).to_lowercase(),
8611 spend,
8612 country: v.country.clone(),
8613 }
8614 })
8615 .collect();
8616 if !vendor_spend.is_empty() {
8617 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8618 entity_id,
8619 &vendor_spend,
8620 start_date,
8621 end_date,
8622 ));
8623 }
8624
8625 let headcount = self.master_data.employees.len() as u32;
8627 if headcount > 0 {
8628 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8629 emissions.extend(emission_gen.generate_scope3_business_travel(
8630 entity_id,
8631 travel_spend,
8632 start_date,
8633 ));
8634 emissions
8635 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8636 }
8637
8638 snapshot.emission_count = emissions.len();
8639 snapshot.emissions = emissions;
8640 snapshot.energy = energy_records;
8641
8642 let mut workforce_gen =
8644 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8645 let total_headcount = headcount.max(100);
8646 snapshot.diversity =
8647 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8648 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8649
8650 if !self.master_data.employees.is_empty() {
8652 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8653 entity_id,
8654 &self.master_data.employees,
8655 end_date,
8656 );
8657 if !hr_diversity.is_empty() {
8658 info!(
8659 "ESG: {} diversity metrics derived from {} actual employees",
8660 hr_diversity.len(),
8661 self.master_data.employees.len(),
8662 );
8663 snapshot.diversity.extend(hr_diversity);
8664 }
8665 }
8666
8667 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8668 entity_id,
8669 facility_count,
8670 start_date,
8671 end_date,
8672 );
8673
8674 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8677 entity_id,
8678 &snapshot.safety_incidents,
8679 total_hours,
8680 start_date,
8681 );
8682 snapshot.safety_metrics = vec![safety_metric];
8683
8684 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8686 seed + 85,
8687 esg_cfg.governance.board_size,
8688 esg_cfg.governance.independence_target,
8689 );
8690 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8691
8692 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8694 esg_cfg.supply_chain_esg.clone(),
8695 seed + 86,
8696 );
8697 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8698 .master_data
8699 .vendors
8700 .iter()
8701 .map(|v| datasynth_generators::VendorInput {
8702 vendor_id: v.vendor_id.clone(),
8703 country: v.country.clone(),
8704 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8705 quality_score: None,
8706 })
8707 .collect();
8708 snapshot.supplier_assessments =
8709 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8710
8711 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8713 seed + 87,
8714 esg_cfg.reporting.clone(),
8715 esg_cfg.climate_scenarios.clone(),
8716 );
8717 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8718 snapshot.disclosures = disclosure_gen.generate_disclosures(
8719 entity_id,
8720 &snapshot.materiality,
8721 start_date,
8722 end_date,
8723 );
8724 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8725 snapshot.disclosure_count = snapshot.disclosures.len();
8726
8727 if esg_cfg.anomaly_rate > 0.0 {
8729 let mut anomaly_injector =
8730 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8731 let mut labels = Vec::new();
8732 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8733 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8734 labels.extend(
8735 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8736 );
8737 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8738 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8739 snapshot.anomaly_labels = labels;
8740 }
8741
8742 stats.esg_emission_count = snapshot.emission_count;
8743 stats.esg_disclosure_count = snapshot.disclosure_count;
8744
8745 info!(
8746 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8747 snapshot.emission_count,
8748 snapshot.disclosure_count,
8749 snapshot.supplier_assessments.len()
8750 );
8751 self.check_resources_with_log("post-esg")?;
8752
8753 Ok(snapshot)
8754 }
8755
8756 fn phase_treasury_data(
8758 &mut self,
8759 document_flows: &DocumentFlowSnapshot,
8760 subledger: &SubledgerSnapshot,
8761 intercompany: &IntercompanySnapshot,
8762 stats: &mut EnhancedGenerationStatistics,
8763 ) -> SynthResult<TreasurySnapshot> {
8764 if !self.phase_config.generate_treasury {
8765 debug!("Phase 22: Skipped (treasury generation disabled)");
8766 return Ok(TreasurySnapshot::default());
8767 }
8768 let degradation = self.check_resources()?;
8769 if degradation >= DegradationLevel::Reduced {
8770 debug!(
8771 "Phase skipped due to resource pressure (degradation: {:?})",
8772 degradation
8773 );
8774 return Ok(TreasurySnapshot::default());
8775 }
8776 info!("Phase 22: Generating Treasury Data");
8777
8778 let seed = self.seed;
8779 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8780 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8781 let currency = self
8782 .config
8783 .companies
8784 .first()
8785 .map(|c| c.currency.as_str())
8786 .unwrap_or("USD");
8787 let entity_id = self
8788 .config
8789 .companies
8790 .first()
8791 .map(|c| c.code.as_str())
8792 .unwrap_or("1000");
8793
8794 let mut snapshot = TreasurySnapshot::default();
8795
8796 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8798 self.config.treasury.debt.clone(),
8799 seed + 90,
8800 );
8801 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8802
8803 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8805 self.config.treasury.hedging.clone(),
8806 seed + 91,
8807 );
8808 for debt in &snapshot.debt_instruments {
8809 if debt.rate_type == InterestRateType::Variable {
8810 let swap = hedge_gen.generate_ir_swap(
8811 currency,
8812 debt.principal,
8813 debt.origination_date,
8814 debt.maturity_date,
8815 );
8816 snapshot.hedging_instruments.push(swap);
8817 }
8818 }
8819
8820 {
8823 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8824 for payment in &document_flows.payments {
8825 if payment.currency != currency {
8826 let entry = fx_map
8827 .entry(payment.currency.clone())
8828 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8829 entry.0 += payment.amount;
8830 if payment.header.document_date > entry.1 {
8832 entry.1 = payment.header.document_date;
8833 }
8834 }
8835 }
8836 if !fx_map.is_empty() {
8837 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8838 .into_iter()
8839 .map(|(foreign_ccy, (net_amount, settlement_date))| {
8840 datasynth_generators::treasury::FxExposure {
8841 currency_pair: format!("{foreign_ccy}/{currency}"),
8842 foreign_currency: foreign_ccy,
8843 net_amount,
8844 settlement_date,
8845 description: "AP payment FX exposure".to_string(),
8846 }
8847 })
8848 .collect();
8849 let (fx_instruments, fx_relationships) =
8850 hedge_gen.generate(start_date, &fx_exposures);
8851 snapshot.hedging_instruments.extend(fx_instruments);
8852 snapshot.hedge_relationships.extend(fx_relationships);
8853 }
8854 }
8855
8856 if self.config.treasury.anomaly_rate > 0.0 {
8858 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8859 seed + 92,
8860 self.config.treasury.anomaly_rate,
8861 );
8862 let mut labels = Vec::new();
8863 labels.extend(
8864 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8865 );
8866 snapshot.treasury_anomaly_labels = labels;
8867 }
8868
8869 if self.config.treasury.cash_positioning.enabled {
8871 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8872
8873 for payment in &document_flows.payments {
8875 cash_flows.push(datasynth_generators::treasury::CashFlow {
8876 date: payment.header.document_date,
8877 account_id: format!("{entity_id}-MAIN"),
8878 amount: payment.amount,
8879 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8880 });
8881 }
8882
8883 for chain in &document_flows.o2c_chains {
8885 if let Some(ref receipt) = chain.customer_receipt {
8886 cash_flows.push(datasynth_generators::treasury::CashFlow {
8887 date: receipt.header.document_date,
8888 account_id: format!("{entity_id}-MAIN"),
8889 amount: receipt.amount,
8890 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8891 });
8892 }
8893 for receipt in &chain.remainder_receipts {
8895 cash_flows.push(datasynth_generators::treasury::CashFlow {
8896 date: receipt.header.document_date,
8897 account_id: format!("{entity_id}-MAIN"),
8898 amount: receipt.amount,
8899 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8900 });
8901 }
8902 }
8903
8904 if !cash_flows.is_empty() {
8905 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8906 self.config.treasury.cash_positioning.clone(),
8907 seed + 93,
8908 );
8909 let account_id = format!("{entity_id}-MAIN");
8910 snapshot.cash_positions = cash_gen.generate(
8911 entity_id,
8912 &account_id,
8913 currency,
8914 &cash_flows,
8915 start_date,
8916 start_date + chrono::Months::new(self.config.global.period_months),
8917 rust_decimal::Decimal::new(1_000_000, 0), );
8919 }
8920 }
8921
8922 if self.config.treasury.cash_forecasting.enabled {
8924 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8925
8926 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8928 .ar_invoices
8929 .iter()
8930 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8931 .map(|inv| {
8932 let days_past_due = if inv.due_date < end_date {
8933 (end_date - inv.due_date).num_days().max(0) as u32
8934 } else {
8935 0
8936 };
8937 datasynth_generators::treasury::ArAgingItem {
8938 expected_date: inv.due_date,
8939 amount: inv.amount_remaining,
8940 days_past_due,
8941 document_id: inv.invoice_number.clone(),
8942 }
8943 })
8944 .collect();
8945
8946 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8948 .ap_invoices
8949 .iter()
8950 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8951 .map(|inv| datasynth_generators::treasury::ApAgingItem {
8952 payment_date: inv.due_date,
8953 amount: inv.amount_remaining,
8954 document_id: inv.invoice_number.clone(),
8955 })
8956 .collect();
8957
8958 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8959 self.config.treasury.cash_forecasting.clone(),
8960 seed + 94,
8961 );
8962 let forecast = forecast_gen.generate(
8963 entity_id,
8964 currency,
8965 end_date,
8966 &ar_items,
8967 &ap_items,
8968 &[], );
8970 snapshot.cash_forecasts.push(forecast);
8971 }
8972
8973 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8975 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8976 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8977 self.config.treasury.cash_pooling.clone(),
8978 seed + 95,
8979 );
8980
8981 let account_ids: Vec<String> = snapshot
8983 .cash_positions
8984 .iter()
8985 .map(|cp| cp.bank_account_id.clone())
8986 .collect::<std::collections::HashSet<_>>()
8987 .into_iter()
8988 .collect();
8989
8990 if let Some(pool) =
8991 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8992 {
8993 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8995 for cp in &snapshot.cash_positions {
8996 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8997 }
8998
8999 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9000 latest_balances
9001 .into_iter()
9002 .filter(|(id, _)| pool.participant_accounts.contains(id))
9003 .map(
9004 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9005 account_id: id,
9006 balance,
9007 },
9008 )
9009 .collect();
9010
9011 let sweeps =
9012 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9013 snapshot.cash_pool_sweeps = sweeps;
9014 snapshot.cash_pools.push(pool);
9015 }
9016 }
9017
9018 if self.config.treasury.bank_guarantees.enabled {
9020 let vendor_names: Vec<String> = self
9021 .master_data
9022 .vendors
9023 .iter()
9024 .map(|v| v.name.clone())
9025 .collect();
9026 if !vendor_names.is_empty() {
9027 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9028 self.config.treasury.bank_guarantees.clone(),
9029 seed + 96,
9030 );
9031 snapshot.bank_guarantees =
9032 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9033 }
9034 }
9035
9036 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9038 let entity_ids: Vec<String> = self
9039 .config
9040 .companies
9041 .iter()
9042 .map(|c| c.code.clone())
9043 .collect();
9044 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9045 .matched_pairs
9046 .iter()
9047 .map(|mp| {
9048 (
9049 mp.seller_company.clone(),
9050 mp.buyer_company.clone(),
9051 mp.amount,
9052 )
9053 })
9054 .collect();
9055 if entity_ids.len() >= 2 {
9056 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9057 self.config.treasury.netting.clone(),
9058 seed + 97,
9059 );
9060 snapshot.netting_runs = netting_gen.generate(
9061 &entity_ids,
9062 currency,
9063 start_date,
9064 self.config.global.period_months,
9065 &ic_amounts,
9066 );
9067 }
9068 }
9069
9070 {
9072 use datasynth_generators::treasury::TreasuryAccounting;
9073
9074 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9075 let mut treasury_jes = Vec::new();
9076
9077 if !snapshot.debt_instruments.is_empty() {
9079 let debt_jes =
9080 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9081 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9082 treasury_jes.extend(debt_jes);
9083 }
9084
9085 if !snapshot.hedging_instruments.is_empty() {
9087 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9088 &snapshot.hedging_instruments,
9089 &snapshot.hedge_relationships,
9090 end_date,
9091 entity_id,
9092 );
9093 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9094 treasury_jes.extend(hedge_jes);
9095 }
9096
9097 if !snapshot.cash_pool_sweeps.is_empty() {
9099 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9100 &snapshot.cash_pool_sweeps,
9101 entity_id,
9102 );
9103 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9104 treasury_jes.extend(sweep_jes);
9105 }
9106
9107 if !treasury_jes.is_empty() {
9108 debug!("Total treasury journal entries: {}", treasury_jes.len());
9109 }
9110 snapshot.journal_entries = treasury_jes;
9111 }
9112
9113 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9114 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9115 stats.cash_position_count = snapshot.cash_positions.len();
9116 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9117 stats.cash_pool_count = snapshot.cash_pools.len();
9118
9119 info!(
9120 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9121 snapshot.debt_instruments.len(),
9122 snapshot.hedging_instruments.len(),
9123 snapshot.cash_positions.len(),
9124 snapshot.cash_forecasts.len(),
9125 snapshot.cash_pools.len(),
9126 snapshot.bank_guarantees.len(),
9127 snapshot.netting_runs.len(),
9128 snapshot.journal_entries.len(),
9129 );
9130 self.check_resources_with_log("post-treasury")?;
9131
9132 Ok(snapshot)
9133 }
9134
9135 fn phase_project_accounting(
9137 &mut self,
9138 document_flows: &DocumentFlowSnapshot,
9139 hr: &HrSnapshot,
9140 stats: &mut EnhancedGenerationStatistics,
9141 ) -> SynthResult<ProjectAccountingSnapshot> {
9142 if !self.phase_config.generate_project_accounting {
9143 debug!("Phase 23: Skipped (project accounting disabled)");
9144 return Ok(ProjectAccountingSnapshot::default());
9145 }
9146 let degradation = self.check_resources()?;
9147 if degradation >= DegradationLevel::Reduced {
9148 debug!(
9149 "Phase skipped due to resource pressure (degradation: {:?})",
9150 degradation
9151 );
9152 return Ok(ProjectAccountingSnapshot::default());
9153 }
9154 info!("Phase 23: Generating Project Accounting Data");
9155
9156 let seed = self.seed;
9157 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9158 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9159 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9160 let company_code = self
9161 .config
9162 .companies
9163 .first()
9164 .map(|c| c.code.as_str())
9165 .unwrap_or("1000");
9166
9167 let mut snapshot = ProjectAccountingSnapshot::default();
9168
9169 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9171 self.config.project_accounting.clone(),
9172 seed + 95,
9173 );
9174 let pool = project_gen.generate(company_code, start_date, end_date);
9175 snapshot.projects = pool.projects.clone();
9176
9177 {
9179 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9180 Vec::new();
9181
9182 for te in &hr.time_entries {
9184 let total_hours = te.hours_regular + te.hours_overtime;
9185 if total_hours > 0.0 {
9186 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9187 id: te.entry_id.clone(),
9188 entity_id: company_code.to_string(),
9189 date: te.date,
9190 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9191 .unwrap_or(rust_decimal::Decimal::ZERO),
9192 source_type: CostSourceType::TimeEntry,
9193 hours: Some(
9194 rust_decimal::Decimal::from_f64_retain(total_hours)
9195 .unwrap_or(rust_decimal::Decimal::ZERO),
9196 ),
9197 });
9198 }
9199 }
9200
9201 for er in &hr.expense_reports {
9203 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9204 id: er.report_id.clone(),
9205 entity_id: company_code.to_string(),
9206 date: er.submission_date,
9207 amount: er.total_amount,
9208 source_type: CostSourceType::ExpenseReport,
9209 hours: None,
9210 });
9211 }
9212
9213 for po in &document_flows.purchase_orders {
9215 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9216 id: po.header.document_id.clone(),
9217 entity_id: company_code.to_string(),
9218 date: po.header.document_date,
9219 amount: po.total_net_amount,
9220 source_type: CostSourceType::PurchaseOrder,
9221 hours: None,
9222 });
9223 }
9224
9225 for vi in &document_flows.vendor_invoices {
9227 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9228 id: vi.header.document_id.clone(),
9229 entity_id: company_code.to_string(),
9230 date: vi.header.document_date,
9231 amount: vi.payable_amount,
9232 source_type: CostSourceType::VendorInvoice,
9233 hours: None,
9234 });
9235 }
9236
9237 if !source_docs.is_empty() && !pool.projects.is_empty() {
9238 let mut cost_gen =
9239 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9240 self.config.project_accounting.cost_allocation.clone(),
9241 seed + 99,
9242 );
9243 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9244 }
9245 }
9246
9247 if self.config.project_accounting.change_orders.enabled {
9249 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9250 self.config.project_accounting.change_orders.clone(),
9251 seed + 96,
9252 );
9253 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9254 }
9255
9256 if self.config.project_accounting.milestones.enabled {
9258 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9259 self.config.project_accounting.milestones.clone(),
9260 seed + 97,
9261 );
9262 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9263 }
9264
9265 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9267 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9268 self.config.project_accounting.earned_value.clone(),
9269 seed + 98,
9270 );
9271 snapshot.earned_value_metrics =
9272 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9273 }
9274
9275 if self.config.project_accounting.revenue_recognition.enabled
9277 && !snapshot.projects.is_empty()
9278 && !snapshot.cost_lines.is_empty()
9279 {
9280 use datasynth_generators::project_accounting::RevenueGenerator;
9281 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9282 let avg_contract_value =
9283 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9284 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9285
9286 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9289 snapshot
9290 .projects
9291 .iter()
9292 .filter(|p| {
9293 matches!(
9294 p.project_type,
9295 datasynth_core::models::ProjectType::Customer
9296 )
9297 })
9298 .map(|p| {
9299 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9300 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9301 } else {
9303 avg_contract_value
9304 };
9305 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9307 })
9308 .collect();
9309
9310 if !contract_values.is_empty() {
9311 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9312 snapshot.revenue_records = rev_gen.generate(
9313 &snapshot.projects,
9314 &snapshot.cost_lines,
9315 &contract_values,
9316 start_date,
9317 end_date,
9318 );
9319 debug!(
9320 "Generated {} revenue recognition records for {} customer projects",
9321 snapshot.revenue_records.len(),
9322 contract_values.len()
9323 );
9324 }
9325 }
9326
9327 stats.project_count = snapshot.projects.len();
9328 stats.project_change_order_count = snapshot.change_orders.len();
9329 stats.project_cost_line_count = snapshot.cost_lines.len();
9330
9331 info!(
9332 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9333 snapshot.projects.len(),
9334 snapshot.change_orders.len(),
9335 snapshot.milestones.len(),
9336 snapshot.earned_value_metrics.len()
9337 );
9338 self.check_resources_with_log("post-project-accounting")?;
9339
9340 Ok(snapshot)
9341 }
9342
9343 fn phase_evolution_events(
9345 &mut self,
9346 stats: &mut EnhancedGenerationStatistics,
9347 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9348 if !self.phase_config.generate_evolution_events {
9349 debug!("Phase 24: Skipped (evolution events disabled)");
9350 return Ok((Vec::new(), Vec::new()));
9351 }
9352 info!("Phase 24: Generating Process Evolution + Organizational Events");
9353
9354 let seed = self.seed;
9355 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9356 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9357 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9358
9359 let mut proc_gen =
9361 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9362 seed + 100,
9363 );
9364 let process_events = proc_gen.generate_events(start_date, end_date);
9365
9366 let company_codes: Vec<String> = self
9368 .config
9369 .companies
9370 .iter()
9371 .map(|c| c.code.clone())
9372 .collect();
9373 let mut org_gen =
9374 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9375 seed + 101,
9376 );
9377 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9378
9379 stats.process_evolution_event_count = process_events.len();
9380 stats.organizational_event_count = org_events.len();
9381
9382 info!(
9383 "Evolution events generated: {} process evolution, {} organizational",
9384 process_events.len(),
9385 org_events.len()
9386 );
9387 self.check_resources_with_log("post-evolution-events")?;
9388
9389 Ok((process_events, org_events))
9390 }
9391
9392 fn phase_disruption_events(
9395 &self,
9396 stats: &mut EnhancedGenerationStatistics,
9397 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9398 if !self.config.organizational_events.enabled {
9399 debug!("Phase 24b: Skipped (organizational events disabled)");
9400 return Ok(Vec::new());
9401 }
9402 info!("Phase 24b: Generating Disruption Events");
9403
9404 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9405 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9406 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9407
9408 let company_codes: Vec<String> = self
9409 .config
9410 .companies
9411 .iter()
9412 .map(|c| c.code.clone())
9413 .collect();
9414
9415 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9416 let events = gen.generate(start_date, end_date, &company_codes);
9417
9418 stats.disruption_event_count = events.len();
9419 info!("Disruption events generated: {} events", events.len());
9420 self.check_resources_with_log("post-disruption-events")?;
9421
9422 Ok(events)
9423 }
9424
9425 fn phase_counterfactuals(
9432 &self,
9433 journal_entries: &[JournalEntry],
9434 stats: &mut EnhancedGenerationStatistics,
9435 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9436 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9437 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9438 return Ok(Vec::new());
9439 }
9440 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9441
9442 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9443
9444 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9445
9446 let specs = [
9448 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9449 CounterfactualSpec::ShiftDate { days: -14 },
9450 CounterfactualSpec::SelfApprove,
9451 CounterfactualSpec::SplitTransaction { split_count: 3 },
9452 ];
9453
9454 let pairs: Vec<_> = journal_entries
9455 .iter()
9456 .enumerate()
9457 .map(|(i, je)| {
9458 let spec = &specs[i % specs.len()];
9459 gen.generate(je, spec)
9460 })
9461 .collect();
9462
9463 stats.counterfactual_pair_count = pairs.len();
9464 info!(
9465 "Counterfactual pairs generated: {} pairs from {} journal entries",
9466 pairs.len(),
9467 journal_entries.len()
9468 );
9469 self.check_resources_with_log("post-counterfactuals")?;
9470
9471 Ok(pairs)
9472 }
9473
9474 fn phase_red_flags(
9481 &self,
9482 anomaly_labels: &AnomalyLabels,
9483 document_flows: &DocumentFlowSnapshot,
9484 stats: &mut EnhancedGenerationStatistics,
9485 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9486 if !self.config.fraud.enabled {
9487 debug!("Phase 26: Skipped (fraud generation disabled)");
9488 return Ok(Vec::new());
9489 }
9490 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9491
9492 use datasynth_generators::fraud::RedFlagGenerator;
9493
9494 let generator = RedFlagGenerator::new();
9495 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9496
9497 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9499 .labels
9500 .iter()
9501 .filter(|label| label.anomaly_type.is_intentional())
9502 .map(|label| label.document_id.as_str())
9503 .collect();
9504
9505 let mut flags = Vec::new();
9506
9507 for chain in &document_flows.p2p_chains {
9509 let doc_id = &chain.purchase_order.header.document_id;
9510 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9511 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9512 }
9513
9514 for chain in &document_flows.o2c_chains {
9516 let doc_id = &chain.sales_order.header.document_id;
9517 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9518 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9519 }
9520
9521 stats.red_flag_count = flags.len();
9522 info!(
9523 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9524 flags.len(),
9525 document_flows.p2p_chains.len(),
9526 document_flows.o2c_chains.len(),
9527 fraud_doc_ids.len()
9528 );
9529 self.check_resources_with_log("post-red-flags")?;
9530
9531 Ok(flags)
9532 }
9533
9534 fn phase_collusion_rings(
9540 &mut self,
9541 stats: &mut EnhancedGenerationStatistics,
9542 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9543 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9544 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9545 return Ok(Vec::new());
9546 }
9547 info!("Phase 26b: Generating Collusion Rings");
9548
9549 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9550 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9551 let months = self.config.global.period_months;
9552
9553 let employee_ids: Vec<String> = self
9554 .master_data
9555 .employees
9556 .iter()
9557 .map(|e| e.employee_id.clone())
9558 .collect();
9559 let vendor_ids: Vec<String> = self
9560 .master_data
9561 .vendors
9562 .iter()
9563 .map(|v| v.vendor_id.clone())
9564 .collect();
9565
9566 let mut generator =
9567 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9568 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9569
9570 stats.collusion_ring_count = rings.len();
9571 info!(
9572 "Collusion rings generated: {} rings, total members: {}",
9573 rings.len(),
9574 rings
9575 .iter()
9576 .map(datasynth_generators::fraud::CollusionRing::size)
9577 .sum::<usize>()
9578 );
9579 self.check_resources_with_log("post-collusion-rings")?;
9580
9581 Ok(rings)
9582 }
9583
9584 fn phase_temporal_attributes(
9589 &mut self,
9590 stats: &mut EnhancedGenerationStatistics,
9591 ) -> SynthResult<
9592 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9593 > {
9594 if !self.config.temporal_attributes.enabled {
9595 debug!("Phase 27: Skipped (temporal attributes disabled)");
9596 return Ok(Vec::new());
9597 }
9598 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9599
9600 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9601 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9602
9603 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9607 || self.config.temporal_attributes.enabled;
9608 let temporal_config = {
9609 let ta = &self.config.temporal_attributes;
9610 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9611 .enabled(ta.enabled)
9612 .closed_probability(ta.valid_time.closed_probability)
9613 .avg_validity_days(ta.valid_time.avg_validity_days)
9614 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9615 .with_version_chains(if generate_version_chains {
9616 ta.avg_versions_per_entity
9617 } else {
9618 1.0
9619 })
9620 .build()
9621 };
9622 let temporal_config = if self
9624 .config
9625 .temporal_attributes
9626 .transaction_time
9627 .allow_backdating
9628 {
9629 let mut c = temporal_config;
9630 c.transaction_time.allow_backdating = true;
9631 c.transaction_time.backdating_probability = self
9632 .config
9633 .temporal_attributes
9634 .transaction_time
9635 .backdating_probability;
9636 c.transaction_time.max_backdate_days = self
9637 .config
9638 .temporal_attributes
9639 .transaction_time
9640 .max_backdate_days;
9641 c
9642 } else {
9643 temporal_config
9644 };
9645 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9646 temporal_config,
9647 self.seed + 130,
9648 start_date,
9649 );
9650
9651 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9652 self.seed + 130,
9653 datasynth_core::GeneratorType::Vendor,
9654 );
9655
9656 let chains: Vec<_> = self
9657 .master_data
9658 .vendors
9659 .iter()
9660 .map(|vendor| {
9661 let id = uuid_factory.next();
9662 gen.generate_version_chain(vendor.clone(), id)
9663 })
9664 .collect();
9665
9666 stats.temporal_version_chain_count = chains.len();
9667 info!("Temporal version chains generated: {} chains", chains.len());
9668 self.check_resources_with_log("post-temporal-attributes")?;
9669
9670 Ok(chains)
9671 }
9672
9673 fn phase_entity_relationships(
9683 &self,
9684 journal_entries: &[JournalEntry],
9685 document_flows: &DocumentFlowSnapshot,
9686 stats: &mut EnhancedGenerationStatistics,
9687 ) -> SynthResult<(
9688 Option<datasynth_core::models::EntityGraph>,
9689 Vec<datasynth_core::models::CrossProcessLink>,
9690 )> {
9691 use datasynth_generators::relationships::{
9692 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9693 TransactionSummary,
9694 };
9695
9696 let rs_enabled = self.config.relationship_strength.enabled;
9697 let cpl_enabled = self.config.cross_process_links.enabled
9698 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9699
9700 if !rs_enabled && !cpl_enabled {
9701 debug!(
9702 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9703 );
9704 return Ok((None, Vec::new()));
9705 }
9706
9707 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9708
9709 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9710 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9711
9712 let company_code = self
9713 .config
9714 .companies
9715 .first()
9716 .map(|c| c.code.as_str())
9717 .unwrap_or("1000");
9718
9719 let gen_config = EntityGraphConfig {
9721 enabled: rs_enabled,
9722 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9723 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9724 enable_return_flows: false,
9725 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9726 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9727 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9729 1.0
9730 } else {
9731 0.30
9732 },
9733 ..Default::default()
9734 },
9735 strength_config: datasynth_generators::relationships::StrengthConfig {
9736 transaction_volume_weight: self
9737 .config
9738 .relationship_strength
9739 .calculation
9740 .transaction_volume_weight,
9741 transaction_count_weight: self
9742 .config
9743 .relationship_strength
9744 .calculation
9745 .transaction_count_weight,
9746 duration_weight: self
9747 .config
9748 .relationship_strength
9749 .calculation
9750 .relationship_duration_weight,
9751 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9752 mutual_connections_weight: self
9753 .config
9754 .relationship_strength
9755 .calculation
9756 .mutual_connections_weight,
9757 recency_half_life_days: self
9758 .config
9759 .relationship_strength
9760 .calculation
9761 .recency_half_life_days,
9762 },
9763 ..Default::default()
9764 };
9765
9766 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9767
9768 let entity_graph = if rs_enabled {
9770 let vendor_summaries: Vec<EntitySummary> = self
9772 .master_data
9773 .vendors
9774 .iter()
9775 .map(|v| {
9776 EntitySummary::new(
9777 &v.vendor_id,
9778 &v.name,
9779 datasynth_core::models::GraphEntityType::Vendor,
9780 start_date,
9781 )
9782 })
9783 .collect();
9784
9785 let customer_summaries: Vec<EntitySummary> = self
9786 .master_data
9787 .customers
9788 .iter()
9789 .map(|c| {
9790 EntitySummary::new(
9791 &c.customer_id,
9792 &c.name,
9793 datasynth_core::models::GraphEntityType::Customer,
9794 start_date,
9795 )
9796 })
9797 .collect();
9798
9799 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9804 std::collections::HashMap::new();
9805
9806 for je in journal_entries {
9807 let cc = je.header.company_code.clone();
9808 let posting_date = je.header.posting_date;
9809 for line in &je.lines {
9810 if let Some(ref tp) = line.trading_partner {
9811 let amount = if line.debit_amount > line.credit_amount {
9812 line.debit_amount
9813 } else {
9814 line.credit_amount
9815 };
9816 let entry = txn_summaries
9817 .entry((cc.clone(), tp.clone()))
9818 .or_insert_with(|| TransactionSummary {
9819 total_volume: rust_decimal::Decimal::ZERO,
9820 transaction_count: 0,
9821 first_transaction_date: posting_date,
9822 last_transaction_date: posting_date,
9823 related_entities: std::collections::HashSet::new(),
9824 });
9825 entry.total_volume += amount;
9826 entry.transaction_count += 1;
9827 if posting_date < entry.first_transaction_date {
9828 entry.first_transaction_date = posting_date;
9829 }
9830 if posting_date > entry.last_transaction_date {
9831 entry.last_transaction_date = posting_date;
9832 }
9833 entry.related_entities.insert(cc.clone());
9834 }
9835 }
9836 }
9837
9838 for chain in &document_flows.p2p_chains {
9841 let cc = chain.purchase_order.header.company_code.clone();
9842 let vendor_id = chain.purchase_order.vendor_id.clone();
9843 let po_date = chain.purchase_order.header.document_date;
9844 let amount = chain.purchase_order.total_net_amount;
9845
9846 let entry = txn_summaries
9847 .entry((cc.clone(), vendor_id))
9848 .or_insert_with(|| TransactionSummary {
9849 total_volume: rust_decimal::Decimal::ZERO,
9850 transaction_count: 0,
9851 first_transaction_date: po_date,
9852 last_transaction_date: po_date,
9853 related_entities: std::collections::HashSet::new(),
9854 });
9855 entry.total_volume += amount;
9856 entry.transaction_count += 1;
9857 if po_date < entry.first_transaction_date {
9858 entry.first_transaction_date = po_date;
9859 }
9860 if po_date > entry.last_transaction_date {
9861 entry.last_transaction_date = po_date;
9862 }
9863 entry.related_entities.insert(cc);
9864 }
9865
9866 for chain in &document_flows.o2c_chains {
9868 let cc = chain.sales_order.header.company_code.clone();
9869 let customer_id = chain.sales_order.customer_id.clone();
9870 let so_date = chain.sales_order.header.document_date;
9871 let amount = chain.sales_order.total_net_amount;
9872
9873 let entry = txn_summaries
9874 .entry((cc.clone(), customer_id))
9875 .or_insert_with(|| TransactionSummary {
9876 total_volume: rust_decimal::Decimal::ZERO,
9877 transaction_count: 0,
9878 first_transaction_date: so_date,
9879 last_transaction_date: so_date,
9880 related_entities: std::collections::HashSet::new(),
9881 });
9882 entry.total_volume += amount;
9883 entry.transaction_count += 1;
9884 if so_date < entry.first_transaction_date {
9885 entry.first_transaction_date = so_date;
9886 }
9887 if so_date > entry.last_transaction_date {
9888 entry.last_transaction_date = so_date;
9889 }
9890 entry.related_entities.insert(cc);
9891 }
9892
9893 let as_of_date = journal_entries
9894 .last()
9895 .map(|je| je.header.posting_date)
9896 .unwrap_or(start_date);
9897
9898 let graph = gen.generate_entity_graph(
9899 company_code,
9900 as_of_date,
9901 &vendor_summaries,
9902 &customer_summaries,
9903 &txn_summaries,
9904 );
9905
9906 info!(
9907 "Entity relationship graph: {} nodes, {} edges",
9908 graph.nodes.len(),
9909 graph.edges.len()
9910 );
9911 stats.entity_relationship_node_count = graph.nodes.len();
9912 stats.entity_relationship_edge_count = graph.edges.len();
9913 Some(graph)
9914 } else {
9915 None
9916 };
9917
9918 let cross_process_links = if cpl_enabled {
9920 let gr_refs: Vec<GoodsReceiptRef> = document_flows
9922 .p2p_chains
9923 .iter()
9924 .flat_map(|chain| {
9925 let vendor_id = chain.purchase_order.vendor_id.clone();
9926 let cc = chain.purchase_order.header.company_code.clone();
9927 chain.goods_receipts.iter().flat_map(move |gr| {
9928 gr.items.iter().filter_map({
9929 let doc_id = gr.header.document_id.clone();
9930 let v_id = vendor_id.clone();
9931 let company = cc.clone();
9932 let receipt_date = gr.header.document_date;
9933 move |item| {
9934 item.base
9935 .material_id
9936 .as_ref()
9937 .map(|mat_id| GoodsReceiptRef {
9938 document_id: doc_id.clone(),
9939 material_id: mat_id.clone(),
9940 quantity: item.base.quantity,
9941 receipt_date,
9942 vendor_id: v_id.clone(),
9943 company_code: company.clone(),
9944 })
9945 }
9946 })
9947 })
9948 })
9949 .collect();
9950
9951 let del_refs: Vec<DeliveryRef> = document_flows
9953 .o2c_chains
9954 .iter()
9955 .flat_map(|chain| {
9956 let customer_id = chain.sales_order.customer_id.clone();
9957 let cc = chain.sales_order.header.company_code.clone();
9958 chain.deliveries.iter().flat_map(move |del| {
9959 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9960 del.items.iter().filter_map({
9961 let doc_id = del.header.document_id.clone();
9962 let c_id = customer_id.clone();
9963 let company = cc.clone();
9964 move |item| {
9965 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9966 document_id: doc_id.clone(),
9967 material_id: mat_id.clone(),
9968 quantity: item.base.quantity,
9969 delivery_date,
9970 customer_id: c_id.clone(),
9971 company_code: company.clone(),
9972 })
9973 }
9974 })
9975 })
9976 })
9977 .collect();
9978
9979 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9980 info!("Cross-process links generated: {} links", links.len());
9981 stats.cross_process_link_count = links.len();
9982 links
9983 } else {
9984 Vec::new()
9985 };
9986
9987 self.check_resources_with_log("post-entity-relationships")?;
9988 Ok((entity_graph, cross_process_links))
9989 }
9990
9991 fn phase_industry_data(
9993 &self,
9994 stats: &mut EnhancedGenerationStatistics,
9995 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9996 if !self.config.industry_specific.enabled {
9997 return None;
9998 }
9999 info!("Phase 29: Generating industry-specific data");
10000 let output = datasynth_generators::industry::factory::generate_industry_output(
10001 self.config.global.industry,
10002 );
10003 stats.industry_gl_account_count = output.gl_accounts.len();
10004 info!(
10005 "Industry data generated: {} GL accounts for {:?}",
10006 output.gl_accounts.len(),
10007 self.config.global.industry
10008 );
10009 Some(output)
10010 }
10011
10012 fn phase_opening_balances(
10014 &mut self,
10015 coa: &Arc<ChartOfAccounts>,
10016 stats: &mut EnhancedGenerationStatistics,
10017 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10018 if !self.config.balance.generate_opening_balances {
10019 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10020 return Ok(Vec::new());
10021 }
10022 info!("Phase 3b: Generating Opening Balances");
10023
10024 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10025 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10026 let fiscal_year = start_date.year();
10027
10028 let industry = match self.config.global.industry {
10029 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10030 IndustrySector::Retail => IndustryType::Retail,
10031 IndustrySector::FinancialServices => IndustryType::Financial,
10032 IndustrySector::Healthcare => IndustryType::Healthcare,
10033 IndustrySector::Technology => IndustryType::Technology,
10034 _ => IndustryType::Manufacturing,
10035 };
10036
10037 let config = datasynth_generators::OpeningBalanceConfig {
10038 industry,
10039 ..Default::default()
10040 };
10041 let mut gen =
10042 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10043
10044 let mut results = Vec::new();
10045 for company in &self.config.companies {
10046 let spec = OpeningBalanceSpec::new(
10047 company.code.clone(),
10048 start_date,
10049 fiscal_year,
10050 company.currency.clone(),
10051 rust_decimal::Decimal::new(10_000_000, 0),
10052 industry,
10053 );
10054 let ob = gen.generate(&spec, coa, start_date, &company.code);
10055 results.push(ob);
10056 }
10057
10058 stats.opening_balance_count = results.len();
10059 info!("Opening balances generated: {} companies", results.len());
10060 self.check_resources_with_log("post-opening-balances")?;
10061
10062 Ok(results)
10063 }
10064
10065 fn phase_subledger_reconciliation(
10067 &mut self,
10068 subledger: &SubledgerSnapshot,
10069 entries: &[JournalEntry],
10070 stats: &mut EnhancedGenerationStatistics,
10071 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10072 if !self.config.balance.reconcile_subledgers {
10073 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10074 return Ok(Vec::new());
10075 }
10076 info!("Phase 9b: Reconciling GL to subledger balances");
10077
10078 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10079 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10080 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10081
10082 let tracker_config = BalanceTrackerConfig {
10084 validate_on_each_entry: false,
10085 track_history: false,
10086 fail_on_validation_error: false,
10087 ..Default::default()
10088 };
10089 let recon_currency = self
10090 .config
10091 .companies
10092 .first()
10093 .map(|c| c.currency.clone())
10094 .unwrap_or_else(|| "USD".to_string());
10095 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10096 let validation_errors = tracker.apply_entries(entries);
10097 if !validation_errors.is_empty() {
10098 warn!(
10099 error_count = validation_errors.len(),
10100 "Balance tracker encountered validation errors during subledger reconciliation"
10101 );
10102 for err in &validation_errors {
10103 debug!("Balance validation error: {:?}", err);
10104 }
10105 }
10106
10107 let mut engine = datasynth_generators::ReconciliationEngine::new(
10108 datasynth_generators::ReconciliationConfig::default(),
10109 );
10110
10111 let mut results = Vec::new();
10112 let company_code = self
10113 .config
10114 .companies
10115 .first()
10116 .map(|c| c.code.as_str())
10117 .unwrap_or("1000");
10118
10119 if !subledger.ar_invoices.is_empty() {
10121 let gl_balance = tracker
10122 .get_account_balance(
10123 company_code,
10124 datasynth_core::accounts::control_accounts::AR_CONTROL,
10125 )
10126 .map(|b| b.closing_balance)
10127 .unwrap_or_default();
10128 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10129 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10130 }
10131
10132 if !subledger.ap_invoices.is_empty() {
10134 let gl_balance = tracker
10135 .get_account_balance(
10136 company_code,
10137 datasynth_core::accounts::control_accounts::AP_CONTROL,
10138 )
10139 .map(|b| b.closing_balance)
10140 .unwrap_or_default();
10141 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10142 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10143 }
10144
10145 if !subledger.fa_records.is_empty() {
10147 let gl_asset_balance = tracker
10148 .get_account_balance(
10149 company_code,
10150 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10151 )
10152 .map(|b| b.closing_balance)
10153 .unwrap_or_default();
10154 let gl_accum_depr_balance = tracker
10155 .get_account_balance(
10156 company_code,
10157 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10158 )
10159 .map(|b| b.closing_balance)
10160 .unwrap_or_default();
10161 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10162 subledger.fa_records.iter().collect();
10163 let (asset_recon, depr_recon) = engine.reconcile_fa(
10164 company_code,
10165 end_date,
10166 gl_asset_balance,
10167 gl_accum_depr_balance,
10168 &fa_refs,
10169 );
10170 results.push(asset_recon);
10171 results.push(depr_recon);
10172 }
10173
10174 if !subledger.inventory_positions.is_empty() {
10176 let gl_balance = tracker
10177 .get_account_balance(
10178 company_code,
10179 datasynth_core::accounts::control_accounts::INVENTORY,
10180 )
10181 .map(|b| b.closing_balance)
10182 .unwrap_or_default();
10183 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10184 subledger.inventory_positions.iter().collect();
10185 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10186 }
10187
10188 stats.subledger_reconciliation_count = results.len();
10189 let passed = results.iter().filter(|r| r.is_balanced()).count();
10190 let failed = results.len() - passed;
10191 info!(
10192 "Subledger reconciliation: {} checks, {} passed, {} failed",
10193 results.len(),
10194 passed,
10195 failed
10196 );
10197 self.check_resources_with_log("post-subledger-reconciliation")?;
10198
10199 Ok(results)
10200 }
10201
10202 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10204 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10205
10206 let coa_framework = self.resolve_coa_framework();
10207
10208 let mut gen = ChartOfAccountsGenerator::new(
10209 self.config.chart_of_accounts.complexity,
10210 self.config.global.industry,
10211 self.seed,
10212 )
10213 .with_coa_framework(coa_framework);
10214
10215 let mut built = gen.generate();
10216 if self.config.accounting_standards.enabled {
10220 use datasynth_config::schema::AccountingFrameworkConfig;
10221 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10222 match f {
10223 AccountingFrameworkConfig::UsGaap => "us_gaap",
10224 AccountingFrameworkConfig::Ifrs => "ifrs",
10225 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10226 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10227 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10228 }
10229 .to_string()
10230 });
10231 }
10232 let coa = Arc::new(built);
10233 self.coa = Some(Arc::clone(&coa));
10234
10235 if let Some(pb) = pb {
10236 pb.finish_with_message("Chart of Accounts complete");
10237 }
10238
10239 Ok(coa)
10240 }
10241
10242 fn generate_master_data(&mut self) -> SynthResult<()> {
10244 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10245 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10246 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10247
10248 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10250
10251 let pack = self.primary_pack().clone();
10253
10254 let vendors_per_company = self.phase_config.vendors_per_company;
10256 let customers_per_company = self.phase_config.customers_per_company;
10257 let materials_per_company = self.phase_config.materials_per_company;
10258 let assets_per_company = self.phase_config.assets_per_company;
10259 let coa_framework = self.resolve_coa_framework();
10260
10261 let per_company_results: Vec<_> = self
10264 .config
10265 .companies
10266 .par_iter()
10267 .enumerate()
10268 .map(|(i, company)| {
10269 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10270 let pack = pack.clone();
10271
10272 let mut vendor_gen = VendorGenerator::new(company_seed);
10274 vendor_gen.set_country_pack(pack.clone());
10275 vendor_gen.set_coa_framework(coa_framework);
10276 vendor_gen.set_counter_offset(i * vendors_per_company);
10277 vendor_gen.set_template_provider(self.template_provider.clone());
10280 if self.config.vendor_network.enabled {
10282 let vn = &self.config.vendor_network;
10283 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10284 enabled: true,
10285 depth: vn.depth,
10286 tier1_count: datasynth_generators::TierCountConfig::new(
10287 vn.tier1.min,
10288 vn.tier1.max,
10289 ),
10290 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10291 vn.tier2_per_parent.min,
10292 vn.tier2_per_parent.max,
10293 ),
10294 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10295 vn.tier3_per_parent.min,
10296 vn.tier3_per_parent.max,
10297 ),
10298 cluster_distribution: datasynth_generators::ClusterDistribution {
10299 reliable_strategic: vn.clusters.reliable_strategic,
10300 standard_operational: vn.clusters.standard_operational,
10301 transactional: vn.clusters.transactional,
10302 problematic: vn.clusters.problematic,
10303 },
10304 concentration_limits: datasynth_generators::ConcentrationLimits {
10305 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10306 max_top5: vn.dependencies.top_5_concentration,
10307 },
10308 ..datasynth_generators::VendorNetworkConfig::default()
10309 });
10310 }
10311 let vendor_pool =
10312 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10313
10314 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10316 customer_gen.set_country_pack(pack.clone());
10317 customer_gen.set_coa_framework(coa_framework);
10318 customer_gen.set_counter_offset(i * customers_per_company);
10319 customer_gen.set_template_provider(self.template_provider.clone());
10321 if self.config.customer_segmentation.enabled {
10323 let cs = &self.config.customer_segmentation;
10324 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10325 enabled: true,
10326 segment_distribution: datasynth_generators::SegmentDistribution {
10327 enterprise: cs.value_segments.enterprise.customer_share,
10328 mid_market: cs.value_segments.mid_market.customer_share,
10329 smb: cs.value_segments.smb.customer_share,
10330 consumer: cs.value_segments.consumer.customer_share,
10331 },
10332 referral_config: datasynth_generators::ReferralConfig {
10333 enabled: cs.networks.referrals.enabled,
10334 referral_rate: cs.networks.referrals.referral_rate,
10335 ..Default::default()
10336 },
10337 hierarchy_config: datasynth_generators::HierarchyConfig {
10338 enabled: cs.networks.corporate_hierarchies.enabled,
10339 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10340 ..Default::default()
10341 },
10342 ..Default::default()
10343 };
10344 customer_gen.set_segmentation_config(seg_cfg);
10345 }
10346 let customer_pool = customer_gen.generate_customer_pool(
10347 customers_per_company,
10348 &company.code,
10349 start_date,
10350 );
10351
10352 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10354 material_gen.set_country_pack(pack.clone());
10355 material_gen.set_counter_offset(i * materials_per_company);
10356 material_gen.set_template_provider(self.template_provider.clone());
10358 let material_pool = material_gen.generate_material_pool(
10359 materials_per_company,
10360 &company.code,
10361 start_date,
10362 );
10363
10364 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10366 asset_gen.set_template_provider(self.template_provider.clone());
10368 let asset_pool = asset_gen.generate_asset_pool(
10369 assets_per_company,
10370 &company.code,
10371 (start_date, end_date),
10372 );
10373
10374 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10376 employee_gen.set_country_pack(pack);
10377 employee_gen.set_template_provider(self.template_provider.clone());
10379 let employee_pool =
10380 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10381
10382 let employee_change_history =
10384 employee_gen.generate_all_change_history(&employee_pool, end_date);
10385
10386 let employee_ids: Vec<String> = employee_pool
10388 .employees
10389 .iter()
10390 .map(|e| e.employee_id.clone())
10391 .collect();
10392 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10393 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10394
10395 (
10396 vendor_pool.vendors,
10397 customer_pool.customers,
10398 material_pool.materials,
10399 asset_pool.assets,
10400 employee_pool.employees,
10401 employee_change_history,
10402 cost_centers,
10403 )
10404 })
10405 .collect();
10406
10407 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
10409 per_company_results
10410 {
10411 self.master_data.vendors.extend(vendors);
10412 self.master_data.customers.extend(customers);
10413 self.master_data.materials.extend(materials);
10414 self.master_data.assets.extend(assets);
10415 self.master_data.employees.extend(employees);
10416 self.master_data.cost_centers.extend(cost_centers);
10417 self.master_data
10418 .employee_change_history
10419 .extend(change_history);
10420 }
10421
10422 {
10426 use datasynth_core::models::IndustrySector;
10427 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10428 let industry = match self.config.global.industry {
10429 IndustrySector::Manufacturing => "manufacturing",
10430 IndustrySector::Retail => "retail",
10431 IndustrySector::FinancialServices => "financial_services",
10432 IndustrySector::Technology => "technology",
10433 IndustrySector::Healthcare => "healthcare",
10434 _ => "other",
10435 };
10436 for (i, company) in self.config.companies.iter().enumerate() {
10437 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10438 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10439 let profile = profile_gen.generate(&company.code, industry);
10440 self.master_data.organizational_profiles.push(profile);
10441 }
10442 }
10443
10444 if let Some(pb) = &pb {
10445 pb.inc(total);
10446 }
10447 if let Some(pb) = pb {
10448 pb.finish_with_message("Master data generation complete");
10449 }
10450
10451 Ok(())
10452 }
10453
10454 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10456 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10457 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10458
10459 let months = (self.config.global.period_months as usize).max(1);
10462 let p2p_count = self
10463 .phase_config
10464 .p2p_chains
10465 .min(self.master_data.vendors.len() * 2 * months);
10466 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10467
10468 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10470 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10471 p2p_gen.set_country_pack(self.primary_pack().clone());
10472 if let Some(ctx) = &self.temporal_context {
10476 p2p_gen.set_temporal_context(Arc::clone(ctx));
10477 }
10478
10479 for i in 0..p2p_count {
10480 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10481 let materials: Vec<&Material> = self
10482 .master_data
10483 .materials
10484 .iter()
10485 .skip(i % self.master_data.materials.len().max(1))
10486 .take(2.min(self.master_data.materials.len()))
10487 .collect();
10488
10489 if materials.is_empty() {
10490 continue;
10491 }
10492
10493 let company = &self.config.companies[i % self.config.companies.len()];
10494 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10495 let fiscal_period = po_date.month() as u8;
10496 let created_by = if self.master_data.employees.is_empty() {
10497 "SYSTEM"
10498 } else {
10499 self.master_data.employees[i % self.master_data.employees.len()]
10500 .user_id
10501 .as_str()
10502 };
10503
10504 let chain = p2p_gen.generate_chain(
10505 &company.code,
10506 vendor,
10507 &materials,
10508 po_date,
10509 start_date.year() as u16,
10510 fiscal_period,
10511 created_by,
10512 );
10513
10514 flows.purchase_orders.push(chain.purchase_order.clone());
10516 flows.goods_receipts.extend(chain.goods_receipts.clone());
10517 if let Some(vi) = &chain.vendor_invoice {
10518 flows.vendor_invoices.push(vi.clone());
10519 }
10520 if let Some(payment) = &chain.payment {
10521 flows.payments.push(payment.clone());
10522 }
10523 for remainder in &chain.remainder_payments {
10524 flows.payments.push(remainder.clone());
10525 }
10526 flows.p2p_chains.push(chain);
10527
10528 if let Some(pb) = &pb {
10529 pb.inc(1);
10530 }
10531 }
10532
10533 if let Some(pb) = pb {
10534 pb.finish_with_message("P2P document flows complete");
10535 }
10536
10537 let o2c_count = self
10540 .phase_config
10541 .o2c_chains
10542 .min(self.master_data.customers.len() * 2 * months);
10543 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10544
10545 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10547 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10548 o2c_gen.set_country_pack(self.primary_pack().clone());
10549 if let Some(ctx) = &self.temporal_context {
10551 o2c_gen.set_temporal_context(Arc::clone(ctx));
10552 }
10553
10554 for i in 0..o2c_count {
10555 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10556 let materials: Vec<&Material> = self
10557 .master_data
10558 .materials
10559 .iter()
10560 .skip(i % self.master_data.materials.len().max(1))
10561 .take(2.min(self.master_data.materials.len()))
10562 .collect();
10563
10564 if materials.is_empty() {
10565 continue;
10566 }
10567
10568 let company = &self.config.companies[i % self.config.companies.len()];
10569 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10570 let fiscal_period = so_date.month() as u8;
10571 let created_by = if self.master_data.employees.is_empty() {
10572 "SYSTEM"
10573 } else {
10574 self.master_data.employees[i % self.master_data.employees.len()]
10575 .user_id
10576 .as_str()
10577 };
10578
10579 let chain = o2c_gen.generate_chain(
10580 &company.code,
10581 customer,
10582 &materials,
10583 so_date,
10584 start_date.year() as u16,
10585 fiscal_period,
10586 created_by,
10587 );
10588
10589 flows.sales_orders.push(chain.sales_order.clone());
10591 flows.deliveries.extend(chain.deliveries.clone());
10592 if let Some(ci) = &chain.customer_invoice {
10593 flows.customer_invoices.push(ci.clone());
10594 }
10595 if let Some(receipt) = &chain.customer_receipt {
10596 flows.payments.push(receipt.clone());
10597 }
10598 for receipt in &chain.remainder_receipts {
10600 flows.payments.push(receipt.clone());
10601 }
10602 flows.o2c_chains.push(chain);
10603
10604 if let Some(pb) = &pb {
10605 pb.inc(1);
10606 }
10607 }
10608
10609 if let Some(pb) = pb {
10610 pb.finish_with_message("O2C document flows complete");
10611 }
10612
10613 {
10617 let mut refs = Vec::new();
10618 for doc in &flows.purchase_orders {
10619 refs.extend(doc.header.document_references.iter().cloned());
10620 }
10621 for doc in &flows.goods_receipts {
10622 refs.extend(doc.header.document_references.iter().cloned());
10623 }
10624 for doc in &flows.vendor_invoices {
10625 refs.extend(doc.header.document_references.iter().cloned());
10626 }
10627 for doc in &flows.sales_orders {
10628 refs.extend(doc.header.document_references.iter().cloned());
10629 }
10630 for doc in &flows.deliveries {
10631 refs.extend(doc.header.document_references.iter().cloned());
10632 }
10633 for doc in &flows.customer_invoices {
10634 refs.extend(doc.header.document_references.iter().cloned());
10635 }
10636 for doc in &flows.payments {
10637 refs.extend(doc.header.document_references.iter().cloned());
10638 }
10639 debug!(
10640 "Collected {} document cross-references from document headers",
10641 refs.len()
10642 );
10643 flows.document_references = refs;
10644 }
10645
10646 Ok(())
10647 }
10648
10649 fn generate_journal_entries(
10651 &mut self,
10652 coa: &Arc<ChartOfAccounts>,
10653 ) -> SynthResult<Vec<JournalEntry>> {
10654 use datasynth_core::traits::ParallelGenerator;
10655
10656 let total = self.calculate_total_transactions();
10657 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10658
10659 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10660 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10661 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10662
10663 let company_codes: Vec<String> = self
10664 .config
10665 .companies
10666 .iter()
10667 .map(|c| c.code.clone())
10668 .collect();
10669
10670 let mut generator = JournalEntryGenerator::new_with_params(
10671 self.config.transactions.clone(),
10672 Arc::clone(coa),
10673 company_codes,
10674 start_date,
10675 end_date,
10676 self.seed,
10677 );
10678 let bp = &self.config.business_processes;
10681 generator.set_business_process_weights(
10682 bp.o2c_weight,
10683 bp.p2p_weight,
10684 bp.r2r_weight,
10685 bp.h2r_weight,
10686 bp.a2r_weight,
10687 );
10688 generator
10693 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10694 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10695 let generator = generator;
10696
10697 let je_pack = self.primary_pack();
10701
10702 let mut generator = generator
10703 .with_master_data(
10704 &self.master_data.vendors,
10705 &self.master_data.customers,
10706 &self.master_data.materials,
10707 )
10708 .with_country_pack_names(je_pack)
10709 .with_country_pack_temporal(
10710 self.config.temporal_patterns.clone(),
10711 self.seed + 200,
10712 je_pack,
10713 )
10714 .with_persona_errors(true)
10715 .with_fraud_config(self.config.fraud.clone());
10716
10717 let temporal_enabled = self.config.temporal.enabled;
10722 let regimes_enabled = self.config.distributions.regime_changes.enabled;
10723 if temporal_enabled || regimes_enabled {
10724 let mut drift_config = if temporal_enabled {
10725 self.config.temporal.to_core_config()
10726 } else {
10727 datasynth_core::distributions::DriftConfig::default()
10730 };
10731 if regimes_enabled {
10732 self.config
10733 .distributions
10734 .regime_changes
10735 .apply_to(&mut drift_config, start_date);
10736 }
10737 generator = generator.with_drift_config(drift_config, self.seed + 100);
10738 }
10739
10740 self.check_memory_limit()?;
10742
10743 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10745
10746 let entries = if total >= 10_000 && num_threads > 1 {
10750 let sub_generators = generator.split(num_threads);
10753 let entries_per_thread = total as usize / num_threads;
10754 let remainder = total as usize % num_threads;
10755
10756 let batches: Vec<Vec<JournalEntry>> = sub_generators
10757 .into_par_iter()
10758 .enumerate()
10759 .map(|(i, mut gen)| {
10760 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10761 gen.generate_batch(count)
10762 })
10763 .collect();
10764
10765 let entries = JournalEntryGenerator::merge_results(batches);
10767
10768 if let Some(pb) = &pb {
10769 pb.inc(total);
10770 }
10771 entries
10772 } else {
10773 let mut entries = Vec::with_capacity(total as usize);
10775 for _ in 0..total {
10776 let entry = generator.generate();
10777 entries.push(entry);
10778 if let Some(pb) = &pb {
10779 pb.inc(1);
10780 }
10781 }
10782 entries
10783 };
10784
10785 if let Some(pb) = pb {
10786 pb.finish_with_message("Journal entries complete");
10787 }
10788
10789 Ok(entries)
10790 }
10791
10792 fn generate_jes_from_document_flows(
10797 &mut self,
10798 flows: &DocumentFlowSnapshot,
10799 ) -> SynthResult<Vec<JournalEntry>> {
10800 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
10801 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
10802
10803 let je_config = match self.resolve_coa_framework() {
10804 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
10805 CoAFramework::GermanSkr04 => {
10806 let fa = datasynth_core::FrameworkAccounts::german_gaap();
10807 DocumentFlowJeConfig::from(&fa)
10808 }
10809 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
10810 };
10811
10812 let populate_fec = je_config.populate_fec_fields;
10813 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
10814
10815 if populate_fec {
10819 let mut aux_lookup = std::collections::HashMap::new();
10820 for vendor in &self.master_data.vendors {
10821 if let Some(ref aux) = vendor.auxiliary_gl_account {
10822 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
10823 }
10824 }
10825 for customer in &self.master_data.customers {
10826 if let Some(ref aux) = customer.auxiliary_gl_account {
10827 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10828 }
10829 }
10830 if !aux_lookup.is_empty() {
10831 generator.set_auxiliary_account_lookup(aux_lookup);
10832 }
10833 }
10834
10835 let mut entries = Vec::new();
10836
10837 for chain in &flows.p2p_chains {
10839 let chain_entries = generator.generate_from_p2p_chain(chain);
10840 entries.extend(chain_entries);
10841 if let Some(pb) = &pb {
10842 pb.inc(1);
10843 }
10844 }
10845
10846 for chain in &flows.o2c_chains {
10848 let chain_entries = generator.generate_from_o2c_chain(chain);
10849 entries.extend(chain_entries);
10850 if let Some(pb) = &pb {
10851 pb.inc(1);
10852 }
10853 }
10854
10855 if let Some(pb) = pb {
10856 pb.finish_with_message(format!(
10857 "Generated {} JEs from document flows",
10858 entries.len()
10859 ));
10860 }
10861
10862 Ok(entries)
10863 }
10864
10865 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10871 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10872
10873 let mut jes = Vec::with_capacity(payroll_runs.len());
10874
10875 for run in payroll_runs {
10876 let mut je = JournalEntry::new_simple(
10877 format!("JE-PAYROLL-{}", run.payroll_id),
10878 run.company_code.clone(),
10879 run.run_date,
10880 format!("Payroll {}", run.payroll_id),
10881 );
10882
10883 je.add_line(JournalEntryLine {
10885 line_number: 1,
10886 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10887 debit_amount: run.total_gross,
10888 reference: Some(run.payroll_id.clone()),
10889 text: Some(format!(
10890 "Payroll {} ({} employees)",
10891 run.payroll_id, run.employee_count
10892 )),
10893 ..Default::default()
10894 });
10895
10896 je.add_line(JournalEntryLine {
10898 line_number: 2,
10899 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10900 credit_amount: run.total_gross,
10901 reference: Some(run.payroll_id.clone()),
10902 ..Default::default()
10903 });
10904
10905 jes.push(je);
10906 }
10907
10908 jes
10909 }
10910
10911 fn link_document_flows_to_subledgers(
10916 &mut self,
10917 flows: &DocumentFlowSnapshot,
10918 ) -> SynthResult<SubledgerSnapshot> {
10919 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10920 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10921
10922 let vendor_names: std::collections::HashMap<String, String> = self
10924 .master_data
10925 .vendors
10926 .iter()
10927 .map(|v| (v.vendor_id.clone(), v.name.clone()))
10928 .collect();
10929 let customer_names: std::collections::HashMap<String, String> = self
10930 .master_data
10931 .customers
10932 .iter()
10933 .map(|c| (c.customer_id.clone(), c.name.clone()))
10934 .collect();
10935
10936 let mut linker = DocumentFlowLinker::new()
10937 .with_vendor_names(vendor_names)
10938 .with_customer_names(customer_names);
10939
10940 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10942 if let Some(pb) = &pb {
10943 pb.inc(flows.vendor_invoices.len() as u64);
10944 }
10945
10946 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10948 if let Some(pb) = &pb {
10949 pb.inc(flows.customer_invoices.len() as u64);
10950 }
10951
10952 if let Some(pb) = pb {
10953 pb.finish_with_message(format!(
10954 "Linked {} AP and {} AR invoices",
10955 ap_invoices.len(),
10956 ar_invoices.len()
10957 ));
10958 }
10959
10960 Ok(SubledgerSnapshot {
10961 ap_invoices,
10962 ar_invoices,
10963 fa_records: Vec::new(),
10964 inventory_positions: Vec::new(),
10965 inventory_movements: Vec::new(),
10966 ar_aging_reports: Vec::new(),
10968 ap_aging_reports: Vec::new(),
10969 depreciation_runs: Vec::new(),
10971 inventory_valuations: Vec::new(),
10972 dunning_runs: Vec::new(),
10974 dunning_letters: Vec::new(),
10975 })
10976 }
10977
10978 #[allow(clippy::too_many_arguments)]
10983 fn generate_ocpm_events(
10984 &mut self,
10985 flows: &DocumentFlowSnapshot,
10986 sourcing: &SourcingSnapshot,
10987 hr: &HrSnapshot,
10988 manufacturing: &ManufacturingSnapshot,
10989 banking: &BankingSnapshot,
10990 audit: &AuditSnapshot,
10991 financial_reporting: &FinancialReportingSnapshot,
10992 ) -> SynthResult<OcpmSnapshot> {
10993 let total_chains = flows.p2p_chains.len()
10994 + flows.o2c_chains.len()
10995 + sourcing.sourcing_projects.len()
10996 + hr.payroll_runs.len()
10997 + manufacturing.production_orders.len()
10998 + banking.customers.len()
10999 + audit.engagements.len()
11000 + financial_reporting.bank_reconciliations.len();
11001 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11002
11003 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11005 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11006
11007 let ocpm_config = OcpmGeneratorConfig {
11009 generate_p2p: true,
11010 generate_o2c: true,
11011 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11012 generate_h2r: !hr.payroll_runs.is_empty(),
11013 generate_mfg: !manufacturing.production_orders.is_empty(),
11014 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11015 generate_bank: !banking.customers.is_empty(),
11016 generate_audit: !audit.engagements.is_empty(),
11017 happy_path_rate: 0.75,
11018 exception_path_rate: 0.20,
11019 error_path_rate: 0.05,
11020 add_duration_variability: true,
11021 duration_std_dev_factor: 0.3,
11022 };
11023 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11024 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11025
11026 let available_users: Vec<String> = self
11028 .master_data
11029 .employees
11030 .iter()
11031 .take(20)
11032 .map(|e| e.user_id.clone())
11033 .collect();
11034
11035 let fallback_date =
11037 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11038 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11039 .unwrap_or(fallback_date);
11040 let base_midnight = base_date
11041 .and_hms_opt(0, 0, 0)
11042 .expect("midnight is always valid");
11043 let base_datetime =
11044 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11045
11046 let add_result = |event_log: &mut OcpmEventLog,
11048 result: datasynth_ocpm::CaseGenerationResult| {
11049 for event in result.events {
11050 event_log.add_event(event);
11051 }
11052 for object in result.objects {
11053 event_log.add_object(object);
11054 }
11055 for relationship in result.relationships {
11056 event_log.add_relationship(relationship);
11057 }
11058 for corr in result.correlation_events {
11059 event_log.add_correlation_event(corr);
11060 }
11061 event_log.add_case(result.case_trace);
11062 };
11063
11064 for chain in &flows.p2p_chains {
11066 let po = &chain.purchase_order;
11067 let documents = P2pDocuments::new(
11068 &po.header.document_id,
11069 &po.vendor_id,
11070 &po.header.company_code,
11071 po.total_net_amount,
11072 &po.header.currency,
11073 &ocpm_uuid_factory,
11074 )
11075 .with_goods_receipt(
11076 chain
11077 .goods_receipts
11078 .first()
11079 .map(|gr| gr.header.document_id.as_str())
11080 .unwrap_or(""),
11081 &ocpm_uuid_factory,
11082 )
11083 .with_invoice(
11084 chain
11085 .vendor_invoice
11086 .as_ref()
11087 .map(|vi| vi.header.document_id.as_str())
11088 .unwrap_or(""),
11089 &ocpm_uuid_factory,
11090 )
11091 .with_payment(
11092 chain
11093 .payment
11094 .as_ref()
11095 .map(|p| p.header.document_id.as_str())
11096 .unwrap_or(""),
11097 &ocpm_uuid_factory,
11098 );
11099
11100 let start_time =
11101 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11102 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11103 add_result(&mut event_log, result);
11104
11105 if let Some(pb) = &pb {
11106 pb.inc(1);
11107 }
11108 }
11109
11110 for chain in &flows.o2c_chains {
11112 let so = &chain.sales_order;
11113 let documents = O2cDocuments::new(
11114 &so.header.document_id,
11115 &so.customer_id,
11116 &so.header.company_code,
11117 so.total_net_amount,
11118 &so.header.currency,
11119 &ocpm_uuid_factory,
11120 )
11121 .with_delivery(
11122 chain
11123 .deliveries
11124 .first()
11125 .map(|d| d.header.document_id.as_str())
11126 .unwrap_or(""),
11127 &ocpm_uuid_factory,
11128 )
11129 .with_invoice(
11130 chain
11131 .customer_invoice
11132 .as_ref()
11133 .map(|ci| ci.header.document_id.as_str())
11134 .unwrap_or(""),
11135 &ocpm_uuid_factory,
11136 )
11137 .with_receipt(
11138 chain
11139 .customer_receipt
11140 .as_ref()
11141 .map(|r| r.header.document_id.as_str())
11142 .unwrap_or(""),
11143 &ocpm_uuid_factory,
11144 );
11145
11146 let start_time =
11147 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11148 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11149 add_result(&mut event_log, result);
11150
11151 if let Some(pb) = &pb {
11152 pb.inc(1);
11153 }
11154 }
11155
11156 for project in &sourcing.sourcing_projects {
11158 let vendor_id = sourcing
11160 .contracts
11161 .iter()
11162 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11163 .map(|c| c.vendor_id.clone())
11164 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11165 .or_else(|| {
11166 self.master_data
11167 .vendors
11168 .first()
11169 .map(|v| v.vendor_id.clone())
11170 })
11171 .unwrap_or_else(|| "V000".to_string());
11172 let mut docs = S2cDocuments::new(
11173 &project.project_id,
11174 &vendor_id,
11175 &project.company_code,
11176 project.estimated_annual_spend,
11177 &ocpm_uuid_factory,
11178 );
11179 if let Some(rfx) = sourcing
11181 .rfx_events
11182 .iter()
11183 .find(|r| r.sourcing_project_id == project.project_id)
11184 {
11185 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11186 if let Some(bid) = sourcing.bids.iter().find(|b| {
11188 b.rfx_id == rfx.rfx_id
11189 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11190 }) {
11191 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11192 }
11193 }
11194 if let Some(contract) = sourcing
11196 .contracts
11197 .iter()
11198 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11199 {
11200 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11201 }
11202 let start_time = base_datetime - chrono::Duration::days(90);
11203 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11204 add_result(&mut event_log, result);
11205
11206 if let Some(pb) = &pb {
11207 pb.inc(1);
11208 }
11209 }
11210
11211 for run in &hr.payroll_runs {
11213 let employee_id = hr
11215 .payroll_line_items
11216 .iter()
11217 .find(|li| li.payroll_id == run.payroll_id)
11218 .map(|li| li.employee_id.as_str())
11219 .unwrap_or("EMP000");
11220 let docs = H2rDocuments::new(
11221 &run.payroll_id,
11222 employee_id,
11223 &run.company_code,
11224 run.total_gross,
11225 &ocpm_uuid_factory,
11226 )
11227 .with_time_entries(
11228 hr.time_entries
11229 .iter()
11230 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11231 .take(5)
11232 .map(|t| t.entry_id.as_str())
11233 .collect(),
11234 );
11235 let start_time = base_datetime - chrono::Duration::days(30);
11236 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11237 add_result(&mut event_log, result);
11238
11239 if let Some(pb) = &pb {
11240 pb.inc(1);
11241 }
11242 }
11243
11244 for order in &manufacturing.production_orders {
11246 let mut docs = MfgDocuments::new(
11247 &order.order_id,
11248 &order.material_id,
11249 &order.company_code,
11250 order.planned_quantity,
11251 &ocpm_uuid_factory,
11252 )
11253 .with_operations(
11254 order
11255 .operations
11256 .iter()
11257 .map(|o| format!("OP-{:04}", o.operation_number))
11258 .collect::<Vec<_>>()
11259 .iter()
11260 .map(std::string::String::as_str)
11261 .collect(),
11262 );
11263 if let Some(insp) = manufacturing
11265 .quality_inspections
11266 .iter()
11267 .find(|i| i.reference_id == order.order_id)
11268 {
11269 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11270 }
11271 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11273 cc.items
11274 .iter()
11275 .any(|item| item.material_id == order.material_id)
11276 }) {
11277 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11278 }
11279 let start_time = base_datetime - chrono::Duration::days(60);
11280 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11281 add_result(&mut event_log, result);
11282
11283 if let Some(pb) = &pb {
11284 pb.inc(1);
11285 }
11286 }
11287
11288 for customer in &banking.customers {
11290 let customer_id_str = customer.customer_id.to_string();
11291 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11292 if let Some(account) = banking
11294 .accounts
11295 .iter()
11296 .find(|a| a.primary_owner_id == customer.customer_id)
11297 {
11298 let account_id_str = account.account_id.to_string();
11299 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11300 let txn_strs: Vec<String> = banking
11302 .transactions
11303 .iter()
11304 .filter(|t| t.account_id == account.account_id)
11305 .take(10)
11306 .map(|t| t.transaction_id.to_string())
11307 .collect();
11308 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11309 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11310 .transactions
11311 .iter()
11312 .filter(|t| t.account_id == account.account_id)
11313 .take(10)
11314 .map(|t| t.amount)
11315 .collect();
11316 if !txn_ids.is_empty() {
11317 docs = docs.with_transactions(txn_ids, txn_amounts);
11318 }
11319 }
11320 let start_time = base_datetime - chrono::Duration::days(180);
11321 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11322 add_result(&mut event_log, result);
11323
11324 if let Some(pb) = &pb {
11325 pb.inc(1);
11326 }
11327 }
11328
11329 for engagement in &audit.engagements {
11331 let engagement_id_str = engagement.engagement_id.to_string();
11332 let docs = AuditDocuments::new(
11333 &engagement_id_str,
11334 &engagement.client_entity_id,
11335 &ocpm_uuid_factory,
11336 )
11337 .with_workpapers(
11338 audit
11339 .workpapers
11340 .iter()
11341 .filter(|w| w.engagement_id == engagement.engagement_id)
11342 .take(10)
11343 .map(|w| w.workpaper_id.to_string())
11344 .collect::<Vec<_>>()
11345 .iter()
11346 .map(std::string::String::as_str)
11347 .collect(),
11348 )
11349 .with_evidence(
11350 audit
11351 .evidence
11352 .iter()
11353 .filter(|e| e.engagement_id == engagement.engagement_id)
11354 .take(10)
11355 .map(|e| e.evidence_id.to_string())
11356 .collect::<Vec<_>>()
11357 .iter()
11358 .map(std::string::String::as_str)
11359 .collect(),
11360 )
11361 .with_risks(
11362 audit
11363 .risk_assessments
11364 .iter()
11365 .filter(|r| r.engagement_id == engagement.engagement_id)
11366 .take(5)
11367 .map(|r| r.risk_id.to_string())
11368 .collect::<Vec<_>>()
11369 .iter()
11370 .map(std::string::String::as_str)
11371 .collect(),
11372 )
11373 .with_findings(
11374 audit
11375 .findings
11376 .iter()
11377 .filter(|f| f.engagement_id == engagement.engagement_id)
11378 .take(5)
11379 .map(|f| f.finding_id.to_string())
11380 .collect::<Vec<_>>()
11381 .iter()
11382 .map(std::string::String::as_str)
11383 .collect(),
11384 )
11385 .with_judgments(
11386 audit
11387 .judgments
11388 .iter()
11389 .filter(|j| j.engagement_id == engagement.engagement_id)
11390 .take(5)
11391 .map(|j| j.judgment_id.to_string())
11392 .collect::<Vec<_>>()
11393 .iter()
11394 .map(std::string::String::as_str)
11395 .collect(),
11396 );
11397 let start_time = base_datetime - chrono::Duration::days(120);
11398 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11399 add_result(&mut event_log, result);
11400
11401 if let Some(pb) = &pb {
11402 pb.inc(1);
11403 }
11404 }
11405
11406 for recon in &financial_reporting.bank_reconciliations {
11408 let docs = BankReconDocuments::new(
11409 &recon.reconciliation_id,
11410 &recon.bank_account_id,
11411 &recon.company_code,
11412 recon.bank_ending_balance,
11413 &ocpm_uuid_factory,
11414 )
11415 .with_statement_lines(
11416 recon
11417 .statement_lines
11418 .iter()
11419 .take(20)
11420 .map(|l| l.line_id.as_str())
11421 .collect(),
11422 )
11423 .with_reconciling_items(
11424 recon
11425 .reconciling_items
11426 .iter()
11427 .take(10)
11428 .map(|i| i.item_id.as_str())
11429 .collect(),
11430 );
11431 let start_time = base_datetime - chrono::Duration::days(30);
11432 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11433 add_result(&mut event_log, result);
11434
11435 if let Some(pb) = &pb {
11436 pb.inc(1);
11437 }
11438 }
11439
11440 event_log.compute_variants();
11442
11443 let summary = event_log.summary();
11444
11445 if let Some(pb) = pb {
11446 pb.finish_with_message(format!(
11447 "Generated {} OCPM events, {} objects",
11448 summary.event_count, summary.object_count
11449 ));
11450 }
11451
11452 Ok(OcpmSnapshot {
11453 event_count: summary.event_count,
11454 object_count: summary.object_count,
11455 case_count: summary.case_count,
11456 event_log: Some(event_log),
11457 })
11458 }
11459
11460 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11462 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11463
11464 let total_rate = if self.config.anomaly_injection.enabled {
11467 self.config.anomaly_injection.rates.total_rate
11468 } else if self.config.fraud.enabled {
11469 self.config.fraud.fraud_rate
11470 } else {
11471 0.02
11472 };
11473
11474 let fraud_rate = if self.config.anomaly_injection.enabled {
11475 self.config.anomaly_injection.rates.fraud_rate
11476 } else {
11477 AnomalyRateConfig::default().fraud_rate
11478 };
11479
11480 let error_rate = if self.config.anomaly_injection.enabled {
11481 self.config.anomaly_injection.rates.error_rate
11482 } else {
11483 AnomalyRateConfig::default().error_rate
11484 };
11485
11486 let process_issue_rate = if self.config.anomaly_injection.enabled {
11487 self.config.anomaly_injection.rates.process_rate
11488 } else {
11489 AnomalyRateConfig::default().process_issue_rate
11490 };
11491
11492 let anomaly_config = AnomalyInjectorConfig {
11493 rates: AnomalyRateConfig {
11494 total_rate,
11495 fraud_rate,
11496 error_rate,
11497 process_issue_rate,
11498 ..Default::default()
11499 },
11500 seed: self.seed + 5000,
11501 ..Default::default()
11502 };
11503
11504 let mut injector = AnomalyInjector::new(anomaly_config);
11505 let result = injector.process_entries(entries);
11506
11507 if let Some(pb) = &pb {
11508 pb.inc(entries.len() as u64);
11509 pb.finish_with_message("Anomaly injection complete");
11510 }
11511
11512 let mut by_type = HashMap::new();
11513 for label in &result.labels {
11514 *by_type
11515 .entry(format!("{:?}", label.anomaly_type))
11516 .or_insert(0) += 1;
11517 }
11518
11519 Ok(AnomalyLabels {
11520 labels: result.labels,
11521 summary: Some(result.summary),
11522 by_type,
11523 })
11524 }
11525
11526 fn validate_journal_entries(
11535 &mut self,
11536 entries: &[JournalEntry],
11537 ) -> SynthResult<BalanceValidationResult> {
11538 let clean_entries: Vec<&JournalEntry> = entries
11540 .iter()
11541 .filter(|e| {
11542 e.header
11543 .header_text
11544 .as_ref()
11545 .map(|t| !t.contains("[HUMAN_ERROR:"))
11546 .unwrap_or(true)
11547 })
11548 .collect();
11549
11550 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11551
11552 let config = BalanceTrackerConfig {
11554 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11558 };
11559 let validation_currency = self
11560 .config
11561 .companies
11562 .first()
11563 .map(|c| c.currency.clone())
11564 .unwrap_or_else(|| "USD".to_string());
11565
11566 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11567
11568 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11570 let errors = tracker.apply_entries(&clean_refs);
11571
11572 if let Some(pb) = &pb {
11573 pb.inc(entries.len() as u64);
11574 }
11575
11576 let has_unbalanced = tracker
11579 .get_validation_errors()
11580 .iter()
11581 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11582
11583 let mut all_errors = errors;
11586 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11587 let company_codes: Vec<String> = self
11588 .config
11589 .companies
11590 .iter()
11591 .map(|c| c.code.clone())
11592 .collect();
11593
11594 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11595 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11596 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11597
11598 for company_code in &company_codes {
11599 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11600 all_errors.push(e);
11601 }
11602 }
11603
11604 let stats = tracker.get_statistics();
11606
11607 let is_balanced = all_errors.is_empty();
11609
11610 if let Some(pb) = pb {
11611 let msg = if is_balanced {
11612 "Balance validation passed"
11613 } else {
11614 "Balance validation completed with errors"
11615 };
11616 pb.finish_with_message(msg);
11617 }
11618
11619 Ok(BalanceValidationResult {
11620 validated: true,
11621 is_balanced,
11622 entries_processed: stats.entries_processed,
11623 total_debits: stats.total_debits,
11624 total_credits: stats.total_credits,
11625 accounts_tracked: stats.accounts_tracked,
11626 companies_tracked: stats.companies_tracked,
11627 validation_errors: all_errors,
11628 has_unbalanced_entries: has_unbalanced,
11629 })
11630 }
11631
11632 fn inject_data_quality(
11637 &mut self,
11638 entries: &mut [JournalEntry],
11639 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11640 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11641
11642 let config = if self.config.data_quality.enabled {
11645 let dq = &self.config.data_quality;
11646 DataQualityConfig {
11647 enable_missing_values: dq.missing_values.enabled,
11648 missing_values: datasynth_generators::MissingValueConfig {
11649 global_rate: dq.effective_missing_rate(),
11650 ..Default::default()
11651 },
11652 enable_format_variations: dq.format_variations.enabled,
11653 format_variations: datasynth_generators::FormatVariationConfig {
11654 date_variation_rate: dq.format_variations.dates.rate,
11655 amount_variation_rate: dq.format_variations.amounts.rate,
11656 identifier_variation_rate: dq.format_variations.identifiers.rate,
11657 ..Default::default()
11658 },
11659 enable_duplicates: dq.duplicates.enabled,
11660 duplicates: datasynth_generators::DuplicateConfig {
11661 duplicate_rate: dq.effective_duplicate_rate(),
11662 ..Default::default()
11663 },
11664 enable_typos: dq.typos.enabled,
11665 typos: datasynth_generators::TypoConfig {
11666 char_error_rate: dq.effective_typo_rate(),
11667 ..Default::default()
11668 },
11669 enable_encoding_issues: dq.encoding_issues.enabled,
11670 encoding_issue_rate: dq.encoding_issues.rate,
11671 seed: self.seed.wrapping_add(77), track_statistics: true,
11673 }
11674 } else {
11675 DataQualityConfig::minimal()
11676 };
11677 let mut injector = DataQualityInjector::new(config);
11678
11679 injector.set_country_pack(self.primary_pack().clone());
11681
11682 let context = HashMap::new();
11684
11685 for entry in entries.iter_mut() {
11686 if let Some(text) = &entry.header.header_text {
11688 let processed = injector.process_text_field(
11689 "header_text",
11690 text,
11691 &entry.header.document_id.to_string(),
11692 &context,
11693 );
11694 match processed {
11695 Some(new_text) if new_text != *text => {
11696 entry.header.header_text = Some(new_text);
11697 }
11698 None => {
11699 entry.header.header_text = None; }
11701 _ => {}
11702 }
11703 }
11704
11705 if let Some(ref_text) = &entry.header.reference {
11707 let processed = injector.process_text_field(
11708 "reference",
11709 ref_text,
11710 &entry.header.document_id.to_string(),
11711 &context,
11712 );
11713 match processed {
11714 Some(new_text) if new_text != *ref_text => {
11715 entry.header.reference = Some(new_text);
11716 }
11717 None => {
11718 entry.header.reference = None;
11719 }
11720 _ => {}
11721 }
11722 }
11723
11724 let user_persona = entry.header.user_persona.clone();
11726 if let Some(processed) = injector.process_text_field(
11727 "user_persona",
11728 &user_persona,
11729 &entry.header.document_id.to_string(),
11730 &context,
11731 ) {
11732 if processed != user_persona {
11733 entry.header.user_persona = processed;
11734 }
11735 }
11736
11737 for line in &mut entry.lines {
11739 if let Some(ref text) = line.line_text {
11741 let processed = injector.process_text_field(
11742 "line_text",
11743 text,
11744 &entry.header.document_id.to_string(),
11745 &context,
11746 );
11747 match processed {
11748 Some(new_text) if new_text != *text => {
11749 line.line_text = Some(new_text);
11750 }
11751 None => {
11752 line.line_text = None;
11753 }
11754 _ => {}
11755 }
11756 }
11757
11758 if let Some(cc) = &line.cost_center {
11760 let processed = injector.process_text_field(
11761 "cost_center",
11762 cc,
11763 &entry.header.document_id.to_string(),
11764 &context,
11765 );
11766 match processed {
11767 Some(new_cc) if new_cc != *cc => {
11768 line.cost_center = Some(new_cc);
11769 }
11770 None => {
11771 line.cost_center = None;
11772 }
11773 _ => {}
11774 }
11775 }
11776 }
11777
11778 if let Some(pb) = &pb {
11779 pb.inc(1);
11780 }
11781 }
11782
11783 if let Some(pb) = pb {
11784 pb.finish_with_message("Data quality injection complete");
11785 }
11786
11787 let quality_issues = injector.issues().to_vec();
11788 Ok((injector.stats().clone(), quality_issues))
11789 }
11790
11791 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
11802 let use_fsm = self
11804 .config
11805 .audit
11806 .fsm
11807 .as_ref()
11808 .map(|f| f.enabled)
11809 .unwrap_or(false);
11810
11811 if use_fsm {
11812 return self.generate_audit_data_with_fsm(entries);
11813 }
11814
11815 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11817 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11818 let fiscal_year = start_date.year() as u16;
11819 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11820
11821 let total_revenue: rust_decimal::Decimal = entries
11823 .iter()
11824 .flat_map(|e| e.lines.iter())
11825 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11826 .map(|l| l.credit_amount)
11827 .sum();
11828
11829 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11831
11832 let mut snapshot = AuditSnapshot::default();
11833
11834 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11836 engagement_gen.set_team_config(&self.config.audit.team);
11839
11840 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11841 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
11845 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11846 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11847 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11848 finding_gen.set_template_provider(self.template_provider.clone());
11850 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11851 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11852 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11853 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11854 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11855 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11856 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11857
11858 let accounts: Vec<String> = self
11860 .coa
11861 .as_ref()
11862 .map(|coa| {
11863 coa.get_postable_accounts()
11864 .iter()
11865 .map(|acc| acc.account_code().to_string())
11866 .collect()
11867 })
11868 .unwrap_or_default();
11869
11870 for (i, company) in self.config.companies.iter().enumerate() {
11872 let company_revenue = total_revenue
11874 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11875
11876 let engagements_for_company =
11878 self.phase_config.audit_engagements / self.config.companies.len().max(1);
11879 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11880 1
11881 } else {
11882 0
11883 };
11884
11885 for _eng_idx in 0..(engagements_for_company + extra) {
11886 let eng_type =
11891 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
11892
11893 let mut engagement = engagement_gen.generate_engagement(
11895 &company.code,
11896 &company.name,
11897 fiscal_year,
11898 period_end,
11899 company_revenue,
11900 Some(eng_type),
11901 );
11902
11903 if !self.master_data.employees.is_empty() {
11905 let emp_count = self.master_data.employees.len();
11906 let base = (i * 10 + _eng_idx) % emp_count;
11908 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11909 .employee_id
11910 .clone();
11911 engagement.engagement_manager_id = self.master_data.employees
11912 [(base + 1) % emp_count]
11913 .employee_id
11914 .clone();
11915 let real_team: Vec<String> = engagement
11916 .team_member_ids
11917 .iter()
11918 .enumerate()
11919 .map(|(j, _)| {
11920 self.master_data.employees[(base + 2 + j) % emp_count]
11921 .employee_id
11922 .clone()
11923 })
11924 .collect();
11925 engagement.team_member_ids = real_team;
11926 }
11927
11928 if let Some(pb) = &pb {
11929 pb.inc(1);
11930 }
11931
11932 let team_members: Vec<String> = engagement.team_member_ids.clone();
11934
11935 let workpapers = if self.config.audit.generate_workpapers {
11941 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
11942 } else {
11943 Vec::new()
11944 };
11945
11946 for wp in &workpapers {
11947 if let Some(pb) = &pb {
11948 pb.inc(1);
11949 }
11950
11951 let evidence = evidence_gen.generate_evidence_for_workpaper(
11953 wp,
11954 &team_members,
11955 wp.preparer_date,
11956 );
11957
11958 for _ in &evidence {
11959 if let Some(pb) = &pb {
11960 pb.inc(1);
11961 }
11962 }
11963
11964 snapshot.evidence.extend(evidence);
11965 }
11966
11967 let risks =
11969 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11970
11971 for _ in &risks {
11972 if let Some(pb) = &pb {
11973 pb.inc(1);
11974 }
11975 }
11976 snapshot.risk_assessments.extend(risks);
11977
11978 let findings = finding_gen.generate_findings_for_engagement(
11980 &engagement,
11981 &workpapers,
11982 &team_members,
11983 );
11984
11985 for _ in &findings {
11986 if let Some(pb) = &pb {
11987 pb.inc(1);
11988 }
11989 }
11990 snapshot.findings.extend(findings);
11991
11992 let judgments =
11994 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11995
11996 for _ in &judgments {
11997 if let Some(pb) = &pb {
11998 pb.inc(1);
11999 }
12000 }
12001 snapshot.judgments.extend(judgments);
12002
12003 let (confs, resps) =
12005 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12006 snapshot.confirmations.extend(confs);
12007 snapshot.confirmation_responses.extend(resps);
12008
12009 let team_pairs: Vec<(String, String)> = team_members
12011 .iter()
12012 .map(|id| {
12013 let name = self
12014 .master_data
12015 .employees
12016 .iter()
12017 .find(|e| e.employee_id == *id)
12018 .map(|e| e.display_name.clone())
12019 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12020 (id.clone(), name)
12021 })
12022 .collect();
12023 for wp in &workpapers {
12024 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12025 snapshot.procedure_steps.extend(steps);
12026 }
12027
12028 for wp in &workpapers {
12030 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12031 snapshot.samples.push(sample);
12032 }
12033 }
12034
12035 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12037 snapshot.analytical_results.extend(analytical);
12038
12039 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12041 snapshot.ia_functions.push(ia_func);
12042 snapshot.ia_reports.extend(ia_reports);
12043
12044 let vendor_names: Vec<String> = self
12046 .master_data
12047 .vendors
12048 .iter()
12049 .map(|v| v.name.clone())
12050 .collect();
12051 let customer_names: Vec<String> = self
12052 .master_data
12053 .customers
12054 .iter()
12055 .map(|c| c.name.clone())
12056 .collect();
12057 let (parties, rp_txns) =
12058 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12059 snapshot.related_parties.extend(parties);
12060 snapshot.related_party_transactions.extend(rp_txns);
12061
12062 snapshot.workpapers.extend(workpapers);
12064
12065 {
12067 let scope_id = format!(
12068 "SCOPE-{}-{}",
12069 engagement.engagement_id.simple(),
12070 &engagement.client_entity_id
12071 );
12072 let scope = datasynth_core::models::audit::AuditScope::new(
12073 scope_id.clone(),
12074 engagement.engagement_id.to_string(),
12075 engagement.client_entity_id.clone(),
12076 engagement.materiality,
12077 );
12078 let mut eng = engagement;
12080 eng.scope_id = Some(scope_id);
12081 snapshot.audit_scopes.push(scope);
12082 snapshot.engagements.push(eng);
12083 }
12084 }
12085 }
12086
12087 if self.config.companies.len() > 1 {
12091 let group_materiality = snapshot
12094 .engagements
12095 .first()
12096 .map(|e| e.materiality)
12097 .unwrap_or_else(|| {
12098 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12099 total_revenue * pct
12100 });
12101
12102 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12103 let group_engagement_id = snapshot
12104 .engagements
12105 .first()
12106 .map(|e| e.engagement_id.to_string())
12107 .unwrap_or_else(|| "GROUP-ENG".to_string());
12108
12109 let component_snapshot = component_gen.generate(
12110 &self.config.companies,
12111 group_materiality,
12112 &group_engagement_id,
12113 period_end,
12114 );
12115
12116 snapshot.component_auditors = component_snapshot.component_auditors;
12117 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12118 snapshot.component_instructions = component_snapshot.component_instructions;
12119 snapshot.component_reports = component_snapshot.component_reports;
12120
12121 info!(
12122 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12123 snapshot.component_auditors.len(),
12124 snapshot.component_instructions.len(),
12125 snapshot.component_reports.len(),
12126 );
12127 }
12128
12129 {
12133 let applicable_framework = self
12134 .config
12135 .accounting_standards
12136 .framework
12137 .as_ref()
12138 .map(|f| format!("{f:?}"))
12139 .unwrap_or_else(|| "IFRS".to_string());
12140
12141 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12142 let entity_count = self.config.companies.len();
12143
12144 for engagement in &snapshot.engagements {
12145 let company = self
12146 .config
12147 .companies
12148 .iter()
12149 .find(|c| c.code == engagement.client_entity_id);
12150 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12151 let letter_date = engagement.planning_start;
12152 let letter = letter_gen.generate(
12153 &engagement.engagement_id.to_string(),
12154 &engagement.client_name,
12155 entity_count,
12156 engagement.period_end_date,
12157 currency,
12158 &applicable_framework,
12159 letter_date,
12160 );
12161 snapshot.engagement_letters.push(letter);
12162 }
12163
12164 info!(
12165 "ISA 210 engagement letters: {} generated",
12166 snapshot.engagement_letters.len()
12167 );
12168 }
12169
12170 if self.phase_config.generate_legal_documents {
12174 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12175 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12176 for engagement in &snapshot.engagements {
12177 let employee_names: Vec<String> = self
12181 .master_data
12182 .employees
12183 .iter()
12184 .filter(|e| e.company_code == engagement.client_entity_id)
12185 .map(|e| e.display_name.clone())
12186 .collect();
12187 let names_to_use = if !employee_names.is_empty() {
12188 employee_names
12189 } else {
12190 self.master_data
12191 .employees
12192 .iter()
12193 .take(10)
12194 .map(|e| e.display_name.clone())
12195 .collect()
12196 };
12197 let docs = legal_gen.generate(
12198 &engagement.client_entity_id,
12199 engagement.fiscal_year as i32,
12200 &names_to_use,
12201 );
12202 snapshot.legal_documents.extend(docs);
12203 }
12204 info!(
12205 "v3.3.0 legal documents: {} emitted across {} engagements",
12206 snapshot.legal_documents.len(),
12207 snapshot.engagements.len()
12208 );
12209 }
12210
12211 if self.phase_config.generate_it_controls {
12221 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12222 use std::collections::HashMap;
12223 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12224
12225 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12228 HashMap::new();
12229 for engagement in &snapshot.engagements {
12230 let entry = by_company
12231 .entry(engagement.client_entity_id.clone())
12232 .or_insert((engagement.planning_start, engagement.period_end_date));
12233 if engagement.planning_start < entry.0 {
12234 entry.0 = engagement.planning_start;
12235 }
12236 if engagement.period_end_date > entry.1 {
12237 entry.1 = engagement.period_end_date;
12238 }
12239 }
12240
12241 let systems: Vec<String> = vec![
12245 "SAP ECC",
12246 "SAP S/4 HANA",
12247 "Oracle EBS",
12248 "Workday",
12249 "NetSuite",
12250 "Active Directory",
12251 "SharePoint",
12252 "Salesforce",
12253 "ServiceNow",
12254 "Jira",
12255 "GitHub Enterprise",
12256 "AWS Console",
12257 "Okta",
12258 ]
12259 .into_iter()
12260 .map(String::from)
12261 .collect();
12262
12263 for (company_code, (start, end)) in by_company {
12264 let emps: Vec<(String, String)> = self
12265 .master_data
12266 .employees
12267 .iter()
12268 .filter(|e| e.company_code == company_code)
12269 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12270 .collect();
12271 if emps.is_empty() {
12272 continue;
12273 }
12274 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12277 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12278 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12279 snapshot.it_controls_access_logs.extend(access_logs);
12280 snapshot.it_controls_change_records.extend(change_records);
12281 }
12282
12283 info!(
12284 "v3.3.0 IT controls: {} access logs, {} change records",
12285 snapshot.it_controls_access_logs.len(),
12286 snapshot.it_controls_change_records.len()
12287 );
12288 }
12289
12290 {
12294 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12295 let entity_codes: Vec<String> = self
12296 .config
12297 .companies
12298 .iter()
12299 .map(|c| c.code.clone())
12300 .collect();
12301 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12302 info!(
12303 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12304 subsequent.len(),
12305 subsequent
12306 .iter()
12307 .filter(|e| matches!(
12308 e.classification,
12309 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12310 ))
12311 .count(),
12312 subsequent
12313 .iter()
12314 .filter(|e| matches!(
12315 e.classification,
12316 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12317 ))
12318 .count(),
12319 );
12320 snapshot.subsequent_events = subsequent;
12321 }
12322
12323 {
12327 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12328 let entity_codes: Vec<String> = self
12329 .config
12330 .companies
12331 .iter()
12332 .map(|c| c.code.clone())
12333 .collect();
12334 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12335 info!(
12336 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12337 soc_snapshot.service_organizations.len(),
12338 soc_snapshot.soc_reports.len(),
12339 soc_snapshot.user_entity_controls.len(),
12340 );
12341 snapshot.service_organizations = soc_snapshot.service_organizations;
12342 snapshot.soc_reports = soc_snapshot.soc_reports;
12343 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12344 }
12345
12346 {
12350 use datasynth_generators::audit::going_concern_generator::{
12351 GoingConcernGenerator, GoingConcernInput,
12352 };
12353 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12354 let entity_codes: Vec<String> = self
12355 .config
12356 .companies
12357 .iter()
12358 .map(|c| c.code.clone())
12359 .collect();
12360 let assessment_date = period_end + chrono::Duration::days(75);
12362 let period_label = format!("FY{}", period_end.year());
12363
12364 let gc_inputs: Vec<GoingConcernInput> = self
12375 .config
12376 .companies
12377 .iter()
12378 .map(|company| {
12379 let code = &company.code;
12380 let mut revenue = rust_decimal::Decimal::ZERO;
12381 let mut expenses = rust_decimal::Decimal::ZERO;
12382 let mut current_assets = rust_decimal::Decimal::ZERO;
12383 let mut current_liabs = rust_decimal::Decimal::ZERO;
12384 let mut total_debt = rust_decimal::Decimal::ZERO;
12385
12386 for je in entries.iter().filter(|je| &je.header.company_code == code) {
12387 for line in &je.lines {
12388 let acct = line.gl_account.as_str();
12389 let net = line.debit_amount - line.credit_amount;
12390 if acct.starts_with('4') {
12391 revenue -= net;
12393 } else if acct.starts_with('6') {
12394 expenses += net;
12396 }
12397 if acct.starts_with('1') {
12399 if let Ok(n) = acct.parse::<u32>() {
12401 if (1000..=1499).contains(&n) {
12402 current_assets += net;
12403 }
12404 }
12405 } else if acct.starts_with('2') {
12406 if let Ok(n) = acct.parse::<u32>() {
12407 if (2000..=2499).contains(&n) {
12408 current_liabs -= net; } else if (2500..=2999).contains(&n) {
12411 total_debt -= net;
12413 }
12414 }
12415 }
12416 }
12417 }
12418
12419 let net_income = revenue - expenses;
12420 let working_capital = current_assets - current_liabs;
12421 let operating_cash_flow = net_income;
12424
12425 GoingConcernInput {
12426 entity_code: code.clone(),
12427 net_income,
12428 working_capital,
12429 operating_cash_flow,
12430 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12431 assessment_date,
12432 }
12433 })
12434 .collect();
12435
12436 let assessments = if gc_inputs.is_empty() {
12437 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12438 } else {
12439 gc_gen.generate_for_entities_with_inputs(
12440 &entity_codes,
12441 &gc_inputs,
12442 assessment_date,
12443 &period_label,
12444 )
12445 };
12446 info!(
12447 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12448 assessments.len(),
12449 assessments.iter().filter(|a| matches!(
12450 a.auditor_conclusion,
12451 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12452 )).count(),
12453 assessments.iter().filter(|a| matches!(
12454 a.auditor_conclusion,
12455 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12456 )).count(),
12457 assessments.iter().filter(|a| matches!(
12458 a.auditor_conclusion,
12459 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12460 )).count(),
12461 );
12462 snapshot.going_concern_assessments = assessments;
12463 }
12464
12465 {
12469 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12470 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12471 let entity_codes: Vec<String> = self
12472 .config
12473 .companies
12474 .iter()
12475 .map(|c| c.code.clone())
12476 .collect();
12477 let estimates = est_gen.generate_for_entities(&entity_codes);
12478 info!(
12479 "ISA 540 accounting estimates: {} estimates across {} entities \
12480 ({} with retrospective reviews, {} with auditor point estimates)",
12481 estimates.len(),
12482 entity_codes.len(),
12483 estimates
12484 .iter()
12485 .filter(|e| e.retrospective_review.is_some())
12486 .count(),
12487 estimates
12488 .iter()
12489 .filter(|e| e.auditor_point_estimate.is_some())
12490 .count(),
12491 );
12492 snapshot.accounting_estimates = estimates;
12493 }
12494
12495 {
12499 use datasynth_generators::audit::audit_opinion_generator::{
12500 AuditOpinionGenerator, AuditOpinionInput,
12501 };
12502
12503 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12504
12505 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12507 .engagements
12508 .iter()
12509 .map(|eng| {
12510 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12512 .findings
12513 .iter()
12514 .filter(|f| f.engagement_id == eng.engagement_id)
12515 .cloned()
12516 .collect();
12517
12518 let gc = snapshot
12520 .going_concern_assessments
12521 .iter()
12522 .find(|g| g.entity_code == eng.client_entity_id)
12523 .cloned();
12524
12525 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12527 snapshot.component_reports.clone();
12528
12529 let auditor = self
12530 .master_data
12531 .employees
12532 .first()
12533 .map(|e| e.display_name.clone())
12534 .unwrap_or_else(|| "Global Audit LLP".into());
12535
12536 let partner = self
12537 .master_data
12538 .employees
12539 .get(1)
12540 .map(|e| e.display_name.clone())
12541 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12542
12543 AuditOpinionInput {
12544 entity_code: eng.client_entity_id.clone(),
12545 entity_name: eng.client_name.clone(),
12546 engagement_id: eng.engagement_id,
12547 period_end: eng.period_end_date,
12548 findings: eng_findings,
12549 going_concern: gc,
12550 component_reports: comp_reports,
12551 is_us_listed: {
12553 let fw = &self.config.audit_standards.isa_compliance.framework;
12554 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12555 },
12556 auditor_name: auditor,
12557 engagement_partner: partner,
12558 }
12559 })
12560 .collect();
12561
12562 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12563
12564 for go in &generated_opinions {
12565 snapshot
12566 .key_audit_matters
12567 .extend(go.key_audit_matters.clone());
12568 }
12569 snapshot.audit_opinions = generated_opinions
12570 .into_iter()
12571 .map(|go| go.opinion)
12572 .collect();
12573
12574 info!(
12575 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12576 snapshot.audit_opinions.len(),
12577 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12578 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12579 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12580 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12581 );
12582 }
12583
12584 {
12588 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12589
12590 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12591
12592 for (i, company) in self.config.companies.iter().enumerate() {
12593 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12595 .engagements
12596 .iter()
12597 .filter(|e| e.client_entity_id == company.code)
12598 .map(|e| e.engagement_id)
12599 .collect();
12600
12601 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12602 .findings
12603 .iter()
12604 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12605 .cloned()
12606 .collect();
12607
12608 let emp_count = self.master_data.employees.len();
12610 let ceo_name = if emp_count > 0 {
12611 self.master_data.employees[i % emp_count]
12612 .display_name
12613 .clone()
12614 } else {
12615 format!("CEO of {}", company.name)
12616 };
12617 let cfo_name = if emp_count > 1 {
12618 self.master_data.employees[(i + 1) % emp_count]
12619 .display_name
12620 .clone()
12621 } else {
12622 format!("CFO of {}", company.name)
12623 };
12624
12625 let materiality = snapshot
12627 .engagements
12628 .iter()
12629 .find(|e| e.client_entity_id == company.code)
12630 .map(|e| e.materiality)
12631 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12632
12633 let input = SoxGeneratorInput {
12634 company_code: company.code.clone(),
12635 company_name: company.name.clone(),
12636 fiscal_year,
12637 period_end,
12638 findings: company_findings,
12639 ceo_name,
12640 cfo_name,
12641 materiality_threshold: materiality,
12642 revenue_percent: rust_decimal::Decimal::from(100),
12643 assets_percent: rust_decimal::Decimal::from(100),
12644 significant_accounts: vec![
12645 "Revenue".into(),
12646 "Accounts Receivable".into(),
12647 "Inventory".into(),
12648 "Fixed Assets".into(),
12649 "Accounts Payable".into(),
12650 ],
12651 };
12652
12653 let (certs, assessment) = sox_gen.generate(&input);
12654 snapshot.sox_302_certifications.extend(certs);
12655 snapshot.sox_404_assessments.push(assessment);
12656 }
12657
12658 info!(
12659 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12660 snapshot.sox_302_certifications.len(),
12661 snapshot.sox_404_assessments.len(),
12662 snapshot
12663 .sox_404_assessments
12664 .iter()
12665 .filter(|a| a.icfr_effective)
12666 .count(),
12667 snapshot
12668 .sox_404_assessments
12669 .iter()
12670 .filter(|a| !a.icfr_effective)
12671 .count(),
12672 );
12673 }
12674
12675 {
12679 use datasynth_generators::audit::materiality_generator::{
12680 MaterialityGenerator, MaterialityInput,
12681 };
12682
12683 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12684
12685 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12689
12690 for company in &self.config.companies {
12691 let company_code = company.code.clone();
12692
12693 let company_revenue: rust_decimal::Decimal = entries
12695 .iter()
12696 .filter(|e| e.company_code() == company_code)
12697 .flat_map(|e| e.lines.iter())
12698 .filter(|l| l.account_code.starts_with('4'))
12699 .map(|l| l.credit_amount)
12700 .sum();
12701
12702 let total_assets: rust_decimal::Decimal = entries
12704 .iter()
12705 .filter(|e| e.company_code() == company_code)
12706 .flat_map(|e| e.lines.iter())
12707 .filter(|l| l.account_code.starts_with('1'))
12708 .map(|l| l.debit_amount)
12709 .sum();
12710
12711 let total_expenses: rust_decimal::Decimal = entries
12713 .iter()
12714 .filter(|e| e.company_code() == company_code)
12715 .flat_map(|e| e.lines.iter())
12716 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12717 .map(|l| l.debit_amount)
12718 .sum();
12719
12720 let equity: rust_decimal::Decimal = entries
12722 .iter()
12723 .filter(|e| e.company_code() == company_code)
12724 .flat_map(|e| e.lines.iter())
12725 .filter(|l| l.account_code.starts_with('3'))
12726 .map(|l| l.credit_amount)
12727 .sum();
12728
12729 let pretax_income = company_revenue - total_expenses;
12730
12731 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12733 let w = rust_decimal::Decimal::try_from(company.volume_weight)
12734 .unwrap_or(rust_decimal::Decimal::ONE);
12735 (
12736 total_revenue * w,
12737 total_revenue * w * rust_decimal::Decimal::from(3),
12738 total_revenue * w * rust_decimal::Decimal::new(1, 1),
12739 total_revenue * w * rust_decimal::Decimal::from(2),
12740 )
12741 } else {
12742 (company_revenue, total_assets, pretax_income, equity)
12743 };
12744
12745 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
12748 entity_code: company_code,
12749 period: format!("FY{}", fiscal_year),
12750 revenue: rev,
12751 pretax_income: pti,
12752 total_assets: assets,
12753 equity: eq,
12754 gross_profit,
12755 });
12756 }
12757
12758 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12759
12760 info!(
12761 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12762 {} total assets, {} equity benchmarks)",
12763 snapshot.materiality_calculations.len(),
12764 snapshot
12765 .materiality_calculations
12766 .iter()
12767 .filter(|m| matches!(
12768 m.benchmark,
12769 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12770 ))
12771 .count(),
12772 snapshot
12773 .materiality_calculations
12774 .iter()
12775 .filter(|m| matches!(
12776 m.benchmark,
12777 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12778 ))
12779 .count(),
12780 snapshot
12781 .materiality_calculations
12782 .iter()
12783 .filter(|m| matches!(
12784 m.benchmark,
12785 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12786 ))
12787 .count(),
12788 snapshot
12789 .materiality_calculations
12790 .iter()
12791 .filter(|m| matches!(
12792 m.benchmark,
12793 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
12794 ))
12795 .count(),
12796 );
12797 }
12798
12799 {
12803 use datasynth_generators::audit::cra_generator::CraGenerator;
12804
12805 let mut cra_gen = CraGenerator::new(self.seed + 8315);
12806
12807 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
12809 .audit_scopes
12810 .iter()
12811 .map(|s| (s.entity_code.clone(), s.id.clone()))
12812 .collect();
12813
12814 for company in &self.config.companies {
12815 let cras = cra_gen.generate_for_entity(&company.code, None);
12816 let scope_id = entity_scope_map.get(&company.code).cloned();
12817 let cras_with_scope: Vec<_> = cras
12818 .into_iter()
12819 .map(|mut cra| {
12820 cra.scope_id = scope_id.clone();
12821 cra
12822 })
12823 .collect();
12824 snapshot.combined_risk_assessments.extend(cras_with_scope);
12825 }
12826
12827 let significant_count = snapshot
12828 .combined_risk_assessments
12829 .iter()
12830 .filter(|c| c.significant_risk)
12831 .count();
12832 let high_cra_count = snapshot
12833 .combined_risk_assessments
12834 .iter()
12835 .filter(|c| {
12836 matches!(
12837 c.combined_risk,
12838 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
12839 )
12840 })
12841 .count();
12842
12843 info!(
12844 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
12845 snapshot.combined_risk_assessments.len(),
12846 significant_count,
12847 high_cra_count,
12848 );
12849 }
12850
12851 {
12855 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
12856
12857 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
12858
12859 for company in &self.config.companies {
12861 let entity_code = company.code.clone();
12862
12863 let tolerable_error = snapshot
12865 .materiality_calculations
12866 .iter()
12867 .find(|m| m.entity_code == entity_code)
12868 .map(|m| m.tolerable_error);
12869
12870 let entity_cras: Vec<_> = snapshot
12872 .combined_risk_assessments
12873 .iter()
12874 .filter(|c| c.entity_code == entity_code)
12875 .cloned()
12876 .collect();
12877
12878 if !entity_cras.is_empty() {
12879 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
12880 snapshot.sampling_plans.extend(plans);
12881 snapshot.sampled_items.extend(items);
12882 }
12883 }
12884
12885 let misstatement_count = snapshot
12886 .sampled_items
12887 .iter()
12888 .filter(|i| i.misstatement_found)
12889 .count();
12890
12891 info!(
12892 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
12893 snapshot.sampling_plans.len(),
12894 snapshot.sampled_items.len(),
12895 misstatement_count,
12896 );
12897 }
12898
12899 {
12903 use datasynth_generators::audit::scots_generator::{
12904 ScotsGenerator, ScotsGeneratorConfig,
12905 };
12906
12907 let ic_enabled = self.config.intercompany.enabled;
12908
12909 let config = ScotsGeneratorConfig {
12910 intercompany_enabled: ic_enabled,
12911 ..ScotsGeneratorConfig::default()
12912 };
12913 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
12914
12915 for company in &self.config.companies {
12916 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
12917 snapshot
12918 .significant_transaction_classes
12919 .extend(entity_scots);
12920 }
12921
12922 let estimation_count = snapshot
12923 .significant_transaction_classes
12924 .iter()
12925 .filter(|s| {
12926 matches!(
12927 s.transaction_type,
12928 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
12929 )
12930 })
12931 .count();
12932
12933 info!(
12934 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
12935 snapshot.significant_transaction_classes.len(),
12936 estimation_count,
12937 );
12938 }
12939
12940 {
12944 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
12945
12946 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
12947 let entity_codes: Vec<String> = self
12948 .config
12949 .companies
12950 .iter()
12951 .map(|c| c.code.clone())
12952 .collect();
12953 let unusual_flags =
12954 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
12955 info!(
12956 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
12957 unusual_flags.len(),
12958 unusual_flags
12959 .iter()
12960 .filter(|f| matches!(
12961 f.severity,
12962 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
12963 ))
12964 .count(),
12965 unusual_flags
12966 .iter()
12967 .filter(|f| matches!(
12968 f.severity,
12969 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12970 ))
12971 .count(),
12972 unusual_flags
12973 .iter()
12974 .filter(|f| matches!(
12975 f.severity,
12976 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12977 ))
12978 .count(),
12979 );
12980 snapshot.unusual_items = unusual_flags;
12981 }
12982
12983 {
12987 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12988
12989 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12990 let entity_codes: Vec<String> = self
12991 .config
12992 .companies
12993 .iter()
12994 .map(|c| c.code.clone())
12995 .collect();
12996 let current_period_label = format!("FY{fiscal_year}");
12997 let prior_period_label = format!("FY{}", fiscal_year - 1);
12998 let analytical_rels = ar_gen.generate_for_entities(
12999 &entity_codes,
13000 entries,
13001 ¤t_period_label,
13002 &prior_period_label,
13003 );
13004 let out_of_range = analytical_rels
13005 .iter()
13006 .filter(|r| !r.within_expected_range)
13007 .count();
13008 info!(
13009 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13010 analytical_rels.len(),
13011 out_of_range,
13012 );
13013 snapshot.analytical_relationships = analytical_rels;
13014 }
13015
13016 if let Some(pb) = pb {
13017 pb.finish_with_message(format!(
13018 "Audit data: {} engagements, {} workpapers, {} evidence, \
13019 {} confirmations, {} procedure steps, {} samples, \
13020 {} analytical, {} IA funcs, {} related parties, \
13021 {} component auditors, {} letters, {} subsequent events, \
13022 {} service orgs, {} going concern, {} accounting estimates, \
13023 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13024 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13025 {} unusual items, {} analytical relationships",
13026 snapshot.engagements.len(),
13027 snapshot.workpapers.len(),
13028 snapshot.evidence.len(),
13029 snapshot.confirmations.len(),
13030 snapshot.procedure_steps.len(),
13031 snapshot.samples.len(),
13032 snapshot.analytical_results.len(),
13033 snapshot.ia_functions.len(),
13034 snapshot.related_parties.len(),
13035 snapshot.component_auditors.len(),
13036 snapshot.engagement_letters.len(),
13037 snapshot.subsequent_events.len(),
13038 snapshot.service_organizations.len(),
13039 snapshot.going_concern_assessments.len(),
13040 snapshot.accounting_estimates.len(),
13041 snapshot.audit_opinions.len(),
13042 snapshot.key_audit_matters.len(),
13043 snapshot.sox_302_certifications.len(),
13044 snapshot.sox_404_assessments.len(),
13045 snapshot.materiality_calculations.len(),
13046 snapshot.combined_risk_assessments.len(),
13047 snapshot.sampling_plans.len(),
13048 snapshot.significant_transaction_classes.len(),
13049 snapshot.unusual_items.len(),
13050 snapshot.analytical_relationships.len(),
13051 ));
13052 }
13053
13054 {
13061 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13062 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13063 debug!(
13064 "PCAOB-ISA mappings generated: {} mappings",
13065 snapshot.isa_pcaob_mappings.len()
13066 );
13067 }
13068
13069 {
13076 use datasynth_standards::audit::isa_reference::IsaStandard;
13077 snapshot.isa_mappings = IsaStandard::standard_entries();
13078 debug!(
13079 "ISA standard entries generated: {} standards",
13080 snapshot.isa_mappings.len()
13081 );
13082 }
13083
13084 {
13087 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13088 .engagements
13089 .iter()
13090 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13091 .collect();
13092
13093 for rpt in &mut snapshot.related_party_transactions {
13094 if rpt.journal_entry_id.is_some() {
13095 continue; }
13097 let entity = engagement_by_id
13098 .get(&rpt.engagement_id.to_string())
13099 .copied()
13100 .unwrap_or("");
13101
13102 let best_je = entries
13104 .iter()
13105 .filter(|je| je.header.company_code == entity)
13106 .min_by_key(|je| {
13107 (je.header.posting_date - rpt.transaction_date)
13108 .num_days()
13109 .abs()
13110 });
13111
13112 if let Some(je) = best_je {
13113 rpt.journal_entry_id = Some(je.header.document_id.to_string());
13114 }
13115 }
13116
13117 let linked = snapshot
13118 .related_party_transactions
13119 .iter()
13120 .filter(|t| t.journal_entry_id.is_some())
13121 .count();
13122 debug!(
13123 "Linked {}/{} related party transactions to journal entries",
13124 linked,
13125 snapshot.related_party_transactions.len()
13126 );
13127 }
13128
13129 if !snapshot.engagements.is_empty() {
13135 use datasynth_generators::audit_opinion_generator::{
13136 AuditOpinionGenerator, AuditOpinionInput,
13137 };
13138
13139 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13140 let inputs: Vec<AuditOpinionInput> = snapshot
13141 .engagements
13142 .iter()
13143 .map(|eng| {
13144 let findings = snapshot
13145 .findings
13146 .iter()
13147 .filter(|f| f.engagement_id == eng.engagement_id)
13148 .cloned()
13149 .collect();
13150 let going_concern = snapshot
13151 .going_concern_assessments
13152 .iter()
13153 .find(|gc| gc.entity_code == eng.client_entity_id)
13154 .cloned();
13155 let component_reports = snapshot
13158 .component_reports
13159 .iter()
13160 .filter(|r| r.entity_code == eng.client_entity_id)
13161 .cloned()
13162 .collect();
13163
13164 AuditOpinionInput {
13165 entity_code: eng.client_entity_id.clone(),
13166 entity_name: eng.client_name.clone(),
13167 engagement_id: eng.engagement_id,
13168 period_end: eng.period_end_date,
13169 findings,
13170 going_concern,
13171 component_reports,
13172 is_us_listed: matches!(
13173 eng.engagement_type,
13174 datasynth_core::audit::EngagementType::IntegratedAudit
13175 | datasynth_core::audit::EngagementType::Sox404
13176 ),
13177 auditor_name: "DataSynth Audit LLP".to_string(),
13178 engagement_partner: "Engagement Partner".to_string(),
13179 }
13180 })
13181 .collect();
13182
13183 let generated = opinion_gen.generate_batch(&inputs);
13184 for g in generated {
13185 snapshot.key_audit_matters.extend(g.key_audit_matters);
13186 snapshot.audit_opinions.push(g.opinion);
13187 }
13188 debug!(
13189 "Generated {} audit opinions with {} key audit matters",
13190 snapshot.audit_opinions.len(),
13191 snapshot.key_audit_matters.len()
13192 );
13193 }
13194
13195 Ok(snapshot)
13196 }
13197
13198 fn generate_audit_data_with_fsm(
13205 &mut self,
13206 entries: &[JournalEntry],
13207 ) -> SynthResult<AuditSnapshot> {
13208 use datasynth_audit_fsm::{
13209 context::EngagementContext,
13210 engine::AuditFsmEngine,
13211 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13212 };
13213 use rand::SeedableRng;
13214 use rand_chacha::ChaCha8Rng;
13215
13216 info!("Audit FSM: generating audit data via FSM engine");
13217
13218 let fsm_config = self
13219 .config
13220 .audit
13221 .fsm
13222 .as_ref()
13223 .expect("FSM config must be present when FSM is enabled");
13224
13225 let bwp = match fsm_config.blueprint.as_str() {
13227 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13228 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13229 _ => {
13230 warn!(
13231 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13232 fsm_config.blueprint
13233 );
13234 BlueprintWithPreconditions::load_builtin_fsa()
13235 }
13236 }
13237 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13238
13239 let overlay = match fsm_config.overlay.as_str() {
13241 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13242 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13243 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13244 _ => {
13245 warn!(
13246 "Unknown FSM overlay '{}', falling back to builtin:default",
13247 fsm_config.overlay
13248 );
13249 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13250 }
13251 }
13252 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13253
13254 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13256 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13257 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13258
13259 let company = self.config.companies.first();
13261 let company_code = company
13262 .map(|c| c.code.clone())
13263 .unwrap_or_else(|| "UNKNOWN".to_string());
13264 let company_name = company
13265 .map(|c| c.name.clone())
13266 .unwrap_or_else(|| "Unknown Company".to_string());
13267 let currency = company
13268 .map(|c| c.currency.clone())
13269 .unwrap_or_else(|| "USD".to_string());
13270
13271 let entity_entries: Vec<_> = entries
13273 .iter()
13274 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13275 .cloned()
13276 .collect();
13277 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
13281 .iter()
13282 .flat_map(|e| e.lines.iter())
13283 .filter(|l| l.account_code.starts_with('4'))
13284 .map(|l| l.credit_amount - l.debit_amount)
13285 .sum();
13286
13287 let total_assets: rust_decimal::Decimal = entries
13288 .iter()
13289 .flat_map(|e| e.lines.iter())
13290 .filter(|l| l.account_code.starts_with('1'))
13291 .map(|l| l.debit_amount - l.credit_amount)
13292 .sum();
13293
13294 let total_expenses: rust_decimal::Decimal = entries
13295 .iter()
13296 .flat_map(|e| e.lines.iter())
13297 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13298 .map(|l| l.debit_amount)
13299 .sum();
13300
13301 let equity: rust_decimal::Decimal = entries
13302 .iter()
13303 .flat_map(|e| e.lines.iter())
13304 .filter(|l| l.account_code.starts_with('3'))
13305 .map(|l| l.credit_amount - l.debit_amount)
13306 .sum();
13307
13308 let total_debt: rust_decimal::Decimal = entries
13309 .iter()
13310 .flat_map(|e| e.lines.iter())
13311 .filter(|l| l.account_code.starts_with('2'))
13312 .map(|l| l.credit_amount - l.debit_amount)
13313 .sum();
13314
13315 let pretax_income = total_revenue - total_expenses;
13316
13317 let cogs: rust_decimal::Decimal = entries
13318 .iter()
13319 .flat_map(|e| e.lines.iter())
13320 .filter(|l| l.account_code.starts_with('5'))
13321 .map(|l| l.debit_amount)
13322 .sum();
13323 let gross_profit = total_revenue - cogs;
13324
13325 let current_assets: rust_decimal::Decimal = entries
13326 .iter()
13327 .flat_map(|e| e.lines.iter())
13328 .filter(|l| {
13329 l.account_code.starts_with("10")
13330 || l.account_code.starts_with("11")
13331 || l.account_code.starts_with("12")
13332 || l.account_code.starts_with("13")
13333 })
13334 .map(|l| l.debit_amount - l.credit_amount)
13335 .sum();
13336 let current_liabilities: rust_decimal::Decimal = entries
13337 .iter()
13338 .flat_map(|e| e.lines.iter())
13339 .filter(|l| {
13340 l.account_code.starts_with("20")
13341 || l.account_code.starts_with("21")
13342 || l.account_code.starts_with("22")
13343 })
13344 .map(|l| l.credit_amount - l.debit_amount)
13345 .sum();
13346 let working_capital = current_assets - current_liabilities;
13347
13348 let depreciation: rust_decimal::Decimal = entries
13349 .iter()
13350 .flat_map(|e| e.lines.iter())
13351 .filter(|l| l.account_code.starts_with("60"))
13352 .map(|l| l.debit_amount)
13353 .sum();
13354 let operating_cash_flow = pretax_income + depreciation;
13355
13356 let accounts: Vec<String> = self
13358 .coa
13359 .as_ref()
13360 .map(|coa| {
13361 coa.get_postable_accounts()
13362 .iter()
13363 .map(|acc| acc.account_code().to_string())
13364 .collect()
13365 })
13366 .unwrap_or_default();
13367
13368 let team_member_ids: Vec<String> = self
13370 .master_data
13371 .employees
13372 .iter()
13373 .take(8) .map(|e| e.employee_id.clone())
13375 .collect();
13376 let team_member_pairs: Vec<(String, String)> = self
13377 .master_data
13378 .employees
13379 .iter()
13380 .take(8)
13381 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13382 .collect();
13383
13384 let vendor_names: Vec<String> = self
13385 .master_data
13386 .vendors
13387 .iter()
13388 .map(|v| v.name.clone())
13389 .collect();
13390 let customer_names: Vec<String> = self
13391 .master_data
13392 .customers
13393 .iter()
13394 .map(|c| c.name.clone())
13395 .collect();
13396
13397 let entity_codes: Vec<String> = self
13398 .config
13399 .companies
13400 .iter()
13401 .map(|c| c.code.clone())
13402 .collect();
13403
13404 let journal_entry_ids: Vec<String> = entries
13406 .iter()
13407 .take(50)
13408 .map(|e| e.header.document_id.to_string())
13409 .collect();
13410
13411 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13413 for entry in entries {
13414 for line in &entry.lines {
13415 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13416 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13417 *account_balances
13418 .entry(line.account_code.clone())
13419 .or_insert(0.0) += debit_f64 - credit_f64;
13420 }
13421 }
13422
13423 let control_ids: Vec<String> = Vec::new();
13428 let anomaly_refs: Vec<String> = Vec::new();
13429
13430 let mut context = EngagementContext {
13431 company_code,
13432 company_name,
13433 fiscal_year: start_date.year(),
13434 currency,
13435 total_revenue,
13436 total_assets,
13437 engagement_start: start_date,
13438 report_date: period_end,
13439 pretax_income,
13440 equity,
13441 gross_profit,
13442 working_capital,
13443 operating_cash_flow,
13444 total_debt,
13445 team_member_ids,
13446 team_member_pairs,
13447 accounts,
13448 vendor_names,
13449 customer_names,
13450 journal_entry_ids,
13451 account_balances,
13452 control_ids,
13453 anomaly_refs,
13454 journal_entries: entries.to_vec(),
13455 is_us_listed: false,
13456 entity_codes,
13457 auditor_firm_name: "DataSynth Audit LLP".into(),
13458 accounting_framework: self
13459 .config
13460 .accounting_standards
13461 .framework
13462 .map(|f| match f {
13463 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13464 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13465 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13466 "French GAAP"
13467 }
13468 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13469 "German GAAP"
13470 }
13471 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13472 "Dual Reporting"
13473 }
13474 })
13475 .unwrap_or("IFRS")
13476 .into(),
13477 };
13478
13479 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13481 let rng = ChaCha8Rng::seed_from_u64(seed);
13482 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13483
13484 let mut result = engine
13485 .run_engagement(&context)
13486 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13487
13488 info!(
13489 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13490 {} phases completed, duration {:.1}h",
13491 result.event_log.len(),
13492 result.artifacts.total_artifacts(),
13493 result.anomalies.len(),
13494 result.phases_completed.len(),
13495 result.total_duration_hours,
13496 );
13497
13498 let tb_entity = context.company_code.clone();
13500 let tb_fy = context.fiscal_year;
13501 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13502 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13503 entries,
13504 &tb_entity,
13505 tb_fy,
13506 self.coa.as_ref().map(|c| c.as_ref()),
13507 );
13508
13509 let bag = result.artifacts;
13511 let mut snapshot = AuditSnapshot {
13512 engagements: bag.engagements,
13513 engagement_letters: bag.engagement_letters,
13514 materiality_calculations: bag.materiality_calculations,
13515 risk_assessments: bag.risk_assessments,
13516 combined_risk_assessments: bag.combined_risk_assessments,
13517 workpapers: bag.workpapers,
13518 evidence: bag.evidence,
13519 findings: bag.findings,
13520 judgments: bag.judgments,
13521 sampling_plans: bag.sampling_plans,
13522 sampled_items: bag.sampled_items,
13523 analytical_results: bag.analytical_results,
13524 going_concern_assessments: bag.going_concern_assessments,
13525 subsequent_events: bag.subsequent_events,
13526 audit_opinions: bag.audit_opinions,
13527 key_audit_matters: bag.key_audit_matters,
13528 procedure_steps: bag.procedure_steps,
13529 samples: bag.samples,
13530 confirmations: bag.confirmations,
13531 confirmation_responses: bag.confirmation_responses,
13532 fsm_event_trail: Some(result.event_log),
13534 ..Default::default()
13536 };
13537
13538 {
13540 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13541 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13542 }
13543 {
13544 use datasynth_standards::audit::isa_reference::IsaStandard;
13545 snapshot.isa_mappings = IsaStandard::standard_entries();
13546 }
13547
13548 info!(
13549 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13550 {} risk assessments, {} findings, {} materiality calcs",
13551 snapshot.engagements.len(),
13552 snapshot.workpapers.len(),
13553 snapshot.evidence.len(),
13554 snapshot.risk_assessments.len(),
13555 snapshot.findings.len(),
13556 snapshot.materiality_calculations.len(),
13557 );
13558
13559 Ok(snapshot)
13560 }
13561
13562 fn export_graphs(
13569 &mut self,
13570 entries: &[JournalEntry],
13571 _coa: &Arc<ChartOfAccounts>,
13572 stats: &mut EnhancedGenerationStatistics,
13573 ) -> SynthResult<GraphExportSnapshot> {
13574 let pb = self.create_progress_bar(100, "Exporting Graphs");
13575
13576 let mut snapshot = GraphExportSnapshot::default();
13577
13578 let output_dir = self
13580 .output_path
13581 .clone()
13582 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13583 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13584
13585 for graph_type in &self.config.graph_export.graph_types {
13587 if let Some(pb) = &pb {
13588 pb.inc(10);
13589 }
13590
13591 let graph_config = TransactionGraphConfig {
13593 include_vendors: false,
13594 include_customers: false,
13595 create_debit_credit_edges: true,
13596 include_document_nodes: graph_type.include_document_nodes,
13597 min_edge_weight: graph_type.min_edge_weight,
13598 aggregate_parallel_edges: graph_type.aggregate_edges,
13599 framework: None,
13600 };
13601
13602 let mut builder = TransactionGraphBuilder::new(graph_config);
13603 builder.add_journal_entries(entries);
13604 let graph = builder.build();
13605
13606 stats.graph_node_count += graph.node_count();
13608 stats.graph_edge_count += graph.edge_count();
13609
13610 if let Some(pb) = &pb {
13611 pb.inc(40);
13612 }
13613
13614 for format in &self.config.graph_export.formats {
13616 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13617
13618 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13620 warn!("Failed to create graph output directory: {}", e);
13621 continue;
13622 }
13623
13624 match format {
13625 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13626 let pyg_config = PyGExportConfig {
13627 common: datasynth_graph::CommonExportConfig {
13628 export_node_features: true,
13629 export_edge_features: true,
13630 export_node_labels: true,
13631 export_edge_labels: true,
13632 export_masks: true,
13633 train_ratio: self.config.graph_export.train_ratio,
13634 val_ratio: self.config.graph_export.validation_ratio,
13635 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13636 },
13637 one_hot_categoricals: false,
13638 };
13639
13640 let exporter = PyGExporter::new(pyg_config);
13641 match exporter.export(&graph, &format_dir) {
13642 Ok(metadata) => {
13643 snapshot.exports.insert(
13644 format!("{}_{}", graph_type.name, "pytorch_geometric"),
13645 GraphExportInfo {
13646 name: graph_type.name.clone(),
13647 format: "pytorch_geometric".to_string(),
13648 output_path: format_dir.clone(),
13649 node_count: metadata.num_nodes,
13650 edge_count: metadata.num_edges,
13651 },
13652 );
13653 snapshot.graph_count += 1;
13654 }
13655 Err(e) => {
13656 warn!("Failed to export PyTorch Geometric graph: {}", e);
13657 }
13658 }
13659 }
13660 datasynth_config::schema::GraphExportFormat::Neo4j => {
13661 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13662
13663 let neo4j_config = Neo4jExportConfig {
13664 export_node_properties: true,
13665 export_edge_properties: true,
13666 export_features: true,
13667 generate_cypher: true,
13668 generate_admin_import: true,
13669 database_name: "synth".to_string(),
13670 cypher_batch_size: 1000,
13671 };
13672
13673 let exporter = Neo4jExporter::new(neo4j_config);
13674 match exporter.export(&graph, &format_dir) {
13675 Ok(metadata) => {
13676 snapshot.exports.insert(
13677 format!("{}_{}", graph_type.name, "neo4j"),
13678 GraphExportInfo {
13679 name: graph_type.name.clone(),
13680 format: "neo4j".to_string(),
13681 output_path: format_dir.clone(),
13682 node_count: metadata.num_nodes,
13683 edge_count: metadata.num_edges,
13684 },
13685 );
13686 snapshot.graph_count += 1;
13687 }
13688 Err(e) => {
13689 warn!("Failed to export Neo4j graph: {}", e);
13690 }
13691 }
13692 }
13693 datasynth_config::schema::GraphExportFormat::Dgl => {
13694 use datasynth_graph::{DGLExportConfig, DGLExporter};
13695
13696 let dgl_config = DGLExportConfig {
13697 common: datasynth_graph::CommonExportConfig {
13698 export_node_features: true,
13699 export_edge_features: true,
13700 export_node_labels: true,
13701 export_edge_labels: true,
13702 export_masks: true,
13703 train_ratio: self.config.graph_export.train_ratio,
13704 val_ratio: self.config.graph_export.validation_ratio,
13705 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13706 },
13707 heterogeneous: self.config.graph_export.dgl.heterogeneous,
13708 include_pickle_script: true, };
13710
13711 let exporter = DGLExporter::new(dgl_config);
13712 match exporter.export(&graph, &format_dir) {
13713 Ok(metadata) => {
13714 snapshot.exports.insert(
13715 format!("{}_{}", graph_type.name, "dgl"),
13716 GraphExportInfo {
13717 name: graph_type.name.clone(),
13718 format: "dgl".to_string(),
13719 output_path: format_dir.clone(),
13720 node_count: metadata.common.num_nodes,
13721 edge_count: metadata.common.num_edges,
13722 },
13723 );
13724 snapshot.graph_count += 1;
13725 }
13726 Err(e) => {
13727 warn!("Failed to export DGL graph: {}", e);
13728 }
13729 }
13730 }
13731 datasynth_config::schema::GraphExportFormat::RustGraph => {
13732 use datasynth_graph::{
13733 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13734 };
13735
13736 let rustgraph_config = RustGraphExportConfig {
13737 include_features: true,
13738 include_temporal: true,
13739 include_labels: true,
13740 source_name: "datasynth".to_string(),
13741 batch_id: None,
13742 output_format: RustGraphOutputFormat::JsonLines,
13743 export_node_properties: true,
13744 export_edge_properties: true,
13745 pretty_print: false,
13746 };
13747
13748 let exporter = RustGraphExporter::new(rustgraph_config);
13749 match exporter.export(&graph, &format_dir) {
13750 Ok(metadata) => {
13751 snapshot.exports.insert(
13752 format!("{}_{}", graph_type.name, "rustgraph"),
13753 GraphExportInfo {
13754 name: graph_type.name.clone(),
13755 format: "rustgraph".to_string(),
13756 output_path: format_dir.clone(),
13757 node_count: metadata.num_nodes,
13758 edge_count: metadata.num_edges,
13759 },
13760 );
13761 snapshot.graph_count += 1;
13762 }
13763 Err(e) => {
13764 warn!("Failed to export RustGraph: {}", e);
13765 }
13766 }
13767 }
13768 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13769 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13771 }
13772 }
13773 }
13774
13775 if let Some(pb) = &pb {
13776 pb.inc(40);
13777 }
13778 }
13779
13780 stats.graph_export_count = snapshot.graph_count;
13781 snapshot.exported = snapshot.graph_count > 0;
13782
13783 if let Some(pb) = pb {
13784 pb.finish_with_message(format!(
13785 "Graphs exported: {} graphs ({} nodes, {} edges)",
13786 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13787 ));
13788 }
13789
13790 Ok(snapshot)
13791 }
13792
13793 fn build_additional_graphs(
13798 &self,
13799 banking: &BankingSnapshot,
13800 intercompany: &IntercompanySnapshot,
13801 entries: &[JournalEntry],
13802 stats: &mut EnhancedGenerationStatistics,
13803 ) {
13804 let output_dir = self
13805 .output_path
13806 .clone()
13807 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13808 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13809
13810 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
13812 info!("Phase 10c: Building banking network graph");
13813 let config = BankingGraphConfig::default();
13814 let mut builder = BankingGraphBuilder::new(config);
13815 builder.add_customers(&banking.customers);
13816 builder.add_accounts(&banking.accounts, &banking.customers);
13817 builder.add_transactions(&banking.transactions);
13818 let graph = builder.build();
13819
13820 let node_count = graph.node_count();
13821 let edge_count = graph.edge_count();
13822 stats.graph_node_count += node_count;
13823 stats.graph_edge_count += edge_count;
13824
13825 for format in &self.config.graph_export.formats {
13827 if matches!(
13828 format,
13829 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13830 ) {
13831 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
13832 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13833 warn!("Failed to create banking graph output dir: {}", e);
13834 continue;
13835 }
13836 let pyg_config = PyGExportConfig::default();
13837 let exporter = PyGExporter::new(pyg_config);
13838 if let Err(e) = exporter.export(&graph, &format_dir) {
13839 warn!("Failed to export banking graph as PyG: {}", e);
13840 } else {
13841 info!(
13842 "Banking network graph exported: {} nodes, {} edges",
13843 node_count, edge_count
13844 );
13845 }
13846 }
13847 }
13848 }
13849
13850 let approval_entries: Vec<_> = entries
13852 .iter()
13853 .filter(|je| je.header.approval_workflow.is_some())
13854 .collect();
13855
13856 if !approval_entries.is_empty() {
13857 info!(
13858 "Phase 10c: Building approval network graph ({} entries with approvals)",
13859 approval_entries.len()
13860 );
13861 let config = ApprovalGraphConfig::default();
13862 let mut builder = ApprovalGraphBuilder::new(config);
13863
13864 for je in &approval_entries {
13865 if let Some(ref wf) = je.header.approval_workflow {
13866 for action in &wf.actions {
13867 let record = datasynth_core::models::ApprovalRecord {
13868 approval_id: format!(
13869 "APR-{}-{}",
13870 je.header.document_id, action.approval_level
13871 ),
13872 document_number: je.header.document_id.to_string(),
13873 document_type: "JE".to_string(),
13874 company_code: je.company_code().to_string(),
13875 requester_id: wf.preparer_id.clone(),
13876 requester_name: Some(wf.preparer_name.clone()),
13877 approver_id: action.actor_id.clone(),
13878 approver_name: action.actor_name.clone(),
13879 approval_date: je.posting_date(),
13880 action: format!("{:?}", action.action),
13881 amount: wf.amount,
13882 approval_limit: None,
13883 comments: action.comments.clone(),
13884 delegation_from: None,
13885 is_auto_approved: false,
13886 };
13887 builder.add_approval(&record);
13888 }
13889 }
13890 }
13891
13892 let graph = builder.build();
13893 let node_count = graph.node_count();
13894 let edge_count = graph.edge_count();
13895 stats.graph_node_count += node_count;
13896 stats.graph_edge_count += edge_count;
13897
13898 for format in &self.config.graph_export.formats {
13900 if matches!(
13901 format,
13902 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13903 ) {
13904 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
13905 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13906 warn!("Failed to create approval graph output dir: {}", e);
13907 continue;
13908 }
13909 let pyg_config = PyGExportConfig::default();
13910 let exporter = PyGExporter::new(pyg_config);
13911 if let Err(e) = exporter.export(&graph, &format_dir) {
13912 warn!("Failed to export approval graph as PyG: {}", e);
13913 } else {
13914 info!(
13915 "Approval network graph exported: {} nodes, {} edges",
13916 node_count, edge_count
13917 );
13918 }
13919 }
13920 }
13921 }
13922
13923 if self.config.companies.len() >= 2 {
13925 info!(
13926 "Phase 10c: Building entity relationship graph ({} companies)",
13927 self.config.companies.len()
13928 );
13929
13930 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13931 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
13932
13933 let parent_code = &self.config.companies[0].code;
13935 let mut companies: Vec<datasynth_core::models::Company> =
13936 Vec::with_capacity(self.config.companies.len());
13937
13938 let first = &self.config.companies[0];
13940 companies.push(datasynth_core::models::Company::parent(
13941 &first.code,
13942 &first.name,
13943 &first.country,
13944 &first.currency,
13945 ));
13946
13947 for cc in self.config.companies.iter().skip(1) {
13949 companies.push(datasynth_core::models::Company::subsidiary(
13950 &cc.code,
13951 &cc.name,
13952 &cc.country,
13953 &cc.currency,
13954 parent_code,
13955 rust_decimal::Decimal::from(100),
13956 ));
13957 }
13958
13959 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
13961 self.config
13962 .companies
13963 .iter()
13964 .skip(1)
13965 .enumerate()
13966 .map(|(i, cc)| {
13967 let mut rel =
13968 datasynth_core::models::intercompany::IntercompanyRelationship::new(
13969 format!("REL{:03}", i + 1),
13970 parent_code.clone(),
13971 cc.code.clone(),
13972 rust_decimal::Decimal::from(100),
13973 start_date,
13974 );
13975 rel.functional_currency = cc.currency.clone();
13976 rel
13977 })
13978 .collect();
13979
13980 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13981 builder.add_companies(&companies);
13982 builder.add_ownership_relationships(&relationships);
13983
13984 for pair in &intercompany.matched_pairs {
13986 builder.add_intercompany_edge(
13987 &pair.seller_company,
13988 &pair.buyer_company,
13989 pair.amount,
13990 &format!("{:?}", pair.transaction_type),
13991 );
13992 }
13993
13994 let graph = builder.build();
13995 let node_count = graph.node_count();
13996 let edge_count = graph.edge_count();
13997 stats.graph_node_count += node_count;
13998 stats.graph_edge_count += edge_count;
13999
14000 for format in &self.config.graph_export.formats {
14002 if matches!(
14003 format,
14004 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14005 ) {
14006 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14007 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14008 warn!("Failed to create entity graph output dir: {}", e);
14009 continue;
14010 }
14011 let pyg_config = PyGExportConfig::default();
14012 let exporter = PyGExporter::new(pyg_config);
14013 if let Err(e) = exporter.export(&graph, &format_dir) {
14014 warn!("Failed to export entity graph as PyG: {}", e);
14015 } else {
14016 info!(
14017 "Entity relationship graph exported: {} nodes, {} edges",
14018 node_count, edge_count
14019 );
14020 }
14021 }
14022 }
14023 } else {
14024 debug!(
14025 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14026 self.config.companies.len()
14027 );
14028 }
14029 }
14030
/// Assembles a multi-layer hypergraph from the generated data and writes it to
/// disk.
///
/// The builder is configured from `self.config.graph_export.hypergraph` and is
/// then fed, in order: the COSO framework, internal controls, master data
/// (vendors, customers, employees), process documents (P2P, O2C, S2C, H2R,
/// manufacturing, banking, audit, bank reconciliations), OCPM events,
/// compliance regulations, the chart of accounts and finally the journal
/// entries.
///
/// The output directory is
/// `<output root>/<graph_export.output_subdirectory>/<hypergraph.output_subdirectory>`,
/// where the output root is `self.output_path` when set and the configured
/// output directory otherwise.
///
/// # Errors
///
/// Returns `SynthError::generation` when the on-disk export fails. Failures in
/// the optional streaming step (compiled only with the `streaming` feature) are
/// logged as warnings and do not fail the call.
///
/// # Side effects
///
/// Adds the exported node and edge counts to `stats` and increments
/// `stats.graph_export_count` by one. The hyperedge count is returned in the
/// result but is not added to `stats`.
#[allow(clippy::too_many_arguments)]
fn export_hypergraph(
    &self,
    coa: &Arc<ChartOfAccounts>,
    entries: &[JournalEntry],
    document_flows: &DocumentFlowSnapshot,
    sourcing: &SourcingSnapshot,
    hr: &HrSnapshot,
    manufacturing: &ManufacturingSnapshot,
    banking: &BankingSnapshot,
    audit: &AuditSnapshot,
    financial_reporting: &FinancialReportingSnapshot,
    ocpm: &OcpmSnapshot,
    compliance: &ComplianceRegulationsSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<HypergraphExportInfo> {
    use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
    use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
    use datasynth_graph::models::hypergraph::AggregationStrategy;

    let hg_settings = &self.config.graph_export.hypergraph;

    // Map the configured strategy name onto the enum. Any unrecognised name
    // silently falls back to `PoolByCounterparty`.
    let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
        "truncate" => AggregationStrategy::Truncate,
        "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
        "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
        "importance_sample" => AggregationStrategy::ImportanceSample,
        _ => AggregationStrategy::PoolByCounterparty,
    };

    // Most flags are copied straight from the per-layer settings. Compliance
    // follows the top-level compliance switch, and the tax / treasury / ESG /
    // project / intercompany / temporal-event flags are hard-coded to `true`.
    let builder_config = HypergraphConfig {
        max_nodes: hg_settings.max_nodes,
        aggregation_strategy,
        include_coso: hg_settings.governance_layer.include_coso,
        include_controls: hg_settings.governance_layer.include_controls,
        include_sox: hg_settings.governance_layer.include_sox,
        include_vendors: hg_settings.governance_layer.include_vendors,
        include_customers: hg_settings.governance_layer.include_customers,
        include_employees: hg_settings.governance_layer.include_employees,
        include_p2p: hg_settings.process_layer.include_p2p,
        include_o2c: hg_settings.process_layer.include_o2c,
        include_s2c: hg_settings.process_layer.include_s2c,
        include_h2r: hg_settings.process_layer.include_h2r,
        include_mfg: hg_settings.process_layer.include_mfg,
        include_bank: hg_settings.process_layer.include_bank,
        include_audit: hg_settings.process_layer.include_audit,
        include_r2r: hg_settings.process_layer.include_r2r,
        events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
        docs_per_counterparty_threshold: hg_settings
            .process_layer
            .docs_per_counterparty_threshold,
        include_accounts: hg_settings.accounting_layer.include_accounts,
        je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
        include_cross_layer_edges: hg_settings.cross_layer.enabled,
        include_compliance: self.config.compliance_regulations.enabled,
        include_tax: true,
        include_treasury: true,
        include_esg: true,
        include_project: true,
        include_intercompany: true,
        include_temporal_events: true,
    };

    let mut builder = HypergraphBuilder::new(builder_config);

    // Called unconditionally; presumably the builder consults `include_coso`
    // itself — TODO confirm.
    builder.add_coso_framework();

    // Standard controls are added only when both the hypergraph governance
    // layer and the internal-controls feature are enabled.
    if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
        let controls = InternalControl::standard_controls();
        builder.add_controls(&controls);
    }

    // Master data.
    builder.add_vendors(&self.master_data.vendors);
    builder.add_customers(&self.master_data.customers);
    builder.add_employees(&self.master_data.employees);

    // Process documents, one call per business process.
    builder.add_p2p_documents(
        &document_flows.purchase_orders,
        &document_flows.goods_receipts,
        &document_flows.vendor_invoices,
        &document_flows.payments,
    );
    builder.add_o2c_documents(
        &document_flows.sales_orders,
        &document_flows.deliveries,
        &document_flows.customer_invoices,
    );
    builder.add_s2c_documents(
        &sourcing.sourcing_projects,
        &sourcing.qualifications,
        &sourcing.rfx_events,
        &sourcing.bids,
        &sourcing.bid_evaluations,
        &sourcing.contracts,
    );
    builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
    builder.add_mfg_documents(
        &manufacturing.production_orders,
        &manufacturing.quality_inspections,
        &manufacturing.cycle_counts,
    );
    builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
    builder.add_audit_documents(
        &audit.engagements,
        &audit.workpapers,
        &audit.findings,
        &audit.evidence,
        &audit.risk_assessments,
        &audit.judgments,
        &audit.materiality_calculations,
        &audit.audit_opinions,
        &audit.going_concern_assessments,
    );
    builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);

    // OCPM events are optional: nothing is added when no event log exists.
    if let Some(ref event_log) = ocpm.event_log {
        builder.add_ocpm_events(event_log);
    }

    // Compliance regulations require both the compliance feature and the
    // governance-layer controls flag.
    if self.config.compliance_regulations.enabled
        && hg_settings.governance_layer.include_controls
    {
        // Resolve each generated standard record against the built-in
        // registry; records whose id is not found there are dropped.
        let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
        let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
            .standard_records
            .iter()
            .filter_map(|r| {
                let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
                registry.get(&sid).cloned()
            })
            .collect();

        builder.add_compliance_regulations(
            &standards,
            &compliance.findings,
            &compliance.filings,
        );
    }

    // Accounting layer: accounts first, then the journal entries.
    builder.add_accounts(coa);
    builder.add_journal_entries_as_hyperedges(entries);

    let hypergraph = builder.build();

    // An explicit output path on `self` wins over the configured directory.
    let output_dir = self
        .output_path
        .clone()
        .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
    let hg_dir = output_dir
        .join(&self.config.graph_export.output_subdirectory)
        .join(&hg_settings.output_subdirectory);

    // "unified" selects the unified RustGraph exporter; every other value
    // (including unrecognised ones) uses the default hypergraph exporter.
    let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
        "unified" => {
            let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
            let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                SynthError::generation(format!("Unified hypergraph export failed: {e}"))
            })?;
            (
                metadata.num_nodes,
                metadata.num_edges,
                metadata.num_hyperedges,
            )
        }
        _ => {
            let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
            let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                SynthError::generation(format!("Hypergraph export failed: {e}"))
            })?;
            (
                metadata.num_nodes,
                metadata.num_edges,
                metadata.num_hyperedges,
            )
        }
    };

    // Optional streaming, compiled only with the `streaming` feature and run
    // only when a stream target is configured. It always uses the unified
    // exporter, whatever `output_format` says. The API key is read from the
    // `RUSTGRAPH_API_KEY` environment variable and may be absent. Every
    // failure here is logged as a warning; none is returned to the caller.
    #[cfg(feature = "streaming")]
    if let Some(ref target_url) = hg_settings.stream_target {
        use crate::stream_client::{StreamClient, StreamConfig};
        use std::io::Write as _;

        let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
        let stream_config = StreamConfig {
            target_url: target_url.clone(),
            batch_size: hg_settings.stream_batch_size,
            api_key,
            ..StreamConfig::default()
        };

        match StreamClient::new(stream_config) {
            Ok(mut client) => {
                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
                match exporter.export_to_writer(&hypergraph, &mut client) {
                    Ok(_) => {
                        if let Err(e) = client.flush() {
                            warn!("Failed to flush stream client: {}", e);
                        } else {
                            info!("Streamed {} records to {}", client.total_sent(), target_url);
                        }
                    }
                    Err(e) => {
                        warn!("Streaming export failed: {}", e);
                    }
                }
            }
            Err(e) => {
                warn!("Failed to create stream client: {}", e);
            }
        }
    }

    // Fold the on-disk export into the running statistics.
    stats.graph_node_count += num_nodes;
    stats.graph_edge_count += num_edges;
    stats.graph_export_count += 1;

    Ok(HypergraphExportInfo {
        node_count: num_nodes,
        edge_count: num_edges,
        hyperedge_count: num_hyperedges,
        output_path: hg_dir,
    })
}
14277
14278 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14283 let pb = self.create_progress_bar(100, "Generating Banking Data");
14284
14285 let orchestrator = BankingOrchestratorBuilder::new()
14287 .config(self.config.banking.clone())
14288 .seed(self.seed + 9000)
14289 .country_pack(self.primary_pack().clone())
14290 .build();
14291
14292 if let Some(pb) = &pb {
14293 pb.inc(10);
14294 }
14295
14296 let result = orchestrator.generate();
14298
14299 if let Some(pb) = &pb {
14300 pb.inc(90);
14301 pb.finish_with_message(format!(
14302 "Banking: {} customers, {} transactions",
14303 result.customers.len(),
14304 result.transactions.len()
14305 ));
14306 }
14307
14308 let mut banking_customers = result.customers;
14313 let core_customers = &self.master_data.customers;
14314 if !core_customers.is_empty() {
14315 for (i, bc) in banking_customers.iter_mut().enumerate() {
14316 let core = &core_customers[i % core_customers.len()];
14317 bc.name = CustomerName::business(&core.name);
14318 bc.residence_country = core.country.clone();
14319 bc.enterprise_customer_id = Some(core.customer_id.clone());
14320 }
14321 debug!(
14322 "Cross-referenced {} banking customers with {} core customers",
14323 banking_customers.len(),
14324 core_customers.len()
14325 );
14326 }
14327
14328 Ok(BankingSnapshot {
14329 customers: banking_customers,
14330 accounts: result.accounts,
14331 transactions: result.transactions,
14332 transaction_labels: result.transaction_labels,
14333 customer_labels: result.customer_labels,
14334 account_labels: result.account_labels,
14335 relationship_labels: result.relationship_labels,
14336 narratives: result.narratives,
14337 suspicious_count: result.stats.suspicious_count,
14338 scenario_count: result.scenarios.len(),
14339 })
14340 }
14341
14342 fn calculate_total_transactions(&self) -> u64 {
14344 let months = self.config.global.period_months as f64;
14345 self.config
14346 .companies
14347 .iter()
14348 .map(|c| {
14349 let annual = c.annual_transaction_volume.count() as f64;
14350 let weighted = annual * c.volume_weight;
14351 (weighted * months / 12.0) as u64
14352 })
14353 .sum()
14354 }
14355
14356 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14358 if !self.phase_config.show_progress {
14359 return None;
14360 }
14361
14362 let pb = if let Some(mp) = &self.multi_progress {
14363 mp.add(ProgressBar::new(total))
14364 } else {
14365 ProgressBar::new(total)
14366 };
14367
14368 pb.set_style(
14369 ProgressStyle::default_bar()
14370 .template(&format!(
14371 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14372 ))
14373 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14374 .progress_chars("#>-"),
14375 );
14376
14377 Some(pb)
14378 }
14379
14380 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14382 self.coa.clone()
14383 }
14384
/// Borrows the master-data snapshot stored on `self`.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
14389
14390 fn phase_compliance_regulations(
14392 &mut self,
14393 _stats: &mut EnhancedGenerationStatistics,
14394 ) -> SynthResult<ComplianceRegulationsSnapshot> {
14395 if !self.phase_config.generate_compliance_regulations {
14396 return Ok(ComplianceRegulationsSnapshot::default());
14397 }
14398
14399 info!("Phase: Generating Compliance Regulations Data");
14400
14401 let cr_config = &self.config.compliance_regulations;
14402
14403 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14405 self.config
14406 .companies
14407 .iter()
14408 .map(|c| c.country.clone())
14409 .collect::<std::collections::HashSet<_>>()
14410 .into_iter()
14411 .collect()
14412 } else {
14413 cr_config.jurisdictions.clone()
14414 };
14415
14416 let fallback_date =
14418 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14419 let reference_date = cr_config
14420 .reference_date
14421 .as_ref()
14422 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14423 .unwrap_or_else(|| {
14424 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14425 .unwrap_or(fallback_date)
14426 });
14427
14428 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14430 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14431 let cross_reference_records = reg_gen.generate_cross_reference_records();
14432 let jurisdiction_records =
14433 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14434
14435 info!(
14436 " Standards: {} records, {} cross-references, {} jurisdictions",
14437 standard_records.len(),
14438 cross_reference_records.len(),
14439 jurisdiction_records.len()
14440 );
14441
14442 let audit_procedures = if cr_config.audit_procedures.enabled {
14444 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14445 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14446 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14447 confidence_level: cr_config.audit_procedures.confidence_level,
14448 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14449 };
14450 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14451 self.seed + 9000,
14452 proc_config,
14453 );
14454 let registry = reg_gen.registry();
14455 let mut all_procs = Vec::new();
14456 for jurisdiction in &jurisdictions {
14457 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14458 all_procs.extend(procs);
14459 }
14460 info!(" Audit procedures: {}", all_procs.len());
14461 all_procs
14462 } else {
14463 Vec::new()
14464 };
14465
14466 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14468 let finding_config =
14469 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14470 finding_rate: cr_config.findings.finding_rate,
14471 material_weakness_rate: cr_config.findings.material_weakness_rate,
14472 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14473 generate_remediation: cr_config.findings.generate_remediation,
14474 };
14475 let mut finding_gen =
14476 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14477 self.seed + 9100,
14478 finding_config,
14479 );
14480 let mut all_findings = Vec::new();
14481 for company in &self.config.companies {
14482 let company_findings =
14483 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14484 all_findings.extend(company_findings);
14485 }
14486 info!(" Compliance findings: {}", all_findings.len());
14487 all_findings
14488 } else {
14489 Vec::new()
14490 };
14491
14492 let filings = if cr_config.filings.enabled {
14494 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14495 filing_types: cr_config.filings.filing_types.clone(),
14496 generate_status_progression: cr_config.filings.generate_status_progression,
14497 };
14498 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14499 self.seed + 9200,
14500 filing_config,
14501 );
14502 let company_codes: Vec<String> = self
14503 .config
14504 .companies
14505 .iter()
14506 .map(|c| c.code.clone())
14507 .collect();
14508 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14509 .unwrap_or(fallback_date);
14510 let filings = filing_gen.generate_filings(
14511 &company_codes,
14512 &jurisdictions,
14513 start_date,
14514 self.config.global.period_months,
14515 );
14516 info!(" Regulatory filings: {}", filings.len());
14517 filings
14518 } else {
14519 Vec::new()
14520 };
14521
14522 let compliance_graph = if cr_config.graph.enabled {
14524 let graph_config = datasynth_graph::ComplianceGraphConfig {
14525 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14526 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14527 include_cross_references: cr_config.graph.include_cross_references,
14528 include_supersession_edges: cr_config.graph.include_supersession_edges,
14529 include_account_links: cr_config.graph.include_account_links,
14530 include_control_links: cr_config.graph.include_control_links,
14531 include_company_links: cr_config.graph.include_company_links,
14532 };
14533 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14534
14535 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14537 .iter()
14538 .map(|r| datasynth_graph::StandardNodeInput {
14539 standard_id: r.standard_id.clone(),
14540 title: r.title.clone(),
14541 category: r.category.clone(),
14542 domain: r.domain.clone(),
14543 is_active: r.is_active,
14544 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14545 applicable_account_types: r.applicable_account_types.clone(),
14546 applicable_processes: r.applicable_processes.clone(),
14547 })
14548 .collect();
14549 builder.add_standards(&standard_inputs);
14550
14551 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14553 jurisdiction_records
14554 .iter()
14555 .map(|r| datasynth_graph::JurisdictionNodeInput {
14556 country_code: r.country_code.clone(),
14557 country_name: r.country_name.clone(),
14558 framework: r.accounting_framework.clone(),
14559 standard_count: r.standard_count,
14560 tax_rate: r.statutory_tax_rate,
14561 })
14562 .collect();
14563 builder.add_jurisdictions(&jurisdiction_inputs);
14564
14565 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14567 cross_reference_records
14568 .iter()
14569 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14570 from_standard: r.from_standard.clone(),
14571 to_standard: r.to_standard.clone(),
14572 relationship: r.relationship.clone(),
14573 convergence_level: r.convergence_level,
14574 })
14575 .collect();
14576 builder.add_cross_references(&xref_inputs);
14577
14578 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14580 .iter()
14581 .map(|r| datasynth_graph::JurisdictionMappingInput {
14582 country_code: r.jurisdiction.clone(),
14583 standard_id: r.standard_id.clone(),
14584 })
14585 .collect();
14586 builder.add_jurisdiction_mappings(&mapping_inputs);
14587
14588 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14590 .iter()
14591 .map(|p| datasynth_graph::ProcedureNodeInput {
14592 procedure_id: p.procedure_id.clone(),
14593 standard_id: p.standard_id.clone(),
14594 procedure_type: p.procedure_type.clone(),
14595 sample_size: p.sample_size,
14596 confidence_level: p.confidence_level,
14597 })
14598 .collect();
14599 builder.add_procedures(&proc_inputs);
14600
14601 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14603 .iter()
14604 .map(|f| datasynth_graph::FindingNodeInput {
14605 finding_id: f.finding_id.to_string(),
14606 standard_id: f
14607 .related_standards
14608 .first()
14609 .map(|s| s.as_str().to_string())
14610 .unwrap_or_default(),
14611 severity: f.severity.to_string(),
14612 deficiency_level: f.deficiency_level.to_string(),
14613 severity_score: f.deficiency_level.severity_score(),
14614 control_id: f.control_id.clone(),
14615 affected_accounts: f.affected_accounts.clone(),
14616 })
14617 .collect();
14618 builder.add_findings(&finding_inputs);
14619
14620 if cr_config.graph.include_account_links {
14622 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14623 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14624 for std_record in &standard_records {
14625 if let Some(std_obj) =
14626 registry.get(&datasynth_core::models::compliance::StandardId::parse(
14627 &std_record.standard_id,
14628 ))
14629 {
14630 for acct_type in &std_obj.applicable_account_types {
14631 account_links.push(datasynth_graph::AccountLinkInput {
14632 standard_id: std_record.standard_id.clone(),
14633 account_code: acct_type.clone(),
14634 account_name: acct_type.clone(),
14635 });
14636 }
14637 }
14638 }
14639 builder.add_account_links(&account_links);
14640 }
14641
14642 if cr_config.graph.include_control_links {
14644 let mut control_links = Vec::new();
14645 let sox_like_ids: Vec<String> = standard_records
14647 .iter()
14648 .filter(|r| {
14649 r.standard_id.starts_with("SOX")
14650 || r.standard_id.starts_with("PCAOB-AS-2201")
14651 })
14652 .map(|r| r.standard_id.clone())
14653 .collect();
14654 let control_ids = [
14656 ("C001", "Cash Controls"),
14657 ("C002", "Large Transaction Approval"),
14658 ("C010", "PO Approval"),
14659 ("C011", "Three-Way Match"),
14660 ("C020", "Revenue Recognition"),
14661 ("C021", "Credit Check"),
14662 ("C030", "Manual JE Approval"),
14663 ("C031", "Period Close Review"),
14664 ("C032", "Account Reconciliation"),
14665 ("C040", "Payroll Processing"),
14666 ("C050", "Fixed Asset Capitalization"),
14667 ("C060", "Intercompany Elimination"),
14668 ];
14669 for sox_id in &sox_like_ids {
14670 for (ctrl_id, ctrl_name) in &control_ids {
14671 control_links.push(datasynth_graph::ControlLinkInput {
14672 standard_id: sox_id.clone(),
14673 control_id: ctrl_id.to_string(),
14674 control_name: ctrl_name.to_string(),
14675 });
14676 }
14677 }
14678 builder.add_control_links(&control_links);
14679 }
14680
14681 if cr_config.graph.include_company_links {
14683 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14684 .iter()
14685 .enumerate()
14686 .map(|(i, f)| datasynth_graph::FilingNodeInput {
14687 filing_id: format!("F{:04}", i + 1),
14688 filing_type: f.filing_type.to_string(),
14689 company_code: f.company_code.clone(),
14690 jurisdiction: f.jurisdiction.clone(),
14691 status: format!("{:?}", f.status),
14692 })
14693 .collect();
14694 builder.add_filings(&filing_inputs);
14695 }
14696
14697 let graph = builder.build();
14698 info!(
14699 " Compliance graph: {} nodes, {} edges",
14700 graph.nodes.len(),
14701 graph.edges.len()
14702 );
14703 Some(graph)
14704 } else {
14705 None
14706 };
14707
14708 self.check_resources_with_log("post-compliance-regulations")?;
14709
14710 Ok(ComplianceRegulationsSnapshot {
14711 standard_records,
14712 cross_reference_records,
14713 jurisdiction_records,
14714 audit_procedures,
14715 findings,
14716 filings,
14717 compliance_graph,
14718 })
14719 }
14720
14721 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14723 use super::lineage::LineageGraphBuilder;
14724
14725 let mut builder = LineageGraphBuilder::new();
14726
14727 builder.add_config_section("config:global", "Global Config");
14729 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14730 builder.add_config_section("config:transactions", "Transaction Config");
14731
14732 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14734 builder.add_generator_phase("phase:je", "Journal Entry Generation");
14735
14736 builder.configured_by("phase:coa", "config:chart_of_accounts");
14738 builder.configured_by("phase:je", "config:transactions");
14739
14740 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14742 builder.produced_by("output:je", "phase:je");
14743
14744 if self.phase_config.generate_master_data {
14746 builder.add_config_section("config:master_data", "Master Data Config");
14747 builder.add_generator_phase("phase:master_data", "Master Data Generation");
14748 builder.configured_by("phase:master_data", "config:master_data");
14749 builder.input_to("phase:master_data", "phase:je");
14750 }
14751
14752 if self.phase_config.generate_document_flows {
14753 builder.add_config_section("config:document_flows", "Document Flow Config");
14754 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14755 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14756 builder.configured_by("phase:p2p", "config:document_flows");
14757 builder.configured_by("phase:o2c", "config:document_flows");
14758
14759 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14760 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14761 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14762 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14763 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14764
14765 builder.produced_by("output:po", "phase:p2p");
14766 builder.produced_by("output:gr", "phase:p2p");
14767 builder.produced_by("output:vi", "phase:p2p");
14768 builder.produced_by("output:so", "phase:o2c");
14769 builder.produced_by("output:ci", "phase:o2c");
14770 }
14771
14772 if self.phase_config.inject_anomalies {
14773 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14774 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14775 builder.configured_by("phase:anomaly", "config:fraud");
14776 builder.add_output_file(
14777 "output:labels",
14778 "Anomaly Labels",
14779 "labels/anomaly_labels.csv",
14780 );
14781 builder.produced_by("output:labels", "phase:anomaly");
14782 }
14783
14784 if self.phase_config.generate_audit {
14785 builder.add_config_section("config:audit", "Audit Config");
14786 builder.add_generator_phase("phase:audit", "Audit Data Generation");
14787 builder.configured_by("phase:audit", "config:audit");
14788 }
14789
14790 if self.phase_config.generate_banking {
14791 builder.add_config_section("config:banking", "Banking Config");
14792 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
14793 builder.configured_by("phase:banking", "config:banking");
14794 }
14795
14796 if self.config.llm.enabled {
14797 builder.add_config_section("config:llm", "LLM Enrichment Config");
14798 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
14799 builder.configured_by("phase:llm_enrichment", "config:llm");
14800 }
14801
14802 if self.config.diffusion.enabled {
14803 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
14804 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
14805 builder.configured_by("phase:diffusion", "config:diffusion");
14806 }
14807
14808 if self.config.causal.enabled {
14809 builder.add_config_section("config:causal", "Causal Generation Config");
14810 builder.add_generator_phase("phase:causal", "Causal Overlay");
14811 builder.configured_by("phase:causal", "config:causal");
14812 }
14813
14814 builder.build()
14815 }
14816
14817 fn compute_company_revenue(
14826 entries: &[JournalEntry],
14827 company_code: &str,
14828 ) -> rust_decimal::Decimal {
14829 use rust_decimal::Decimal;
14830 let mut revenue = Decimal::ZERO;
14831 for je in entries {
14832 if je.header.company_code != company_code {
14833 continue;
14834 }
14835 for line in &je.lines {
14836 if line.gl_account.starts_with('4') {
14837 revenue += line.credit_amount - line.debit_amount;
14839 }
14840 }
14841 }
14842 revenue.max(Decimal::ZERO)
14843 }
14844
14845 fn compute_entity_net_assets(
14849 entries: &[JournalEntry],
14850 entity_code: &str,
14851 ) -> rust_decimal::Decimal {
14852 use rust_decimal::Decimal;
14853 let mut asset_net = Decimal::ZERO;
14854 let mut liability_net = Decimal::ZERO;
14855 for je in entries {
14856 if je.header.company_code != entity_code {
14857 continue;
14858 }
14859 for line in &je.lines {
14860 if line.gl_account.starts_with('1') {
14861 asset_net += line.debit_amount - line.credit_amount;
14862 } else if line.gl_account.starts_with('2') {
14863 liability_net += line.credit_amount - line.debit_amount;
14864 }
14865 }
14866 }
14867 asset_net - liability_net
14868 }
14869
14870 fn phase_statistical_validation(
14881 &self,
14882 entries: &[JournalEntry],
14883 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
14884 use datasynth_config::schema::StatisticalTestConfig;
14885 use datasynth_core::distributions::{
14886 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
14887 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
14888 };
14889 use rust_decimal::prelude::ToPrimitive;
14890
14891 let cfg = &self.config.distributions.validation;
14892 if !cfg.enabled {
14893 return Ok(None);
14894 }
14895
14896 let amounts: Vec<rust_decimal::Decimal> = entries
14899 .iter()
14900 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
14901 .filter(|a| *a > rust_decimal::Decimal::ZERO)
14902 .collect();
14903
14904 let paired_amount_linecount: Vec<(f64, f64)> = entries
14908 .iter()
14909 .filter_map(|je| {
14910 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
14911 if amt > rust_decimal::Decimal::ZERO {
14912 amt.to_f64().map(|a| (a, je.lines.len() as f64))
14913 } else {
14914 None
14915 }
14916 })
14917 .collect();
14918
14919 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
14920 for test_cfg in &cfg.tests {
14921 match test_cfg {
14922 StatisticalTestConfig::BenfordFirstDigit {
14923 threshold_mad,
14924 warning_mad,
14925 } => {
14926 results.push(run_benford_first_digit(
14927 &amounts,
14928 *threshold_mad,
14929 *warning_mad,
14930 ));
14931 }
14932 StatisticalTestConfig::ChiSquared { bins, significance } => {
14933 results.push(run_chi_squared(&amounts, *bins, *significance));
14934 }
14935 StatisticalTestConfig::DistributionFit {
14936 target: _,
14937 ks_significance,
14938 method: _,
14939 } => {
14940 results.push(run_ks_uniform_log(&amounts, *ks_significance));
14943 }
14944 StatisticalTestConfig::AndersonDarling {
14945 target: _,
14946 significance,
14947 } => {
14948 results.push(run_anderson_darling(&amounts, *significance));
14951 }
14952 StatisticalTestConfig::CorrelationCheck {
14953 expected_correlations,
14954 } => {
14955 if expected_correlations.is_empty() {
14959 results.push(StatisticalTestResult {
14960 name: "correlation_check".to_string(),
14961 outcome: TestOutcome::Skipped,
14962 statistic: 0.0,
14963 threshold: 0.0,
14964 message: "no expected correlations declared".to_string(),
14965 });
14966 } else {
14967 for ec in expected_correlations {
14968 let pair_key = format!("{}_{}", ec.field1, ec.field2);
14969 let is_amount_linecount = (ec.field1 == "amount"
14970 && ec.field2 == "line_count")
14971 || (ec.field1 == "line_count" && ec.field2 == "amount");
14972 if is_amount_linecount {
14973 let xs: Vec<f64> =
14974 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
14975 let ys: Vec<f64> =
14976 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
14977 results.push(run_correlation_check(
14978 &pair_key,
14979 &xs,
14980 &ys,
14981 ec.expected_r,
14982 ec.tolerance,
14983 ));
14984 } else {
14985 results.push(StatisticalTestResult {
14986 name: format!("correlation_check_{pair_key}"),
14987 outcome: TestOutcome::Skipped,
14988 statistic: 0.0,
14989 threshold: ec.tolerance,
14990 message: format!(
14991 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
14992 ec.field1, ec.field2
14993 ),
14994 });
14995 }
14996 }
14997 }
14998 }
14999 }
15000 }
15001
15002 let report = StatisticalValidationReport {
15003 sample_count: amounts.len(),
15004 results,
15005 };
15006
15007 if cfg.reporting.fail_on_error && !report.all_passed() {
15008 let failed = report.failed_names().join(", ");
15009 return Err(SynthError::validation(format!(
15010 "statistical validation failed: {failed}"
15011 )));
15012 }
15013
15014 Ok(Some(report))
15015 }
15016
15017 fn phase_analytics_metadata(
15030 &mut self,
15031 entries: &[JournalEntry],
15032 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15033 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15034 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15035 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15036 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15037 use std::collections::BTreeMap;
15038
15039 let mut snap = AnalyticsMetadataSnapshot::default();
15040
15041 if !self.phase_config.generate_analytics_metadata {
15042 return Ok(snap);
15043 }
15044
15045 let cfg = &self.config.analytics_metadata;
15046 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15047 .map(|d| d.year())
15048 .unwrap_or(2025);
15049
15050 if cfg.prior_year {
15052 let mut gen = PriorYearGenerator::new(self.seed + 9100);
15053 for company in &self.config.companies {
15054 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15057 BTreeMap::new();
15058 for je in entries {
15059 if je.header.company_code != company.code {
15060 continue;
15061 }
15062 for line in &je.lines {
15063 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15064 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15065 });
15066 entry.1 += line.debit_amount - line.credit_amount;
15067 }
15068 }
15069 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15070 .into_iter()
15071 .filter(|(_, (_, bal))| !bal.is_zero())
15072 .map(|(code, (name, bal))| (code, name, bal))
15073 .collect();
15074 if !current.is_empty() {
15075 let comparatives =
15076 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
15077 snap.prior_year_comparatives.extend(comparatives);
15078 }
15079 }
15080 info!(
15081 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15082 snap.prior_year_comparatives.len(),
15083 self.config.companies.len()
15084 );
15085 }
15086
15087 if cfg.industry_benchmark {
15089 use datasynth_core::models::IndustrySector;
15090 let industry = match self.config.global.industry {
15091 IndustrySector::Manufacturing => "manufacturing",
15092 IndustrySector::Retail => "retail",
15093 IndustrySector::FinancialServices => "financial_services",
15094 IndustrySector::Technology => "technology",
15095 IndustrySector::Healthcare => "healthcare",
15096 _ => "other",
15097 };
15098 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15099 let benchmarks = gen.generate(industry, fiscal_year);
15100 info!(
15101 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15102 benchmarks.len()
15103 );
15104 snap.industry_benchmarks = benchmarks;
15105 }
15106
15107 if cfg.management_reports {
15109 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15110 let period_months = self.config.global.period_months;
15111 for company in &self.config.companies {
15112 let reports =
15113 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15114 snap.management_reports.extend(reports);
15115 }
15116 info!(
15117 "v3.3.0 analytics: {} management reports across {} companies",
15118 snap.management_reports.len(),
15119 self.config.companies.len()
15120 );
15121 }
15122
15123 if cfg.drift_events {
15125 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15126 .expect("hardcoded NaiveDate 2025-01-01 is valid");
15127 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15128 .unwrap_or(fallback_start);
15129 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15130 let mut gen = DriftEventGenerator::new(self.seed + 9400);
15131 let drifts = gen.generate_standalone_drifts(start_date, end_date);
15132 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15133 snap.drift_events = drifts;
15134 }
15135 let _ = entries;
15137
15138 Ok(snap)
15139 }
15140}
15141
15142fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15144 match format {
15145 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15146 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15147 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15148 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15149 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15150 }
15151}
15152
15153fn compute_trial_balance_entries(
15158 entries: &[JournalEntry],
15159 entity_code: &str,
15160 fiscal_year: i32,
15161 coa: Option<&ChartOfAccounts>,
15162) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15163 use std::collections::BTreeMap;
15164
15165 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15166 BTreeMap::new();
15167
15168 for je in entries {
15169 for line in &je.lines {
15170 let entry = balances.entry(line.account_code.clone()).or_default();
15171 entry.0 += line.debit_amount;
15172 entry.1 += line.credit_amount;
15173 }
15174 }
15175
15176 balances
15177 .into_iter()
15178 .map(
15179 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15180 account_description: coa
15181 .and_then(|c| c.get_account(&account_code))
15182 .map(|a| a.description().to_string())
15183 .unwrap_or_else(|| account_code.clone()),
15184 account_code,
15185 debit_balance: debit,
15186 credit_balance: credit,
15187 net_balance: debit - credit,
15188 entity_code: entity_code.to_string(),
15189 period: format!("FY{}", fiscal_year),
15190 },
15191 )
15192 .collect()
15193}
15194
15195#[cfg(test)]
15196#[allow(clippy::unwrap_used)]
15197mod tests {
15198 use super::*;
15199 use datasynth_config::schema::*;
15200
15201 fn create_test_config() -> GeneratorConfig {
15202 GeneratorConfig {
15203 global: GlobalConfig {
15204 industry: IndustrySector::Manufacturing,
15205 start_date: "2024-01-01".to_string(),
15206 period_months: 1,
15207 seed: Some(42),
15208 parallel: false,
15209 group_currency: "USD".to_string(),
15210 presentation_currency: None,
15211 worker_threads: 0,
15212 memory_limit_mb: 0,
15213 fiscal_year_months: None,
15214 },
15215 companies: vec![CompanyConfig {
15216 code: "1000".to_string(),
15217 name: "Test Company".to_string(),
15218 currency: "USD".to_string(),
15219 functional_currency: None,
15220 country: "US".to_string(),
15221 annual_transaction_volume: TransactionVolume::TenK,
15222 volume_weight: 1.0,
15223 fiscal_year_variant: "K4".to_string(),
15224 }],
15225 chart_of_accounts: ChartOfAccountsConfig {
15226 complexity: CoAComplexity::Small,
15227 industry_specific: true,
15228 custom_accounts: None,
15229 min_hierarchy_depth: 2,
15230 max_hierarchy_depth: 4,
15231 },
15232 transactions: TransactionConfig::default(),
15233 output: OutputConfig::default(),
15234 fraud: FraudConfig::default(),
15235 internal_controls: InternalControlsConfig::default(),
15236 business_processes: BusinessProcessConfig::default(),
15237 user_personas: UserPersonaConfig::default(),
15238 templates: TemplateConfig::default(),
15239 approval: ApprovalConfig::default(),
15240 departments: DepartmentConfig::default(),
15241 master_data: MasterDataConfig::default(),
15242 document_flows: DocumentFlowConfig::default(),
15243 intercompany: IntercompanyConfig::default(),
15244 balance: BalanceConfig::default(),
15245 ocpm: OcpmConfig::default(),
15246 audit: AuditGenerationConfig::default(),
15247 banking: datasynth_banking::BankingConfig::default(),
15248 data_quality: DataQualitySchemaConfig::default(),
15249 scenario: ScenarioConfig::default(),
15250 temporal: TemporalDriftConfig::default(),
15251 graph_export: GraphExportConfig::default(),
15252 streaming: StreamingSchemaConfig::default(),
15253 rate_limit: RateLimitSchemaConfig::default(),
15254 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15255 relationships: RelationshipSchemaConfig::default(),
15256 accounting_standards: AccountingStandardsConfig::default(),
15257 audit_standards: AuditStandardsConfig::default(),
15258 distributions: Default::default(),
15259 temporal_patterns: Default::default(),
15260 vendor_network: VendorNetworkSchemaConfig::default(),
15261 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15262 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15263 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15264 organizational_events: OrganizationalEventsSchemaConfig::default(),
15265 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15266 market_drift: MarketDriftSchemaConfig::default(),
15267 drift_labeling: DriftLabelingSchemaConfig::default(),
15268 anomaly_injection: Default::default(),
15269 industry_specific: Default::default(),
15270 fingerprint_privacy: Default::default(),
15271 quality_gates: Default::default(),
15272 compliance: Default::default(),
15273 webhooks: Default::default(),
15274 llm: Default::default(),
15275 diffusion: Default::default(),
15276 causal: Default::default(),
15277 source_to_pay: Default::default(),
15278 financial_reporting: Default::default(),
15279 hr: Default::default(),
15280 manufacturing: Default::default(),
15281 sales_quotes: Default::default(),
15282 tax: Default::default(),
15283 treasury: Default::default(),
15284 project_accounting: Default::default(),
15285 esg: Default::default(),
15286 country_packs: None,
15287 scenarios: Default::default(),
15288 session: Default::default(),
15289 compliance_regulations: Default::default(),
15290 analytics_metadata: Default::default(),
15291 }
15292 }
15293
/// Constructing an orchestrator with default phases succeeds for the baseline config.
#[test]
fn test_enhanced_orchestrator_creation() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
15300
/// A journal-entries-only run completes and yields at least one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();

    // Unwrap directly instead of asserting `is_ok()` first, so a failure
    // reports the underlying error rather than a bare "assertion failed".
    let result = orchestrator
        .generate()
        .expect("minimal journal-entry generation should succeed");

    assert!(!result.journal_entries.is_empty());
}
15320
/// A master-data-only run produces vendors, customers and materials.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let master = &output.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
15345
/// With master data and document flows enabled, both P2P and O2C chains and
/// their leading documents (purchase orders, sales orders) are generated.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let flows = &output.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document lists were populated as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
15379
/// With anomaly injection enabled, journal entries are produced and the
/// anomaly label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    assert!(output.anomaly_labels.summary.is_some());
}
15402
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that every stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    // Core generation stages.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices.
    assert!(!output.subledger.ap_invoices.is_empty());
    assert!(!output.subledger.ar_invoices.is_empty());

    // Balance validation ran and processed entries.
    assert!(output.balance_validation.validated);
    assert!(output.balance_validation.entries_processed > 0);
}
15444
/// Subledger AP/AR invoice counts match the vendor/customer invoices from the
/// document flows, and the statistics report those same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // AP side: same number of subledger invoices as vendor invoices.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // AR side: same number of subledger invoices as customer invoices.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
15500
/// Balance validation over generated journal entries reports no unbalanced
/// entries and equal total debits and credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation ran over a non-empty set of entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
15530
/// Reported statistics equal the sizes of the collections actually generated.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
15569
/// Pins the default phase switches: everything on except anomaly injection,
/// with positive default vendor and customer counts.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
15582
/// A freshly built orchestrator has no chart of accounts until `generate` runs.
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(orch.get_coa().is_none());
}
15591
/// After a generation run the chart of accounts is available via `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters; the result is discarded.
    let _ = orch.generate().unwrap();

    assert!(orch.get_coa().is_some());
}
15610
/// A master-data-only run with five records per category yields vendors.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    assert!(!output.master_data.vendors.is_empty());
}
15634
/// `with_progress(false)` turns off `show_progress` in the phase config.
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = built.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
15645
/// With a second company added, master data is generated for both companies
/// (five vendors and customers each) and the company count is two.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Two companies at five per company gives at least ten of each.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
15682
/// With master data generation disabled but document flows enabled, the
/// result must contain neither P2P nor O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();

    // Document flows are requested, but master data is not generated for them.
    let phases = PhaseConfig {
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
15702
/// Checks that `statistics.total_line_items` equals the sum of `line_count()`
/// over every generated journal entry.
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total directly from the entries, independently of the
    // value reported in the statistics.
    let mut recomputed: u64 = 0;
    for entry in output.journal_entries.iter() {
        recomputed += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, recomputed);
}
15726
/// Runs journal-entry generation with the audit phase enabled (two
/// engagements) and checks three things:
///
/// 1. exactly two engagements exist and the core audit collections are
///    non-empty,
/// 2. the ISA 505 / 330 / 520 / 610 / 550 collections are non-empty,
/// 3. every audit counter in the statistics equals the length of the
///    corresponding collection.
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // Audit-specific knobs.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        // Journal entries are the only other phase enabled.
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Standard-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Each reported counter must match the collection it summarizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
15846
/// Confirms that the test configuration has the LLM, diffusion, and causal
/// sections disabled, and that a journal-entry-only run then reports zeroed
/// LLM/diffusion/causal statistics, no causal validation outcome, and no
/// counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Precondition: none of the optional phases is enabled in the config.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
15878
/// Enables counterfactual generation on a journal-entry-only run and checks
/// that one counterfactual pair exists per journal entry, that the statistics
/// report the same count, and that all `pair_id`s are distinct.
///
/// The assertions only run when at least one journal entry was generated; with
/// an empty journal the test passes without checking anything.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        // NOTE(review): period close is explicitly disabled, presumably so
        // close-generated entries do not disturb the one-pair-per-entry
        // count checked below — confirm.
        generate_period_close: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    if !result.journal_entries.is_empty() {
        let entry_count = result.journal_entries.len();

        assert_eq!(result.counterfactual_pairs.len(), entry_count);
        assert_eq!(result.statistics.counterfactual_pair_count, entry_count);

        // Borrow the ids instead of cloning them: the set is only used to
        // count distinct values.
        let unique_ids: std::collections::HashSet<_> = result
            .counterfactual_pairs
            .iter()
            .map(|pair| &pair.pair_id)
            .collect();
        assert_eq!(unique_ids.len(), result.counterfactual_pairs.len());
    }
}
15915
/// Enables LLM enrichment with `max_vendor_enrichments = 3` on a
/// master-data-only run (five vendors per company) and checks that between
/// one and three vendors were enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // At least one vendor enriched, and never more than the configured cap.
    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
15943
/// Enables diffusion enhancement (50 steps, sample size 20) on a
/// journal-entry-only run and expects exactly 20 diffusion samples in the
/// statistics, matching the `sample_size` set here.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let samples = output.statistics.diffusion_samples_generated;
    assert_eq!(samples, 20);
}
15966
/// Enables the causal overlay with the `fraud_detection` template, a sample
/// size of 100, and validation on; expects 100 causal samples and a recorded
/// validation outcome (`Some(_)`).
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    assert!(stats.causal_validation_passed.is_some());
}
15992
/// Enables the causal overlay with the `revenue_cycle` template, a sample
/// size of 50, and validation off; expects 50 causal samples and no
/// validation outcome (`None`).
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_none());
}
16018
/// Turns on LLM enrichment, diffusion enhancement, and the causal overlay in
/// the same run (master data plus journal entries) and checks that each phase
/// reports its output: at least one enriched vendor, 10 diffusion samples,
/// 50 causal samples, and a recorded causal validation outcome.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal overlay (template left at the test configuration's value).
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
16054
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and checks
/// that the LLM, diffusion, and causal fields come back unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        total_entries: 100,
        total_line_items: 500,
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
16081
/// Deserializes a JSON payload that contains none of the LLM, diffusion, or
/// causal fields and checks that those fields come back as zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any `llm_*`, `diffusion_*`, or `causal_*` keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Fields absent from the payload must take their zero / empty values.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
16131}