1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Phase toggles and volume settings.
///
/// The boolean fields switch individual phases on or off; the `usize` fields
/// are volume counts. How each value is consumed is decided by code outside
/// this definition.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Toggle: master data generation.
    pub generate_master_data: bool,
    /// Toggle: document flow generation.
    pub generate_document_flows: bool,
    /// Toggle: OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Toggle: journal entry generation.
    pub generate_journal_entries: bool,
    /// Toggle: anomaly injection.
    pub inject_anomalies: bool,
    /// Toggle: data quality injection.
    pub inject_data_quality: bool,
    /// Toggle: balance validation.
    pub validate_balances: bool,
    /// Toggle: progress display.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// P2P chain count.
    pub p2p_chains: usize,
    /// O2C chain count.
    pub o2c_chains: usize,
    /// Toggle: audit generation.
    pub generate_audit: bool,
    /// Audit engagement count.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Toggle: banking generation.
    pub generate_banking: bool,
    /// Toggle: graph export.
    pub generate_graph_export: bool,
    /// Toggle: sourcing generation.
    pub generate_sourcing: bool,
    /// Toggle: bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Toggle: financial statement generation.
    pub generate_financial_statements: bool,
    /// Toggle: accounting standards generation.
    pub generate_accounting_standards: bool,
    /// Toggle: manufacturing generation.
    pub generate_manufacturing: bool,
    /// Toggle: sales quotes, KPIs and budgets generation.
    pub generate_sales_kpi_budgets: bool,
    /// Toggle: tax generation.
    pub generate_tax: bool,
    /// Toggle: ESG generation.
    pub generate_esg: bool,
    /// Toggle: intercompany generation.
    pub generate_intercompany: bool,
    /// Toggle: evolution event generation.
    pub generate_evolution_events: bool,
    /// Toggle: counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Toggle: compliance regulations generation.
    pub generate_compliance_regulations: bool,
    /// Toggle: period close generation.
    pub generate_period_close: bool,
    /// Toggle: HR generation.
    pub generate_hr: bool,
    /// Toggle: treasury generation.
    pub generate_treasury: bool,
    /// Toggle: project accounting generation.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration.
    ///
    /// Enabled by default: master data, document flows, journal entries,
    /// balance validation, progress display, evolution events and period
    /// close. Every other toggle is off. Volume counts are fixed constants.
    fn default() -> Self {
        Self {
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_evolution_events: true,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_period_close: true,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
        }
    }
}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
443#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446 pub vendors: Vec<Vendor>,
448 pub customers: Vec<Customer>,
450 pub materials: Vec<Material>,
452 pub assets: Vec<FixedAsset>,
454 pub employees: Vec<Employee>,
456 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Size figures and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Node count.
    pub node_count: usize,
    /// Edge count.
    pub edge_count: usize,
    /// Hyperedge count.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
474
475#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478 pub p2p_chains: Vec<P2PDocumentChain>,
480 pub o2c_chains: Vec<O2CDocumentChain>,
482 pub purchase_orders: Vec<documents::PurchaseOrder>,
484 pub goods_receipts: Vec<documents::GoodsReceipt>,
486 pub vendor_invoices: Vec<documents::VendorInvoice>,
488 pub sales_orders: Vec<documents::SalesOrder>,
490 pub deliveries: Vec<documents::Delivery>,
492 pub customer_invoices: Vec<documents::CustomerInvoice>,
494 pub payments: Vec<documents::Payment>,
496 pub document_references: Vec<documents::DocumentReference>,
499}
500
501#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504 pub ap_invoices: Vec<APInvoice>,
506 pub ar_invoices: Vec<ARInvoice>,
508 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514 pub ar_aging_reports: Vec<ARAgingReport>,
516 pub ap_aging_reports: Vec<APAgingReport>,
518 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531 pub event_log: Option<OcpmEventLog>,
533 pub event_count: usize,
535 pub object_count: usize,
537 pub case_count: usize,
539}
540
541#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544 pub engagements: Vec<AuditEngagement>,
546 pub workpapers: Vec<Workpaper>,
548 pub evidence: Vec<AuditEvidence>,
550 pub risk_assessments: Vec<RiskAssessment>,
552 pub findings: Vec<AuditFinding>,
554 pub judgments: Vec<ProfessionalJudgment>,
556 pub confirmations: Vec<ExternalConfirmation>,
558 pub confirmation_responses: Vec<ConfirmationResponse>,
560 pub procedure_steps: Vec<AuditProcedureStep>,
562 pub samples: Vec<AuditSample>,
564 pub analytical_results: Vec<AnalyticalProcedureResult>,
566 pub ia_functions: Vec<InternalAuditFunction>,
568 pub ia_reports: Vec<InternalAuditReport>,
570 pub related_parties: Vec<RelatedParty>,
572 pub related_party_transactions: Vec<RelatedPartyTransaction>,
574 pub component_auditors: Vec<ComponentAuditor>,
577 pub group_audit_plan: Option<GroupAuditPlan>,
579 pub component_instructions: Vec<ComponentInstruction>,
581 pub component_reports: Vec<ComponentAuditorReport>,
583 pub engagement_letters: Vec<EngagementLetter>,
586 pub subsequent_events: Vec<SubsequentEvent>,
589 pub service_organizations: Vec<ServiceOrganization>,
592 pub soc_reports: Vec<SocReport>,
594 pub user_entity_controls: Vec<UserEntityControl>,
596 pub going_concern_assessments:
599 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600 pub accounting_estimates:
603 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614 pub materiality_calculations:
617 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618 pub combined_risk_assessments:
621 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627 pub significant_transaction_classes:
630 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634 pub analytical_relationships:
637 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657 pub customers: Vec<BankingCustomer>,
659 pub accounts: Vec<BankAccount>,
661 pub transactions: Vec<BankTransaction>,
663 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673 pub suspicious_count: usize,
675 pub scenario_count: usize,
677}
678
679#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682 pub exported: bool,
684 pub graph_count: usize,
686 pub exports: HashMap<String, GraphExportInfo>,
688}
689
690#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693 pub name: String,
695 pub format: String,
697 pub output_path: PathBuf,
699 pub node_count: usize,
701 pub edge_count: usize,
703}
704
705#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708 pub spend_analyses: Vec<SpendAnalysis>,
710 pub sourcing_projects: Vec<SourcingProject>,
712 pub qualifications: Vec<SupplierQualification>,
714 pub rfx_events: Vec<RfxEvent>,
716 pub bids: Vec<SupplierBid>,
718 pub bid_evaluations: Vec<BidEvaluation>,
720 pub contracts: Vec<ProcurementContract>,
722 pub catalog_items: Vec<CatalogItem>,
724 pub scorecards: Vec<SupplierScorecard>,
726}
727
728#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731 pub fiscal_year: u16,
733 pub fiscal_period: u8,
735 pub period_start: NaiveDate,
737 pub period_end: NaiveDate,
739 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746 pub financial_statements: Vec<FinancialStatement>,
749 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752 pub consolidated_statements: Vec<FinancialStatement>,
754 pub consolidation_schedules: Vec<ConsolidationSchedule>,
756 pub bank_reconciliations: Vec<BankReconciliation>,
758 pub trial_balances: Vec<PeriodTrialBalance>,
760 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771 pub payroll_runs: Vec<PayrollRun>,
773 pub payroll_line_items: Vec<PayrollLineItem>,
775 pub time_entries: Vec<TimeEntry>,
777 pub expense_reports: Vec<ExpenseReport>,
779 pub benefit_enrollments: Vec<BenefitEnrollment>,
781 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789 pub pension_journal_entries: Vec<JournalEntry>,
791 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795 pub stock_comp_journal_entries: Vec<JournalEntry>,
797 pub payroll_run_count: usize,
799 pub payroll_line_item_count: usize,
801 pub time_entry_count: usize,
803 pub expense_report_count: usize,
805 pub benefit_enrollment_count: usize,
807 pub pension_plan_count: usize,
809 pub stock_grant_count: usize,
811}
812
813#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820 pub business_combinations:
822 Vec<datasynth_core::models::business_combination::BusinessCombination>,
823 pub business_combination_journal_entries: Vec<JournalEntry>,
825 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827 pub ecl_provision_movements:
829 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830 pub ecl_journal_entries: Vec<JournalEntry>,
832 pub provisions: Vec<datasynth_core::models::provision::Provision>,
834 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838 pub provision_journal_entries: Vec<JournalEntry>,
840 pub currency_translation_results:
842 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843 pub revenue_contract_count: usize,
845 pub impairment_test_count: usize,
847 pub business_combination_count: usize,
849 pub ecl_model_count: usize,
851 pub provision_count: usize,
853 pub currency_translation_count: usize,
855}
856
857#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872 pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879 pub production_orders: Vec<ProductionOrder>,
881 pub quality_inspections: Vec<QualityInspection>,
883 pub cycle_counts: Vec<CycleCount>,
885 pub bom_components: Vec<BomComponent>,
887 pub inventory_movements: Vec<InventoryMovement>,
889 pub production_order_count: usize,
891 pub quality_inspection_count: usize,
893 pub cycle_count_count: usize,
895 pub bom_component_count: usize,
897 pub inventory_movement_count: usize,
899}
900
901#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904 pub sales_quotes: Vec<SalesQuote>,
906 pub kpis: Vec<ManagementKpi>,
908 pub budgets: Vec<Budget>,
910 pub sales_quote_count: usize,
912 pub kpi_count: usize,
914 pub budget_line_count: usize,
916}
917
918#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921 pub labels: Vec<LabeledAnomaly>,
923 pub summary: Option<AnomalySummary>,
925 pub by_type: HashMap<String, usize>,
927}
928
929#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932 pub validated: bool,
934 pub is_balanced: bool,
936 pub entries_processed: u64,
938 pub total_debits: rust_decimal::Decimal,
940 pub total_credits: rust_decimal::Decimal,
942 pub accounts_tracked: usize,
944 pub companies_tracked: usize,
946 pub validation_errors: Vec<ValidationError>,
948 pub has_unbalanced_entries: bool,
950}
951
952#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955 pub jurisdictions: Vec<TaxJurisdiction>,
957 pub codes: Vec<TaxCode>,
959 pub tax_lines: Vec<TaxLine>,
961 pub tax_returns: Vec<TaxReturn>,
963 pub tax_provisions: Vec<TaxProvision>,
965 pub withholding_records: Vec<WithholdingTaxRecord>,
967 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969 pub jurisdiction_count: usize,
971 pub code_count: usize,
973 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975 pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986 pub seller_journal_entries: Vec<JournalEntry>,
988 pub buyer_journal_entries: Vec<JournalEntry>,
990 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994 #[serde(skip)]
996 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997 pub matched_pair_count: usize,
999 pub elimination_entry_count: usize,
1001 pub match_rate: f64,
1003}
1004
1005#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008 pub emissions: Vec<EmissionRecord>,
1010 pub energy: Vec<EnergyConsumption>,
1012 pub water: Vec<WaterUsage>,
1014 pub waste: Vec<WasteRecord>,
1016 pub diversity: Vec<WorkforceDiversityMetric>,
1018 pub pay_equity: Vec<PayEquityMetric>,
1020 pub safety_incidents: Vec<SafetyIncident>,
1022 pub safety_metrics: Vec<SafetyMetric>,
1024 pub governance: Vec<GovernanceMetric>,
1026 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028 pub materiality: Vec<MaterialityAssessment>,
1030 pub disclosures: Vec<EsgDisclosure>,
1032 pub climate_scenarios: Vec<ClimateScenario>,
1034 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036 pub emission_count: usize,
1038 pub disclosure_count: usize,
1040}
1041
1042#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045 pub cash_positions: Vec<CashPosition>,
1047 pub cash_forecasts: Vec<CashForecast>,
1049 pub cash_pools: Vec<CashPool>,
1051 pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053 pub hedging_instruments: Vec<HedgingInstrument>,
1055 pub hedge_relationships: Vec<HedgeRelationship>,
1057 pub debt_instruments: Vec<DebtInstrument>,
1059 pub bank_guarantees: Vec<BankGuarantee>,
1061 pub netting_runs: Vec<NettingRun>,
1063 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065 pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073 pub projects: Vec<Project>,
1075 pub cost_lines: Vec<ProjectCostLine>,
1077 pub revenue_records: Vec<ProjectRevenue>,
1079 pub earned_value_metrics: Vec<EarnedValueMetric>,
1081 pub change_orders: Vec<ChangeOrder>,
1083 pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090 pub chart_of_accounts: ChartOfAccounts,
1092 pub master_data: MasterDataSnapshot,
1094 pub document_flows: DocumentFlowSnapshot,
1096 pub subledger: SubledgerSnapshot,
1098 pub ocpm: OcpmSnapshot,
1100 pub audit: AuditSnapshot,
1102 pub banking: BankingSnapshot,
1104 pub graph_export: GraphExportSnapshot,
1106 pub sourcing: SourcingSnapshot,
1108 pub financial_reporting: FinancialReportingSnapshot,
1110 pub hr: HrSnapshot,
1112 pub accounting_standards: AccountingStandardsSnapshot,
1114 pub manufacturing: ManufacturingSnapshot,
1116 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118 pub tax: TaxSnapshot,
1120 pub esg: EsgSnapshot,
1122 pub treasury: TreasurySnapshot,
1124 pub project_accounting: ProjectAccountingSnapshot,
1126 pub process_evolution: Vec<ProcessEvolutionEvent>,
1128 pub organizational_events: Vec<OrganizationalEvent>,
1130 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132 pub intercompany: IntercompanySnapshot,
1134 pub journal_entries: Vec<JournalEntry>,
1136 pub anomaly_labels: AnomalyLabels,
1138 pub balance_validation: BalanceValidationResult,
1140 pub data_quality_stats: DataQualityStats,
1142 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144 pub statistics: EnhancedGenerationStatistics,
1146 pub lineage: Option<super::lineage::LineageGraph>,
1148 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150 pub internal_controls: Vec<InternalControl>,
1152 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156 pub opening_balances: Vec<GeneratedOpeningBalance>,
1158 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166 pub temporal_vendor_chains:
1168 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175 pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182 pub total_entries: u64,
1184 pub total_line_items: u64,
1186 pub accounts_count: usize,
1188 pub companies_count: usize,
1190 pub period_months: u32,
1192 pub vendor_count: usize,
1194 pub customer_count: usize,
1195 pub material_count: usize,
1196 pub asset_count: usize,
1197 pub employee_count: usize,
1198 pub p2p_chain_count: usize,
1200 pub o2c_chain_count: usize,
1201 pub ap_invoice_count: usize,
1203 pub ar_invoice_count: usize,
1204 pub ocpm_event_count: usize,
1206 pub ocpm_object_count: usize,
1207 pub ocpm_case_count: usize,
1208 pub audit_engagement_count: usize,
1210 pub audit_workpaper_count: usize,
1211 pub audit_evidence_count: usize,
1212 pub audit_risk_count: usize,
1213 pub audit_finding_count: usize,
1214 pub audit_judgment_count: usize,
1215 #[serde(default)]
1217 pub audit_confirmation_count: usize,
1218 #[serde(default)]
1219 pub audit_confirmation_response_count: usize,
1220 #[serde(default)]
1222 pub audit_procedure_step_count: usize,
1223 #[serde(default)]
1224 pub audit_sample_count: usize,
1225 #[serde(default)]
1227 pub audit_analytical_result_count: usize,
1228 #[serde(default)]
1230 pub audit_ia_function_count: usize,
1231 #[serde(default)]
1232 pub audit_ia_report_count: usize,
1233 #[serde(default)]
1235 pub audit_related_party_count: usize,
1236 #[serde(default)]
1237 pub audit_related_party_transaction_count: usize,
1238 pub anomalies_injected: usize,
1240 pub data_quality_issues: usize,
1242 pub banking_customer_count: usize,
1244 pub banking_account_count: usize,
1245 pub banking_transaction_count: usize,
1246 pub banking_suspicious_count: usize,
1247 pub graph_export_count: usize,
1249 pub graph_node_count: usize,
1250 pub graph_edge_count: usize,
1251 #[serde(default)]
1253 pub llm_enrichment_ms: u64,
1254 #[serde(default)]
1256 pub llm_vendors_enriched: usize,
1257 #[serde(default)]
1259 pub diffusion_enhancement_ms: u64,
1260 #[serde(default)]
1262 pub diffusion_samples_generated: usize,
1263 #[serde(default, skip_serializing_if = "Option::is_none")]
1266 pub neural_hybrid_weight: Option<f64>,
1267 #[serde(default, skip_serializing_if = "Option::is_none")]
1269 pub neural_hybrid_strategy: Option<String>,
1270 #[serde(default, skip_serializing_if = "Option::is_none")]
1272 pub neural_routed_column_count: Option<usize>,
1273 #[serde(default)]
1275 pub causal_generation_ms: u64,
1276 #[serde(default)]
1278 pub causal_samples_generated: usize,
1279 #[serde(default)]
1281 pub causal_validation_passed: Option<bool>,
1282 #[serde(default)]
1284 pub sourcing_project_count: usize,
1285 #[serde(default)]
1286 pub rfx_event_count: usize,
1287 #[serde(default)]
1288 pub bid_count: usize,
1289 #[serde(default)]
1290 pub contract_count: usize,
1291 #[serde(default)]
1292 pub catalog_item_count: usize,
1293 #[serde(default)]
1294 pub scorecard_count: usize,
1295 #[serde(default)]
1297 pub financial_statement_count: usize,
1298 #[serde(default)]
1299 pub bank_reconciliation_count: usize,
1300 #[serde(default)]
1302 pub payroll_run_count: usize,
1303 #[serde(default)]
1304 pub time_entry_count: usize,
1305 #[serde(default)]
1306 pub expense_report_count: usize,
1307 #[serde(default)]
1308 pub benefit_enrollment_count: usize,
1309 #[serde(default)]
1310 pub pension_plan_count: usize,
1311 #[serde(default)]
1312 pub stock_grant_count: usize,
1313 #[serde(default)]
1315 pub revenue_contract_count: usize,
1316 #[serde(default)]
1317 pub impairment_test_count: usize,
1318 #[serde(default)]
1319 pub business_combination_count: usize,
1320 #[serde(default)]
1321 pub ecl_model_count: usize,
1322 #[serde(default)]
1323 pub provision_count: usize,
1324 #[serde(default)]
1326 pub production_order_count: usize,
1327 #[serde(default)]
1328 pub quality_inspection_count: usize,
1329 #[serde(default)]
1330 pub cycle_count_count: usize,
1331 #[serde(default)]
1332 pub bom_component_count: usize,
1333 #[serde(default)]
1334 pub inventory_movement_count: usize,
1335 #[serde(default)]
1337 pub sales_quote_count: usize,
1338 #[serde(default)]
1339 pub kpi_count: usize,
1340 #[serde(default)]
1341 pub budget_line_count: usize,
1342 #[serde(default)]
1344 pub tax_jurisdiction_count: usize,
1345 #[serde(default)]
1346 pub tax_code_count: usize,
1347 #[serde(default)]
1349 pub esg_emission_count: usize,
1350 #[serde(default)]
1351 pub esg_disclosure_count: usize,
1352 #[serde(default)]
1354 pub ic_matched_pair_count: usize,
1355 #[serde(default)]
1356 pub ic_elimination_count: usize,
1357 #[serde(default)]
1359 pub ic_transaction_count: usize,
1360 #[serde(default)]
1362 pub fa_subledger_count: usize,
1363 #[serde(default)]
1365 pub inventory_subledger_count: usize,
1366 #[serde(default)]
1368 pub treasury_debt_instrument_count: usize,
1369 #[serde(default)]
1371 pub treasury_hedging_instrument_count: usize,
1372 #[serde(default)]
1374 pub project_count: usize,
1375 #[serde(default)]
1377 pub project_change_order_count: usize,
1378 #[serde(default)]
1380 pub tax_provision_count: usize,
1381 #[serde(default)]
1383 pub opening_balance_count: usize,
1384 #[serde(default)]
1386 pub subledger_reconciliation_count: usize,
1387 #[serde(default)]
1389 pub tax_line_count: usize,
1390 #[serde(default)]
1392 pub project_cost_line_count: usize,
1393 #[serde(default)]
1395 pub cash_position_count: usize,
1396 #[serde(default)]
1398 pub cash_forecast_count: usize,
1399 #[serde(default)]
1401 pub cash_pool_count: usize,
1402 #[serde(default)]
1404 pub process_evolution_event_count: usize,
1405 #[serde(default)]
1407 pub organizational_event_count: usize,
1408 #[serde(default)]
1410 pub counterfactual_pair_count: usize,
1411 #[serde(default)]
1413 pub red_flag_count: usize,
1414 #[serde(default)]
1416 pub collusion_ring_count: usize,
1417 #[serde(default)]
1419 pub temporal_version_chain_count: usize,
1420 #[serde(default)]
1422 pub entity_relationship_node_count: usize,
1423 #[serde(default)]
1425 pub entity_relationship_edge_count: usize,
1426 #[serde(default)]
1428 pub cross_process_link_count: usize,
1429 #[serde(default)]
1431 pub disruption_event_count: usize,
1432 #[serde(default)]
1434 pub industry_gl_account_count: usize,
1435 #[serde(default)]
1437 pub period_close_je_count: usize,
1438}
1439
1440pub struct EnhancedOrchestrator {
1442 config: GeneratorConfig,
1443 phase_config: PhaseConfig,
1444 coa: Option<Arc<ChartOfAccounts>>,
1445 master_data: MasterDataSnapshot,
1446 seed: u64,
1447 multi_progress: Option<MultiProgress>,
1448 resource_guard: ResourceGuard,
1450 output_path: Option<PathBuf>,
1452 copula_generators: Vec<CopulaGeneratorSpec>,
1454 country_pack_registry: datasynth_core::CountryPackRegistry,
1456 phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1458}
1459
1460impl EnhancedOrchestrator {
1461 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1463 datasynth_config::validate_config(&config)?;
1464
1465 let seed = config.global.seed.unwrap_or_else(rand::random);
1466
1467 let resource_guard = Self::build_resource_guard(&config, None);
1469
1470 let country_pack_registry = match &config.country_packs {
1472 Some(cp) => {
1473 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1474 .map_err(|e| SynthError::config(e.to_string()))?
1475 }
1476 None => datasynth_core::CountryPackRegistry::builtin_only()
1477 .map_err(|e| SynthError::config(e.to_string()))?,
1478 };
1479
1480 Ok(Self {
1481 config,
1482 phase_config,
1483 coa: None,
1484 master_data: MasterDataSnapshot::default(),
1485 seed,
1486 multi_progress: None,
1487 resource_guard,
1488 output_path: None,
1489 copula_generators: Vec::new(),
1490 country_pack_registry,
1491 phase_sink: None,
1492 })
1493 }
1494
1495 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1497 Self::new(config, PhaseConfig::default())
1498 }
1499
1500 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1502 self.phase_sink = Some(sink);
1503 self
1504 }
1505
1506 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1508 self.phase_sink = Some(sink);
1509 }
1510
1511 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1513 if let Some(ref sink) = self.phase_sink {
1514 for item in items {
1515 if let Ok(value) = serde_json::to_value(item) {
1516 if let Err(e) = sink.emit(phase, type_name, &value) {
1517 warn!(
1518 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1519 );
1520 }
1521 }
1522 }
1523 if let Err(e) = sink.phase_complete(phase) {
1524 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1525 }
1526 }
1527 }
1528
1529 pub fn with_progress(mut self, show: bool) -> Self {
1531 self.phase_config.show_progress = show;
1532 if show {
1533 self.multi_progress = Some(MultiProgress::new());
1534 }
1535 self
1536 }
1537
1538 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1540 let path = path.into();
1541 self.output_path = Some(path.clone());
1542 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1544 self
1545 }
1546
1547 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1549 &self.country_pack_registry
1550 }
1551
1552 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1554 self.country_pack_registry.get_by_str(country)
1555 }
1556
1557 fn primary_country_code(&self) -> &str {
1560 self.config
1561 .companies
1562 .first()
1563 .map(|c| c.country.as_str())
1564 .unwrap_or("US")
1565 }
1566
1567 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1569 self.country_pack_for(self.primary_country_code())
1570 }
1571
1572 fn resolve_coa_framework(&self) -> CoAFramework {
1574 if self.config.accounting_standards.enabled {
1575 match self.config.accounting_standards.framework {
1576 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1577 return CoAFramework::FrenchPcg;
1578 }
1579 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1580 return CoAFramework::GermanSkr04;
1581 }
1582 _ => {}
1583 }
1584 }
1585 let pack = self.primary_pack();
1587 match pack.accounting.framework.as_str() {
1588 "french_gaap" => CoAFramework::FrenchPcg,
1589 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1590 _ => CoAFramework::UsGaap,
1591 }
1592 }
1593
1594 pub fn has_copulas(&self) -> bool {
1599 !self.copula_generators.is_empty()
1600 }
1601
1602 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1608 &self.copula_generators
1609 }
1610
1611 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1615 &mut self.copula_generators
1616 }
1617
1618 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1622 self.copula_generators
1623 .iter_mut()
1624 .find(|c| c.name == copula_name)
1625 .map(|c| c.generator.sample())
1626 }
1627
1628 pub fn from_fingerprint(
1651 fingerprint_path: &std::path::Path,
1652 phase_config: PhaseConfig,
1653 scale: f64,
1654 ) -> SynthResult<Self> {
1655 info!("Loading fingerprint from: {}", fingerprint_path.display());
1656
1657 let reader = FingerprintReader::new();
1659 let fingerprint = reader
1660 .read_from_file(fingerprint_path)
1661 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1662
1663 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1664 }
1665
1666 pub fn from_fingerprint_data(
1673 fingerprint: Fingerprint,
1674 phase_config: PhaseConfig,
1675 scale: f64,
1676 ) -> SynthResult<Self> {
1677 info!(
1678 "Synthesizing config from fingerprint (version: {}, tables: {})",
1679 fingerprint.manifest.version,
1680 fingerprint.schema.tables.len()
1681 );
1682
1683 let seed: u64 = rand::random();
1685 info!("Fingerprint synthesis seed: {}", seed);
1686
1687 let options = SynthesisOptions {
1689 scale,
1690 seed: Some(seed),
1691 preserve_correlations: true,
1692 inject_anomalies: true,
1693 };
1694 let synthesizer = ConfigSynthesizer::with_options(options);
1695
1696 let synthesis_result = synthesizer
1698 .synthesize_full(&fingerprint, seed)
1699 .map_err(|e| {
1700 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1701 })?;
1702
1703 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1705 Self::base_config_for_industry(industry)
1706 } else {
1707 Self::base_config_for_industry("manufacturing")
1708 };
1709
1710 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1712
1713 info!(
1715 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1716 fingerprint.schema.tables.len(),
1717 scale,
1718 synthesis_result.copula_generators.len()
1719 );
1720
1721 if !synthesis_result.copula_generators.is_empty() {
1722 for spec in &synthesis_result.copula_generators {
1723 info!(
1724 " Copula '{}' for table '{}': {} columns",
1725 spec.name,
1726 spec.table,
1727 spec.columns.len()
1728 );
1729 }
1730 }
1731
1732 let mut orchestrator = Self::new(config, phase_config)?;
1734
1735 orchestrator.copula_generators = synthesis_result.copula_generators;
1737
1738 Ok(orchestrator)
1739 }
1740
1741 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1743 use datasynth_config::presets::create_preset;
1744 use datasynth_config::TransactionVolume;
1745 use datasynth_core::models::{CoAComplexity, IndustrySector};
1746
1747 let sector = match industry.to_lowercase().as_str() {
1748 "manufacturing" => IndustrySector::Manufacturing,
1749 "retail" => IndustrySector::Retail,
1750 "financial" | "financial_services" => IndustrySector::FinancialServices,
1751 "healthcare" => IndustrySector::Healthcare,
1752 "technology" | "tech" => IndustrySector::Technology,
1753 _ => IndustrySector::Manufacturing,
1754 };
1755
1756 create_preset(
1758 sector,
1759 1, 12, CoAComplexity::Medium,
1762 TransactionVolume::TenK,
1763 )
1764 }
1765
1766 fn apply_config_patch(
1768 mut config: GeneratorConfig,
1769 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1770 ) -> GeneratorConfig {
1771 use datasynth_fingerprint::synthesis::ConfigValue;
1772
1773 for (key, value) in patch.values() {
1774 match (key.as_str(), value) {
1775 ("transactions.count", ConfigValue::Integer(n)) => {
1778 info!(
1779 "Fingerprint suggests {} transactions (apply via company volumes)",
1780 n
1781 );
1782 }
1783 ("global.period_months", ConfigValue::Integer(n)) => {
1784 config.global.period_months = (*n).clamp(1, 120) as u32;
1785 }
1786 ("global.start_date", ConfigValue::String(s)) => {
1787 config.global.start_date = s.clone();
1788 }
1789 ("global.seed", ConfigValue::Integer(n)) => {
1790 config.global.seed = Some(*n as u64);
1791 }
1792 ("fraud.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1799 config.data_quality.enabled = *b;
1800 }
1801 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1803 config.fraud.enabled = *b;
1804 }
1805 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1806 config.fraud.fraud_rate = *f;
1807 }
1808 _ => {
1809 debug!("Ignoring unknown config patch key: {}", key);
1810 }
1811 }
1812 }
1813
1814 config
1815 }
1816
1817 fn build_resource_guard(
1819 config: &GeneratorConfig,
1820 output_path: Option<PathBuf>,
1821 ) -> ResourceGuard {
1822 let mut builder = ResourceGuardBuilder::new();
1823
1824 if config.global.memory_limit_mb > 0 {
1826 builder = builder.memory_limit(config.global.memory_limit_mb);
1827 }
1828
1829 if let Some(path) = output_path {
1831 builder = builder.output_path(path).min_free_disk(100); }
1833
1834 builder = builder.conservative();
1836
1837 builder.build()
1838 }
1839
1840 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1845 self.resource_guard.check()
1846 }
1847
1848 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1850 let level = self.resource_guard.check()?;
1851
1852 if level != DegradationLevel::Normal {
1853 warn!(
1854 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1855 phase,
1856 level,
1857 self.resource_guard.current_memory_mb(),
1858 self.resource_guard.available_disk_mb()
1859 );
1860 }
1861
1862 Ok(level)
1863 }
1864
1865 fn get_degradation_actions(&self) -> DegradationActions {
1867 self.resource_guard.get_actions()
1868 }
1869
1870 fn check_memory_limit(&self) -> SynthResult<()> {
1872 self.check_resources()?;
1873 Ok(())
1874 }
1875
1876 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1878 info!("Starting enhanced generation workflow");
1879 info!(
1880 "Config: industry={:?}, period_months={}, companies={}",
1881 self.config.global.industry,
1882 self.config.global.period_months,
1883 self.config.companies.len()
1884 );
1885
1886 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1889 datasynth_core::serde_decimal::set_numeric_native(is_native);
1890 struct NumericModeGuard;
1891 impl Drop for NumericModeGuard {
1892 fn drop(&mut self) {
1893 datasynth_core::serde_decimal::set_numeric_native(false);
1894 }
1895 }
1896 let _numeric_guard = if is_native {
1897 Some(NumericModeGuard)
1898 } else {
1899 None
1900 };
1901
1902 let initial_level = self.check_resources_with_log("initial")?;
1904 if initial_level == DegradationLevel::Emergency {
1905 return Err(SynthError::resource(
1906 "Insufficient resources to start generation",
1907 ));
1908 }
1909
1910 let mut stats = EnhancedGenerationStatistics {
1911 companies_count: self.config.companies.len(),
1912 period_months: self.config.global.period_months,
1913 ..Default::default()
1914 };
1915
1916 let coa = self.phase_chart_of_accounts(&mut stats)?;
1918
1919 self.phase_master_data(&mut stats)?;
1921
1922 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1924 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1925 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1926
1927 let (mut document_flows, mut subledger, fa_journal_entries) =
1929 self.phase_document_flows(&mut stats)?;
1930
1931 self.emit_phase_items(
1933 "document_flows",
1934 "PurchaseOrder",
1935 &document_flows.purchase_orders,
1936 );
1937 self.emit_phase_items(
1938 "document_flows",
1939 "GoodsReceipt",
1940 &document_flows.goods_receipts,
1941 );
1942 self.emit_phase_items(
1943 "document_flows",
1944 "VendorInvoice",
1945 &document_flows.vendor_invoices,
1946 );
1947 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1948 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1949
1950 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1952
1953 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1958 .iter()
1959 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1960 .collect();
1961 if !opening_balance_jes.is_empty() {
1962 debug!(
1963 "Prepending {} opening balance JEs to entries",
1964 opening_balance_jes.len()
1965 );
1966 }
1967
1968 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1970
1971 if !opening_balance_jes.is_empty() {
1974 let mut combined = opening_balance_jes;
1975 combined.extend(entries);
1976 entries = combined;
1977 }
1978
1979 if !fa_journal_entries.is_empty() {
1981 debug!(
1982 "Appending {} FA acquisition JEs to main entries",
1983 fa_journal_entries.len()
1984 );
1985 entries.extend(fa_journal_entries);
1986 }
1987
1988 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1990
1991 let actions = self.get_degradation_actions();
1993
1994 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1996
1997 if !sourcing.contracts.is_empty() {
2000 let mut linked_count = 0usize;
2001 let po_vendor_pairs: Vec<(String, String)> = document_flows
2003 .p2p_chains
2004 .iter()
2005 .map(|chain| {
2006 (
2007 chain.purchase_order.vendor_id.clone(),
2008 chain.purchase_order.header.document_id.clone(),
2009 )
2010 })
2011 .collect();
2012
2013 for chain in &mut document_flows.p2p_chains {
2014 if chain.purchase_order.contract_id.is_none() {
2015 if let Some(contract) = sourcing
2016 .contracts
2017 .iter()
2018 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2019 {
2020 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2021 linked_count += 1;
2022 }
2023 }
2024 }
2025
2026 for contract in &mut sourcing.contracts {
2028 let po_ids: Vec<String> = po_vendor_pairs
2029 .iter()
2030 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2031 .map(|(_, po_id)| po_id.clone())
2032 .collect();
2033 if !po_ids.is_empty() {
2034 contract.purchase_order_ids = po_ids;
2035 }
2036 }
2037
2038 if linked_count > 0 {
2039 debug!(
2040 "Linked {} purchase orders to S2C contracts by vendor match",
2041 linked_count
2042 );
2043 }
2044 }
2045
2046 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2048
2049 if !intercompany.seller_journal_entries.is_empty()
2051 || !intercompany.buyer_journal_entries.is_empty()
2052 {
2053 let ic_je_count = intercompany.seller_journal_entries.len()
2054 + intercompany.buyer_journal_entries.len();
2055 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2056 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2057 debug!(
2058 "Appended {} IC journal entries to main entries",
2059 ic_je_count
2060 );
2061 }
2062
2063 if !intercompany.elimination_entries.is_empty() {
2065 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2066 &intercompany.elimination_entries,
2067 );
2068 if !elim_jes.is_empty() {
2069 debug!(
2070 "Appended {} elimination journal entries to main entries",
2071 elim_jes.len()
2072 );
2073 let elim_debit: rust_decimal::Decimal =
2075 elim_jes.iter().map(|je| je.total_debit()).sum();
2076 let elim_credit: rust_decimal::Decimal =
2077 elim_jes.iter().map(|je| je.total_credit()).sum();
2078 let elim_diff = (elim_debit - elim_credit).abs();
2079 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2081 return Err(datasynth_core::error::SynthError::generation(format!(
2082 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2083 elim_debit, elim_credit, elim_diff, tolerance
2084 )));
2085 }
2086 debug!(
2087 "IC elimination balance verified: debits={}, credits={} (diff={})",
2088 elim_debit, elim_credit, elim_diff
2089 );
2090 entries.extend(elim_jes);
2091 }
2092 }
2093
2094 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2096 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2097 document_flows
2098 .customer_invoices
2099 .extend(ic_docs.seller_invoices.iter().cloned());
2100 document_flows
2101 .purchase_orders
2102 .extend(ic_docs.buyer_orders.iter().cloned());
2103 document_flows
2104 .goods_receipts
2105 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2106 document_flows
2107 .vendor_invoices
2108 .extend(ic_docs.buyer_invoices.iter().cloned());
2109 debug!(
2110 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2111 ic_docs.seller_invoices.len(),
2112 ic_docs.buyer_orders.len(),
2113 ic_docs.buyer_goods_receipts.len(),
2114 ic_docs.buyer_invoices.len(),
2115 );
2116 }
2117 }
2118
2119 let hr = self.phase_hr_data(&mut stats)?;
2121
2122 if !hr.payroll_runs.is_empty() {
2124 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2125 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2126 entries.extend(payroll_jes);
2127 }
2128
2129 if !hr.pension_journal_entries.is_empty() {
2131 debug!(
2132 "Generated {} JEs from pension plans",
2133 hr.pension_journal_entries.len()
2134 );
2135 entries.extend(hr.pension_journal_entries.iter().cloned());
2136 }
2137
2138 if !hr.stock_comp_journal_entries.is_empty() {
2140 debug!(
2141 "Generated {} JEs from stock-based compensation",
2142 hr.stock_comp_journal_entries.len()
2143 );
2144 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2145 }
2146
2147 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2149
2150 if !manufacturing_snap.production_orders.is_empty() {
2152 let currency = self
2153 .config
2154 .companies
2155 .first()
2156 .map(|c| c.currency.as_str())
2157 .unwrap_or("USD");
2158 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2159 &manufacturing_snap.production_orders,
2160 &manufacturing_snap.quality_inspections,
2161 currency,
2162 );
2163 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2164 entries.extend(mfg_jes);
2165 }
2166
2167 if !manufacturing_snap.quality_inspections.is_empty() {
2169 let framework = match self.config.accounting_standards.framework {
2170 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2171 _ => "US_GAAP",
2172 };
2173 for company in &self.config.companies {
2174 let company_orders: Vec<_> = manufacturing_snap
2175 .production_orders
2176 .iter()
2177 .filter(|o| o.company_code == company.code)
2178 .cloned()
2179 .collect();
2180 let company_inspections: Vec<_> = manufacturing_snap
2181 .quality_inspections
2182 .iter()
2183 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2184 .cloned()
2185 .collect();
2186 if company_inspections.is_empty() {
2187 continue;
2188 }
2189 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2190 let warranty_result = warranty_gen.generate(
2191 &company.code,
2192 &company_orders,
2193 &company_inspections,
2194 &company.currency,
2195 framework,
2196 );
2197 if !warranty_result.journal_entries.is_empty() {
2198 debug!(
2199 "Generated {} warranty provision JEs for {}",
2200 warranty_result.journal_entries.len(),
2201 company.code
2202 );
2203 entries.extend(warranty_result.journal_entries);
2204 }
2205 }
2206 }
2207
2208 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2210 {
2211 let cogs_currency = self
2212 .config
2213 .companies
2214 .first()
2215 .map(|c| c.currency.as_str())
2216 .unwrap_or("USD");
2217 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2218 &document_flows.deliveries,
2219 &manufacturing_snap.production_orders,
2220 cogs_currency,
2221 );
2222 if !cogs_jes.is_empty() {
2223 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2224 entries.extend(cogs_jes);
2225 }
2226 }
2227
2228 if !manufacturing_snap.inventory_movements.is_empty()
2234 && !subledger.inventory_positions.is_empty()
2235 {
2236 use datasynth_core::models::MovementType as MfgMovementType;
2237 let mut receipt_count = 0usize;
2238 let mut issue_count = 0usize;
2239 for movement in &manufacturing_snap.inventory_movements {
2240 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2242 p.material_id == movement.material_code
2243 && p.company_code == movement.entity_code
2244 }) {
2245 match movement.movement_type {
2246 MfgMovementType::GoodsReceipt => {
2247 pos.add_quantity(
2249 movement.quantity,
2250 movement.value,
2251 movement.movement_date,
2252 );
2253 receipt_count += 1;
2254 }
2255 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2256 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2258 issue_count += 1;
2259 }
2260 _ => {}
2261 }
2262 }
2263 }
2264 debug!(
2265 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2266 manufacturing_snap.inventory_movements.len(),
2267 receipt_count,
2268 issue_count,
2269 );
2270 }
2271
2272 if !entries.is_empty() {
2275 stats.total_entries = entries.len() as u64;
2276 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2277 debug!(
2278 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2279 stats.total_entries, stats.total_line_items
2280 );
2281 }
2282
2283 if self.config.internal_controls.enabled && !entries.is_empty() {
2285 info!("Phase 7b: Applying internal controls to journal entries");
2286 let control_config = ControlGeneratorConfig {
2287 exception_rate: self.config.internal_controls.exception_rate,
2288 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2289 enable_sox_marking: true,
2290 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2291 self.config.internal_controls.sox_materiality_threshold,
2292 )
2293 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2294 ..Default::default()
2295 };
2296 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2297 for entry in &mut entries {
2298 control_gen.apply_controls(entry, &coa);
2299 }
2300 let with_controls = entries
2301 .iter()
2302 .filter(|e| !e.header.control_ids.is_empty())
2303 .count();
2304 info!(
2305 "Applied controls to {} entries ({} with control IDs assigned)",
2306 entries.len(),
2307 with_controls
2308 );
2309 }
2310
2311 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2315 .iter()
2316 .filter(|e| e.header.sod_violation)
2317 .filter_map(|e| {
2318 e.header.sod_conflict_type.map(|ct| {
2319 use datasynth_core::models::{RiskLevel, SodViolation};
2320 let severity = match ct {
2321 datasynth_core::models::SodConflictType::PaymentReleaser
2322 | datasynth_core::models::SodConflictType::RequesterApprover => {
2323 RiskLevel::Critical
2324 }
2325 datasynth_core::models::SodConflictType::PreparerApprover
2326 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2327 | datasynth_core::models::SodConflictType::JournalEntryPoster
2328 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2329 RiskLevel::High
2330 }
2331 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2332 RiskLevel::Medium
2333 }
2334 };
2335 let action = format!(
2336 "SoD conflict {:?} on entry {} ({})",
2337 ct, e.header.document_id, e.header.company_code
2338 );
2339 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2340 })
2341 })
2342 .collect();
2343 if !sod_violations.is_empty() {
2344 info!(
2345 "Phase 7c: Extracted {} SoD violations from {} entries",
2346 sod_violations.len(),
2347 entries.len()
2348 );
2349 }
2350
2351 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2353
2354 {
2362 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2363 if self.config.fraud.enabled && doc_rate > 0.0 {
2364 use datasynth_core::fraud_propagation::{
2365 inject_document_fraud, propagate_documents_to_entries,
2366 };
2367 use datasynth_core::utils::weighted_select;
2368 use datasynth_core::FraudType;
2369 use rand_chacha::rand_core::SeedableRng;
2370
2371 let dist = &self.config.fraud.fraud_type_distribution;
2372 let fraud_type_weights: [(FraudType, f64); 8] = [
2373 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2374 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2375 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2376 (
2377 FraudType::ImproperCapitalization,
2378 dist.expense_capitalization,
2379 ),
2380 (FraudType::SplitTransaction, dist.split_transaction),
2381 (FraudType::TimingAnomaly, dist.timing_anomaly),
2382 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2383 (FraudType::DuplicatePayment, dist.duplicate_payment),
2384 ];
2385 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2386 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2387 if weights_sum <= 0.0 {
2388 FraudType::FictitiousEntry
2389 } else {
2390 *weighted_select(rng, &fraud_type_weights)
2391 }
2392 };
2393
2394 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2395 let mut doc_tagged = 0usize;
2396 macro_rules! inject_into {
2397 ($collection:expr) => {{
2398 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2399 $collection.iter_mut().map(|d| &mut d.header).collect();
2400 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2401 }};
2402 }
2403 inject_into!(document_flows.purchase_orders);
2404 inject_into!(document_flows.goods_receipts);
2405 inject_into!(document_flows.vendor_invoices);
2406 inject_into!(document_flows.payments);
2407 inject_into!(document_flows.sales_orders);
2408 inject_into!(document_flows.deliveries);
2409 inject_into!(document_flows.customer_invoices);
2410 if doc_tagged > 0 {
2411 info!(
2412 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2413 );
2414 }
2415
2416 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2417 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2418 Vec::new();
2419 headers.extend(
2420 document_flows
2421 .purchase_orders
2422 .iter()
2423 .map(|d| d.header.clone()),
2424 );
2425 headers.extend(
2426 document_flows
2427 .goods_receipts
2428 .iter()
2429 .map(|d| d.header.clone()),
2430 );
2431 headers.extend(
2432 document_flows
2433 .vendor_invoices
2434 .iter()
2435 .map(|d| d.header.clone()),
2436 );
2437 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2438 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2439 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2440 headers.extend(
2441 document_flows
2442 .customer_invoices
2443 .iter()
2444 .map(|d| d.header.clone()),
2445 );
2446 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2447 if propagated > 0 {
2448 info!(
2449 "Propagated document-level fraud to {propagated} derived journal entries"
2450 );
2451 }
2452 }
2453 }
2454 }
2455
2456 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2458
2459 self.emit_phase_items(
2461 "anomaly_injection",
2462 "LabeledAnomaly",
2463 &anomaly_labels.labels,
2464 );
2465
2466 if self.config.fraud.propagate_to_document {
2474 use std::collections::HashMap;
2475 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2488 for je in &entries {
2489 if je.header.is_fraud {
2490 if let Some(ref fraud_type) = je.header.fraud_type {
2491 if let Some(ref reference) = je.header.reference {
2492 fraud_map.insert(reference.clone(), *fraud_type);
2494 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2497 if !bare.is_empty() {
2498 fraud_map.insert(bare.to_string(), *fraud_type);
2499 }
2500 }
2501 }
2502 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2504 }
2505 }
2506 }
2507 if !fraud_map.is_empty() {
2508 let mut propagated = 0usize;
2509 macro_rules! propagate_to {
2511 ($collection:expr) => {
2512 for doc in &mut $collection {
2513 if doc.header.propagate_fraud(&fraud_map) {
2514 propagated += 1;
2515 }
2516 }
2517 };
2518 }
2519 propagate_to!(document_flows.purchase_orders);
2520 propagate_to!(document_flows.goods_receipts);
2521 propagate_to!(document_flows.vendor_invoices);
2522 propagate_to!(document_flows.payments);
2523 propagate_to!(document_flows.sales_orders);
2524 propagate_to!(document_flows.deliveries);
2525 propagate_to!(document_flows.customer_invoices);
2526 if propagated > 0 {
2527 info!(
2528 "Propagated fraud labels to {} document flow records",
2529 propagated
2530 );
2531 }
2532 }
2533 }
2534
2535 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2537
2538 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2540
2541 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2543
2544 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2546
2547 let balance_validation = self.phase_balance_validation(&entries)?;
2549
2550 let subledger_reconciliation =
2552 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2553
2554 let (data_quality_stats, quality_issues) =
2556 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2557
2558 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2560
2561 {
2563 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2568 for je in &entries {
2569 if je.header.is_fraud || je.header.is_anomaly {
2570 continue;
2571 }
2572 let diff = (je.total_debit() - je.total_credit()).abs();
2573 if diff > tolerance {
2574 unbalanced_clean += 1;
2575 if unbalanced_clean <= 3 {
2576 warn!(
2577 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2578 je.header.document_id,
2579 je.total_debit(),
2580 je.total_credit(),
2581 diff
2582 );
2583 }
2584 }
2585 }
2586 if unbalanced_clean > 0 {
2587 return Err(datasynth_core::error::SynthError::generation(format!(
2588 "{} non-anomaly JEs are unbalanced (debits != credits). \
2589 First few logged above. Tolerance={}",
2590 unbalanced_clean, tolerance
2591 )));
2592 }
2593 debug!(
2594 "Phase 10c: All {} non-anomaly JEs individually balanced",
2595 entries
2596 .iter()
2597 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2598 .count()
2599 );
2600
2601 let company_codes: Vec<String> = self
2603 .config
2604 .companies
2605 .iter()
2606 .map(|c| c.code.clone())
2607 .collect();
2608 for company_code in &company_codes {
2609 let mut assets = rust_decimal::Decimal::ZERO;
2610 let mut liab_equity = rust_decimal::Decimal::ZERO;
2611
2612 for entry in &entries {
2613 if entry.header.company_code != *company_code {
2614 continue;
2615 }
2616 for line in &entry.lines {
2617 let acct = &line.gl_account;
2618 let net = line.debit_amount - line.credit_amount;
2619 if acct.starts_with('1') {
2621 assets += net;
2622 }
2623 else if acct.starts_with('2') || acct.starts_with('3') {
2625 liab_equity -= net; }
2627 }
2630 }
2631
2632 let bs_diff = (assets - liab_equity).abs();
2633 if bs_diff > tolerance {
2634 warn!(
2635 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2636 revenue/expense closing entries may not fully offset",
2637 company_code, assets, liab_equity, bs_diff
2638 );
2639 } else {
2643 debug!(
2644 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2645 company_code, assets, liab_equity, bs_diff
2646 );
2647 }
2648 }
2649
2650 info!("Phase 10c: All generation-time accounting assertions passed");
2651 }
2652
2653 let audit = self.phase_audit_data(&entries, &mut stats)?;
2655
2656 let mut banking = self.phase_banking_data(&mut stats)?;
2658
2659 if self.phase_config.generate_banking
2664 && !document_flows.payments.is_empty()
2665 && !banking.accounts.is_empty()
2666 {
2667 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2668 if bridge_rate > 0.0 {
2669 let mut bridge =
2670 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2671 self.seed,
2672 );
2673 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2674 &document_flows.payments,
2675 &banking.customers,
2676 &banking.accounts,
2677 bridge_rate,
2678 );
2679 info!(
2680 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2681 bridge_stats.bridged_count,
2682 bridge_stats.transactions_emitted,
2683 bridge_stats.fraud_propagated,
2684 );
2685 let bridged_count = bridged_txns.len();
2686 banking.transactions.extend(bridged_txns);
2687
2688 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2691 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2692 &mut banking.transactions,
2693 );
2694 }
2695
2696 banking.suspicious_count = banking
2698 .transactions
2699 .iter()
2700 .filter(|t| t.is_suspicious)
2701 .count();
2702 stats.banking_transaction_count = banking.transactions.len();
2703 stats.banking_suspicious_count = banking.suspicious_count;
2704 }
2705 }
2706
2707 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2709
2710 self.phase_llm_enrichment(&mut stats);
2712
2713 self.phase_diffusion_enhancement(&mut stats);
2715
2716 self.phase_causal_overlay(&mut stats);
2718
2719 let mut financial_reporting = self.phase_financial_reporting(
2723 &document_flows,
2724 &entries,
2725 &coa,
2726 &hr,
2727 &audit,
2728 &mut stats,
2729 )?;
2730
2731 {
2733 use datasynth_core::models::StatementType;
2734 for stmt in &financial_reporting.consolidated_statements {
2735 if stmt.statement_type == StatementType::BalanceSheet {
2736 let total_assets: rust_decimal::Decimal = stmt
2737 .line_items
2738 .iter()
2739 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2740 .map(|li| li.amount)
2741 .sum();
2742 let total_le: rust_decimal::Decimal = stmt
2743 .line_items
2744 .iter()
2745 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2746 .map(|li| li.amount)
2747 .sum();
2748 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2749 warn!(
2750 "BS equation imbalance: assets={}, L+E={}",
2751 total_assets, total_le
2752 );
2753 }
2754 }
2755 }
2756 }
2757
2758 let accounting_standards =
2760 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2761
2762 if !accounting_standards.ecl_journal_entries.is_empty() {
2764 debug!(
2765 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2766 accounting_standards.ecl_journal_entries.len()
2767 );
2768 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2769 }
2770
2771 if !accounting_standards.provision_journal_entries.is_empty() {
2773 debug!(
2774 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2775 accounting_standards.provision_journal_entries.len()
2776 );
2777 entries.extend(
2778 accounting_standards
2779 .provision_journal_entries
2780 .iter()
2781 .cloned(),
2782 );
2783 }
2784
2785 let mut ocpm = self.phase_ocpm_events(
2787 &document_flows,
2788 &sourcing,
2789 &hr,
2790 &manufacturing_snap,
2791 &banking,
2792 &audit,
2793 &financial_reporting,
2794 &mut stats,
2795 )?;
2796
2797 if let Some(ref event_log) = ocpm.event_log {
2799 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2800 }
2801
2802 if let Some(ref event_log) = ocpm.event_log {
2804 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2806 std::collections::HashMap::new();
2807 for (idx, event) in event_log.events.iter().enumerate() {
2808 if let Some(ref doc_ref) = event.document_ref {
2809 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2810 }
2811 }
2812
2813 if !doc_index.is_empty() {
2814 let mut annotated = 0usize;
2815 for entry in &mut entries {
2816 let doc_id_str = entry.header.document_id.to_string();
2817 let mut matched_indices: Vec<usize> = Vec::new();
2819 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2820 matched_indices.extend(indices);
2821 }
2822 if let Some(ref reference) = entry.header.reference {
2823 let bare_ref = reference
2824 .find(':')
2825 .map(|i| &reference[i + 1..])
2826 .unwrap_or(reference.as_str());
2827 if let Some(indices) = doc_index.get(bare_ref) {
2828 for &idx in indices {
2829 if !matched_indices.contains(&idx) {
2830 matched_indices.push(idx);
2831 }
2832 }
2833 }
2834 }
2835 if !matched_indices.is_empty() {
2837 for &idx in &matched_indices {
2838 let event = &event_log.events[idx];
2839 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2840 entry.header.ocpm_event_ids.push(event.event_id);
2841 }
2842 for obj_ref in &event.object_refs {
2843 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2844 entry.header.ocpm_object_ids.push(obj_ref.object_id);
2845 }
2846 }
2847 if entry.header.ocpm_case_id.is_none() {
2848 entry.header.ocpm_case_id = event.case_id;
2849 }
2850 }
2851 annotated += 1;
2852 }
2853 }
2854 debug!(
2855 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2856 annotated
2857 );
2858 }
2859 }
2860
2861 if let Some(ref mut event_log) = ocpm.event_log {
2865 let synthesized =
2866 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
2867 if synthesized > 0 {
2868 info!(
2869 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
2870 );
2871 }
2872
2873 let anomaly_events =
2878 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
2879 if anomaly_events > 0 {
2880 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
2881 }
2882
2883 let p2p_cfg = &self.config.ocpm.p2p_process;
2888 let any_imperfection = p2p_cfg.rework_probability > 0.0
2889 || p2p_cfg.skip_step_probability > 0.0
2890 || p2p_cfg.out_of_order_probability > 0.0;
2891 if any_imperfection {
2892 use rand_chacha::rand_core::SeedableRng;
2893 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
2894 rework_rate: p2p_cfg.rework_probability,
2895 skip_rate: p2p_cfg.skip_step_probability,
2896 out_of_order_rate: p2p_cfg.out_of_order_probability,
2897 };
2898 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
2899 let stats =
2900 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
2901 if stats.rework + stats.skipped + stats.out_of_order > 0 {
2902 info!(
2903 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
2904 stats.rework, stats.skipped, stats.out_of_order
2905 );
2906 }
2907 }
2908 }
2909
2910 let sales_kpi_budgets =
2912 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2913
2914 let treasury =
2918 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2919
2920 if !treasury.journal_entries.is_empty() {
2922 debug!(
2923 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2924 treasury.journal_entries.len()
2925 );
2926 entries.extend(treasury.journal_entries.iter().cloned());
2927 }
2928
2929 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2931
2932 if !tax.tax_posting_journal_entries.is_empty() {
2934 debug!(
2935 "Merging {} tax posting JEs into GL",
2936 tax.tax_posting_journal_entries.len()
2937 );
2938 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2939 }
2940
2941 {
2945 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2946
2947 let framework_str = {
2948 use datasynth_config::schema::AccountingFrameworkConfig;
2949 match self
2950 .config
2951 .accounting_standards
2952 .framework
2953 .unwrap_or_default()
2954 {
2955 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2956 "IFRS"
2957 }
2958 _ => "US_GAAP",
2959 }
2960 };
2961
2962 let depreciation_total: rust_decimal::Decimal = entries
2964 .iter()
2965 .filter(|je| je.header.document_type == "CL")
2966 .flat_map(|je| je.lines.iter())
2967 .filter(|l| l.gl_account.starts_with("6000"))
2968 .map(|l| l.debit_amount)
2969 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2970
2971 let interest_paid: rust_decimal::Decimal = entries
2973 .iter()
2974 .flat_map(|je| je.lines.iter())
2975 .filter(|l| l.gl_account.starts_with("7100"))
2976 .map(|l| l.debit_amount)
2977 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2978
2979 let tax_paid: rust_decimal::Decimal = entries
2981 .iter()
2982 .flat_map(|je| je.lines.iter())
2983 .filter(|l| l.gl_account.starts_with("8000"))
2984 .map(|l| l.debit_amount)
2985 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2986
2987 let capex: rust_decimal::Decimal = entries
2989 .iter()
2990 .flat_map(|je| je.lines.iter())
2991 .filter(|l| l.gl_account.starts_with("1500"))
2992 .map(|l| l.debit_amount)
2993 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2994
2995 let dividends_paid: rust_decimal::Decimal = entries
2997 .iter()
2998 .flat_map(|je| je.lines.iter())
2999 .filter(|l| l.gl_account == "2170")
3000 .map(|l| l.debit_amount)
3001 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3002
3003 let cf_data = CashFlowSourceData {
3004 depreciation_total,
3005 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3007 delta_ap: rust_decimal::Decimal::ZERO,
3008 delta_inventory: rust_decimal::Decimal::ZERO,
3009 capex,
3010 debt_issuance: rust_decimal::Decimal::ZERO,
3011 debt_repayment: rust_decimal::Decimal::ZERO,
3012 interest_paid,
3013 tax_paid,
3014 dividends_paid,
3015 framework: framework_str.to_string(),
3016 };
3017
3018 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3019 if !enhanced_cf_items.is_empty() {
3020 use datasynth_core::models::StatementType;
3022 let merge_count = enhanced_cf_items.len();
3023 for stmt in financial_reporting
3024 .financial_statements
3025 .iter_mut()
3026 .chain(financial_reporting.consolidated_statements.iter_mut())
3027 .chain(
3028 financial_reporting
3029 .standalone_statements
3030 .values_mut()
3031 .flat_map(|v| v.iter_mut()),
3032 )
3033 {
3034 if stmt.statement_type == StatementType::CashFlowStatement {
3035 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3036 }
3037 }
3038 info!(
3039 "Enhanced cash flow: {} supplementary items merged into CF statements",
3040 merge_count
3041 );
3042 }
3043 }
3044
3045 self.generate_notes_to_financial_statements(
3048 &mut financial_reporting,
3049 &accounting_standards,
3050 &tax,
3051 &hr,
3052 &audit,
3053 &treasury,
3054 );
3055
3056 if self.config.companies.len() >= 2 && !entries.is_empty() {
3060 let companies: Vec<(String, String)> = self
3061 .config
3062 .companies
3063 .iter()
3064 .map(|c| (c.code.clone(), c.name.clone()))
3065 .collect();
3066 let ic_elim: rust_decimal::Decimal =
3067 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3068 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3069 .unwrap_or(NaiveDate::MIN);
3070 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3071 let period_label = format!(
3072 "{}-{:02}",
3073 end_date.year(),
3074 (end_date - chrono::Days::new(1)).month()
3075 );
3076
3077 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3078 let (je_segments, je_recon) =
3079 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3080 if !je_segments.is_empty() {
3081 info!(
3082 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3083 je_segments.len(),
3084 ic_elim,
3085 );
3086 if financial_reporting.segment_reports.is_empty() {
3088 financial_reporting.segment_reports = je_segments;
3089 financial_reporting.segment_reconciliations = vec![je_recon];
3090 } else {
3091 financial_reporting.segment_reports.extend(je_segments);
3092 financial_reporting.segment_reconciliations.push(je_recon);
3093 }
3094 }
3095 }
3096
3097 let esg_snap =
3099 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3100
3101 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3103
3104 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3106
3107 let disruption_events = self.phase_disruption_events(&mut stats)?;
3109
3110 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3112
3113 let (entity_relationship_graph, cross_process_links) =
3115 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3116
3117 let industry_output = self.phase_industry_data(&mut stats);
3119
3120 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3122
3123 if self.config.diffusion.enabled
3125 && (self.config.diffusion.backend == "neural"
3126 || self.config.diffusion.backend == "hybrid")
3127 {
3128 let neural = &self.config.diffusion.neural;
3129 const VALID_STRATEGIES: &[&str] = &["weighted_average", "column_select", "threshold"];
3132 if !VALID_STRATEGIES.contains(&neural.hybrid_strategy.as_str()) {
3133 warn!(
3134 "Unknown diffusion.neural.hybrid_strategy='{}' — expected one of {:?}; \
3135 falling back to 'weighted_average'.",
3136 neural.hybrid_strategy, VALID_STRATEGIES
3137 );
3138 }
3139 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3140 if (weight - neural.hybrid_weight).abs() > f64::EPSILON {
3141 warn!(
3142 "diffusion.neural.hybrid_weight={} clamped to [0,1] → {}",
3143 neural.hybrid_weight, weight
3144 );
3145 }
3146 info!(
3147 "Phase neural enhancement: backend={} strategy={} weight={:.2} columns={} \
3148 (neural_columns: {:?})",
3149 self.config.diffusion.backend,
3150 neural.hybrid_strategy,
3151 weight,
3152 neural.neural_columns.len(),
3153 neural.neural_columns,
3154 );
3155 stats.neural_hybrid_weight = Some(weight);
3156 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3157 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3158 }
3166
3167 self.phase_hypergraph_export(
3169 &coa,
3170 &entries,
3171 &document_flows,
3172 &sourcing,
3173 &hr,
3174 &manufacturing_snap,
3175 &banking,
3176 &audit,
3177 &financial_reporting,
3178 &ocpm,
3179 &compliance_regulations,
3180 &mut stats,
3181 )?;
3182
3183 if self.phase_config.generate_graph_export {
3186 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3187 }
3188
3189 if self.config.streaming.enabled {
3191 info!("Note: streaming config is enabled but batch mode does not use it");
3192 }
3193 if self.config.vendor_network.enabled {
3194 debug!("Vendor network config available; relationship graph generation is partial");
3195 }
3196 if self.config.customer_segmentation.enabled {
3197 debug!("Customer segmentation config available; segment-aware generation is partial");
3198 }
3199
3200 let resource_stats = self.resource_guard.stats();
3202 info!(
3203 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3204 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3205 resource_stats.disk.estimated_bytes_written,
3206 resource_stats.degradation_level
3207 );
3208
3209 if let Some(ref sink) = self.phase_sink {
3211 if let Err(e) = sink.flush() {
3212 warn!("Stream sink flush failed: {e}");
3213 }
3214 }
3215
3216 let lineage = self.build_lineage_graph();
3218
3219 let gate_result = if self.config.quality_gates.enabled {
3221 let profile_name = &self.config.quality_gates.profile;
3222 match datasynth_eval::gates::get_profile(profile_name) {
3223 Some(profile) => {
3224 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3226
3227 if balance_validation.validated {
3229 eval.coherence.balance =
3230 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3231 equation_balanced: balance_validation.is_balanced,
3232 max_imbalance: (balance_validation.total_debits
3233 - balance_validation.total_credits)
3234 .abs(),
3235 periods_evaluated: 1,
3236 periods_imbalanced: if balance_validation.is_balanced {
3237 0
3238 } else {
3239 1
3240 },
3241 period_results: Vec::new(),
3242 companies_evaluated: self.config.companies.len(),
3243 });
3244 }
3245
3246 eval.coherence.passes = balance_validation.is_balanced;
3248 if !balance_validation.is_balanced {
3249 eval.coherence
3250 .failures
3251 .push("Balance sheet equation not satisfied".to_string());
3252 }
3253
3254 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3256 eval.statistical.passes = !entries.is_empty();
3257
3258 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3261
3262 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3263 info!(
3264 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3265 profile_name, result.gates_passed, result.gates_total, result.summary
3266 );
3267 Some(result)
3268 }
3269 None => {
3270 warn!(
3271 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3272 profile_name
3273 );
3274 None
3275 }
3276 }
3277 } else {
3278 None
3279 };
3280
3281 let internal_controls = if self.config.internal_controls.enabled {
3283 InternalControl::standard_controls()
3284 } else {
3285 Vec::new()
3286 };
3287
3288 Ok(EnhancedGenerationResult {
3289 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3290 master_data: std::mem::take(&mut self.master_data),
3291 document_flows,
3292 subledger,
3293 ocpm,
3294 audit,
3295 banking,
3296 graph_export,
3297 sourcing,
3298 financial_reporting,
3299 hr,
3300 accounting_standards,
3301 manufacturing: manufacturing_snap,
3302 sales_kpi_budgets,
3303 tax,
3304 esg: esg_snap,
3305 treasury,
3306 project_accounting,
3307 process_evolution,
3308 organizational_events,
3309 disruption_events,
3310 intercompany,
3311 journal_entries: entries,
3312 anomaly_labels,
3313 balance_validation,
3314 data_quality_stats,
3315 quality_issues,
3316 statistics: stats,
3317 lineage: Some(lineage),
3318 gate_result,
3319 internal_controls,
3320 sod_violations,
3321 opening_balances,
3322 subledger_reconciliation,
3323 counterfactual_pairs,
3324 red_flags,
3325 collusion_rings,
3326 temporal_vendor_chains,
3327 entity_relationship_graph,
3328 cross_process_links,
3329 industry_output,
3330 compliance_regulations,
3331 })
3332 }
3333
3334 fn phase_chart_of_accounts(
3340 &mut self,
3341 stats: &mut EnhancedGenerationStatistics,
3342 ) -> SynthResult<Arc<ChartOfAccounts>> {
3343 info!("Phase 1: Generating Chart of Accounts");
3344 let coa = self.generate_coa()?;
3345 stats.accounts_count = coa.account_count();
3346 info!(
3347 "Chart of Accounts generated: {} accounts",
3348 stats.accounts_count
3349 );
3350 self.check_resources_with_log("post-coa")?;
3351 Ok(coa)
3352 }
3353
3354 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3356 if self.phase_config.generate_master_data {
3357 info!("Phase 2: Generating Master Data");
3358 self.generate_master_data()?;
3359 stats.vendor_count = self.master_data.vendors.len();
3360 stats.customer_count = self.master_data.customers.len();
3361 stats.material_count = self.master_data.materials.len();
3362 stats.asset_count = self.master_data.assets.len();
3363 stats.employee_count = self.master_data.employees.len();
3364 info!(
3365 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3366 stats.vendor_count, stats.customer_count, stats.material_count,
3367 stats.asset_count, stats.employee_count
3368 );
3369 self.check_resources_with_log("post-master-data")?;
3370 } else {
3371 debug!("Phase 2: Skipped (master data generation disabled)");
3372 }
3373 Ok(())
3374 }
3375
3376 fn phase_document_flows(
3378 &mut self,
3379 stats: &mut EnhancedGenerationStatistics,
3380 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3381 let mut document_flows = DocumentFlowSnapshot::default();
3382 let mut subledger = SubledgerSnapshot::default();
3383 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3386
3387 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3388 info!("Phase 3: Generating Document Flows");
3389 self.generate_document_flows(&mut document_flows)?;
3390 stats.p2p_chain_count = document_flows.p2p_chains.len();
3391 stats.o2c_chain_count = document_flows.o2c_chains.len();
3392 info!(
3393 "Document flows generated: {} P2P chains, {} O2C chains",
3394 stats.p2p_chain_count, stats.o2c_chain_count
3395 );
3396
3397 debug!("Phase 3b: Linking document flows to subledgers");
3399 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3400 stats.ap_invoice_count = subledger.ap_invoices.len();
3401 stats.ar_invoice_count = subledger.ar_invoices.len();
3402 debug!(
3403 "Subledgers linked: {} AP invoices, {} AR invoices",
3404 stats.ap_invoice_count, stats.ar_invoice_count
3405 );
3406
3407 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3412 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3413 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3414 debug!("Payment settlements applied to AP and AR subledgers");
3415
3416 if let Ok(start_date) =
3419 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3420 {
3421 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3422 - chrono::Days::new(1);
3423 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3424 for company in &self.config.companies {
3431 let ar_report = ARAgingReport::from_invoices(
3432 company.code.clone(),
3433 &subledger.ar_invoices,
3434 as_of_date,
3435 );
3436 subledger.ar_aging_reports.push(ar_report);
3437
3438 let ap_report = APAgingReport::from_invoices(
3439 company.code.clone(),
3440 &subledger.ap_invoices,
3441 as_of_date,
3442 );
3443 subledger.ap_aging_reports.push(ap_report);
3444 }
3445 debug!(
3446 "AR/AP aging reports built: {} AR, {} AP",
3447 subledger.ar_aging_reports.len(),
3448 subledger.ap_aging_reports.len()
3449 );
3450
3451 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3453 {
3454 use datasynth_generators::DunningGenerator;
3455 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3456 for company in &self.config.companies {
3457 let currency = company.currency.as_str();
3458 let mut company_invoices: Vec<
3461 datasynth_core::models::subledger::ar::ARInvoice,
3462 > = subledger
3463 .ar_invoices
3464 .iter()
3465 .filter(|inv| inv.company_code == company.code)
3466 .cloned()
3467 .collect();
3468
3469 if company_invoices.is_empty() {
3470 continue;
3471 }
3472
3473 let result = dunning_gen.execute_dunning_run(
3474 &company.code,
3475 as_of_date,
3476 &mut company_invoices,
3477 currency,
3478 );
3479
3480 for updated in &company_invoices {
3482 if let Some(orig) = subledger
3483 .ar_invoices
3484 .iter_mut()
3485 .find(|i| i.invoice_number == updated.invoice_number)
3486 {
3487 orig.dunning_info = updated.dunning_info.clone();
3488 }
3489 }
3490
3491 subledger.dunning_runs.push(result.dunning_run);
3492 subledger.dunning_letters.extend(result.letters);
3493 dunning_journal_entries.extend(result.journal_entries);
3495 }
3496 debug!(
3497 "Dunning runs complete: {} runs, {} letters",
3498 subledger.dunning_runs.len(),
3499 subledger.dunning_letters.len()
3500 );
3501 }
3502 }
3503
3504 self.check_resources_with_log("post-document-flows")?;
3505 } else {
3506 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3507 }
3508
3509 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3511 if !self.master_data.assets.is_empty() {
3512 debug!("Generating FA subledger records");
3513 let company_code = self
3514 .config
3515 .companies
3516 .first()
3517 .map(|c| c.code.as_str())
3518 .unwrap_or("1000");
3519 let currency = self
3520 .config
3521 .companies
3522 .first()
3523 .map(|c| c.currency.as_str())
3524 .unwrap_or("USD");
3525
3526 let mut fa_gen = datasynth_generators::FAGenerator::new(
3527 datasynth_generators::FAGeneratorConfig::default(),
3528 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3529 );
3530
3531 for asset in &self.master_data.assets {
3532 let (record, je) = fa_gen.generate_asset_acquisition(
3533 company_code,
3534 &format!("{:?}", asset.asset_class),
3535 &asset.description,
3536 asset.acquisition_date,
3537 currency,
3538 asset.cost_center.as_deref(),
3539 );
3540 subledger.fa_records.push(record);
3541 fa_journal_entries.push(je);
3542 }
3543
3544 stats.fa_subledger_count = subledger.fa_records.len();
3545 debug!(
3546 "FA subledger records generated: {} (with {} acquisition JEs)",
3547 stats.fa_subledger_count,
3548 fa_journal_entries.len()
3549 );
3550 }
3551
3552 if !self.master_data.materials.is_empty() {
3554 debug!("Generating Inventory subledger records");
3555 let first_company = self.config.companies.first();
3556 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3557 let inv_currency = first_company
3558 .map(|c| c.currency.clone())
3559 .unwrap_or_else(|| "USD".to_string());
3560
3561 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3562 datasynth_generators::InventoryGeneratorConfig::default(),
3563 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3564 inv_currency.clone(),
3565 );
3566
3567 for (i, material) in self.master_data.materials.iter().enumerate() {
3568 let plant = format!("PLANT{:02}", (i % 3) + 1);
3569 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3570 let initial_qty = rust_decimal::Decimal::from(
3571 material
3572 .safety_stock
3573 .to_string()
3574 .parse::<i64>()
3575 .unwrap_or(100),
3576 );
3577
3578 let position = inv_gen.generate_position(
3579 company_code,
3580 &plant,
3581 &storage_loc,
3582 &material.material_id,
3583 &material.description,
3584 initial_qty,
3585 Some(material.standard_cost),
3586 &inv_currency,
3587 );
3588 subledger.inventory_positions.push(position);
3589 }
3590
3591 stats.inventory_subledger_count = subledger.inventory_positions.len();
3592 debug!(
3593 "Inventory subledger records generated: {}",
3594 stats.inventory_subledger_count
3595 );
3596 }
3597
3598 if !subledger.fa_records.is_empty() {
3600 if let Ok(start_date) =
3601 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3602 {
3603 let company_code = self
3604 .config
3605 .companies
3606 .first()
3607 .map(|c| c.code.as_str())
3608 .unwrap_or("1000");
3609 let fiscal_year = start_date.year();
3610 let start_period = start_date.month();
3611 let end_period =
3612 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3613
3614 let depr_cfg = FaDepreciationScheduleConfig {
3615 fiscal_year,
3616 start_period,
3617 end_period,
3618 seed_offset: 800,
3619 };
3620 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3621 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3622 let run_count = runs.len();
3623 subledger.depreciation_runs = runs;
3624 debug!(
3625 "Depreciation runs generated: {} runs for {} periods",
3626 run_count, self.config.global.period_months
3627 );
3628 }
3629 }
3630
3631 if !subledger.inventory_positions.is_empty() {
3633 if let Ok(start_date) =
3634 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3635 {
3636 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3637 - chrono::Days::new(1);
3638
3639 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3640 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3641
3642 for company in &self.config.companies {
3643 let result = inv_val_gen.generate(
3644 &company.code,
3645 &subledger.inventory_positions,
3646 as_of_date,
3647 );
3648 subledger.inventory_valuations.push(result);
3649 }
3650 debug!(
3651 "Inventory valuations generated: {} company reports",
3652 subledger.inventory_valuations.len()
3653 );
3654 }
3655 }
3656
3657 Ok((document_flows, subledger, fa_journal_entries))
3658 }
3659
3660 #[allow(clippy::too_many_arguments)]
3662 fn phase_ocpm_events(
3663 &mut self,
3664 document_flows: &DocumentFlowSnapshot,
3665 sourcing: &SourcingSnapshot,
3666 hr: &HrSnapshot,
3667 manufacturing: &ManufacturingSnapshot,
3668 banking: &BankingSnapshot,
3669 audit: &AuditSnapshot,
3670 financial_reporting: &FinancialReportingSnapshot,
3671 stats: &mut EnhancedGenerationStatistics,
3672 ) -> SynthResult<OcpmSnapshot> {
3673 let degradation = self.check_resources()?;
3674 if degradation >= DegradationLevel::Reduced {
3675 debug!(
3676 "Phase skipped due to resource pressure (degradation: {:?})",
3677 degradation
3678 );
3679 return Ok(OcpmSnapshot::default());
3680 }
3681 if self.phase_config.generate_ocpm_events {
3682 info!("Phase 3c: Generating OCPM Events");
3683 let ocpm_snapshot = self.generate_ocpm_events(
3684 document_flows,
3685 sourcing,
3686 hr,
3687 manufacturing,
3688 banking,
3689 audit,
3690 financial_reporting,
3691 )?;
3692 stats.ocpm_event_count = ocpm_snapshot.event_count;
3693 stats.ocpm_object_count = ocpm_snapshot.object_count;
3694 stats.ocpm_case_count = ocpm_snapshot.case_count;
3695 info!(
3696 "OCPM events generated: {} events, {} objects, {} cases",
3697 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3698 );
3699 self.check_resources_with_log("post-ocpm")?;
3700 Ok(ocpm_snapshot)
3701 } else {
3702 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3703 Ok(OcpmSnapshot::default())
3704 }
3705 }
3706
3707 fn phase_journal_entries(
3709 &mut self,
3710 coa: &Arc<ChartOfAccounts>,
3711 document_flows: &DocumentFlowSnapshot,
3712 _stats: &mut EnhancedGenerationStatistics,
3713 ) -> SynthResult<Vec<JournalEntry>> {
3714 let mut entries = Vec::new();
3715
3716 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3718 debug!("Phase 4a: Generating JEs from document flows");
3719 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3720 debug!("Generated {} JEs from document flows", flow_entries.len());
3721 entries.extend(flow_entries);
3722 }
3723
3724 if self.phase_config.generate_journal_entries {
3726 info!("Phase 4: Generating Journal Entries");
3727 let je_entries = self.generate_journal_entries(coa)?;
3728 info!("Generated {} standalone journal entries", je_entries.len());
3729 entries.extend(je_entries);
3730 } else {
3731 debug!("Phase 4: Skipped (journal entry generation disabled)");
3732 }
3733
3734 if !entries.is_empty() {
3735 self.check_resources_with_log("post-journal-entries")?;
3738 }
3739
3740 Ok(entries)
3741 }
3742
3743 fn phase_anomaly_injection(
3745 &mut self,
3746 entries: &mut [JournalEntry],
3747 actions: &DegradationActions,
3748 stats: &mut EnhancedGenerationStatistics,
3749 ) -> SynthResult<AnomalyLabels> {
3750 if self.phase_config.inject_anomalies
3751 && !entries.is_empty()
3752 && !actions.skip_anomaly_injection
3753 {
3754 info!("Phase 5: Injecting Anomalies");
3755 let result = self.inject_anomalies(entries)?;
3756 stats.anomalies_injected = result.labels.len();
3757 info!("Injected {} anomalies", stats.anomalies_injected);
3758 self.check_resources_with_log("post-anomaly-injection")?;
3759 Ok(result)
3760 } else if actions.skip_anomaly_injection {
3761 warn!("Phase 5: Skipped due to resource degradation");
3762 Ok(AnomalyLabels::default())
3763 } else {
3764 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3765 Ok(AnomalyLabels::default())
3766 }
3767 }
3768
3769 fn phase_balance_validation(
3771 &mut self,
3772 entries: &[JournalEntry],
3773 ) -> SynthResult<BalanceValidationResult> {
3774 if self.phase_config.validate_balances && !entries.is_empty() {
3775 debug!("Phase 6: Validating Balances");
3776 let balance_validation = self.validate_journal_entries(entries)?;
3777 if balance_validation.is_balanced {
3778 debug!("Balance validation passed");
3779 } else {
3780 warn!(
3781 "Balance validation found {} errors",
3782 balance_validation.validation_errors.len()
3783 );
3784 }
3785 Ok(balance_validation)
3786 } else {
3787 Ok(BalanceValidationResult::default())
3788 }
3789 }
3790
3791 fn phase_data_quality_injection(
3793 &mut self,
3794 entries: &mut [JournalEntry],
3795 actions: &DegradationActions,
3796 stats: &mut EnhancedGenerationStatistics,
3797 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3798 if self.phase_config.inject_data_quality
3799 && !entries.is_empty()
3800 && !actions.skip_data_quality
3801 {
3802 info!("Phase 7: Injecting Data Quality Variations");
3803 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3804 stats.data_quality_issues = dq_stats.records_with_issues;
3805 info!("Injected {} data quality issues", stats.data_quality_issues);
3806 self.check_resources_with_log("post-data-quality")?;
3807 Ok((dq_stats, quality_issues))
3808 } else if actions.skip_data_quality {
3809 warn!("Phase 7: Skipped due to resource degradation");
3810 Ok((DataQualityStats::default(), Vec::new()))
3811 } else {
3812 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3813 Ok((DataQualityStats::default(), Vec::new()))
3814 }
3815 }
3816
3817 fn phase_period_close(
3827 &mut self,
3828 entries: &mut Vec<JournalEntry>,
3829 subledger: &SubledgerSnapshot,
3830 stats: &mut EnhancedGenerationStatistics,
3831 ) -> SynthResult<()> {
3832 if !self.phase_config.generate_period_close || entries.is_empty() {
3833 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3834 return Ok(());
3835 }
3836
3837 info!("Phase 10b: Generating period-close journal entries");
3838
3839 use datasynth_core::accounts::{
3840 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3841 };
3842 use rust_decimal::Decimal;
3843
3844 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3845 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3846 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3847 let close_date = end_date - chrono::Days::new(1);
3849
3850 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3855 .config
3856 .companies
3857 .iter()
3858 .map(|c| c.code.clone())
3859 .collect();
3860
3861 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3863 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3864
3865 let period_months = self.config.global.period_months;
3869 for asset in &subledger.fa_records {
3870 use datasynth_core::models::subledger::fa::AssetStatus;
3872 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3873 continue;
3874 }
3875 let useful_life_months = asset.useful_life_months();
3876 if useful_life_months == 0 {
3877 continue;
3879 }
3880 let salvage_value = asset.salvage_value();
3881 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3882 if depreciable_base == Decimal::ZERO {
3883 continue;
3884 }
3885 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3886 * Decimal::from(period_months))
3887 .round_dp(2);
3888 if period_depr <= Decimal::ZERO {
3889 continue;
3890 }
3891
3892 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3893 depr_header.document_type = "CL".to_string();
3894 depr_header.header_text = Some(format!(
3895 "Depreciation - {} {}",
3896 asset.asset_number, asset.description
3897 ));
3898 depr_header.created_by = "CLOSE_ENGINE".to_string();
3899 depr_header.source = TransactionSource::Automated;
3900 depr_header.business_process = Some(BusinessProcess::R2R);
3901
3902 let doc_id = depr_header.document_id;
3903 let mut depr_je = JournalEntry::new(depr_header);
3904
3905 depr_je.add_line(JournalEntryLine::debit(
3907 doc_id,
3908 1,
3909 expense_accounts::DEPRECIATION.to_string(),
3910 period_depr,
3911 ));
3912 depr_je.add_line(JournalEntryLine::credit(
3914 doc_id,
3915 2,
3916 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3917 period_depr,
3918 ));
3919
3920 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3921 close_jes.push(depr_je);
3922 }
3923
3924 if !subledger.fa_records.is_empty() {
3925 debug!(
3926 "Generated {} depreciation JEs from {} FA records",
3927 close_jes.len(),
3928 subledger.fa_records.len()
3929 );
3930 }
3931
3932 {
3936 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3937 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3938
3939 let accrual_items: &[(&str, &str, &str)] = &[
3941 ("Accrued Utilities", "6200", "2100"),
3942 ("Accrued Rent", "6300", "2100"),
3943 ("Accrued Interest", "6100", "2150"),
3944 ];
3945
3946 for company_code in &company_codes {
3947 let company_revenue: Decimal = entries
3949 .iter()
3950 .filter(|e| e.header.company_code == *company_code)
3951 .flat_map(|e| e.lines.iter())
3952 .filter(|l| l.gl_account.starts_with('4'))
3953 .map(|l| l.credit_amount - l.debit_amount)
3954 .fold(Decimal::ZERO, |acc, v| acc + v);
3955
3956 if company_revenue <= Decimal::ZERO {
3957 continue;
3958 }
3959
3960 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3962 if accrual_base <= Decimal::ZERO {
3963 continue;
3964 }
3965
3966 for (description, expense_acct, liability_acct) in accrual_items {
3967 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3968 company_code,
3969 description,
3970 accrual_base,
3971 expense_acct,
3972 liability_acct,
3973 close_date,
3974 None,
3975 );
3976 close_jes.push(accrual_je);
3977 if let Some(rev_je) = reversal_je {
3978 close_jes.push(rev_je);
3979 }
3980 }
3981 }
3982
3983 debug!(
3984 "Generated accrual entries for {} companies",
3985 company_codes.len()
3986 );
3987 }
3988
3989 for company_code in &company_codes {
3990 let mut total_revenue = Decimal::ZERO;
3995 let mut total_expenses = Decimal::ZERO;
3996
3997 for entry in entries.iter() {
3998 if entry.header.company_code != *company_code {
3999 continue;
4000 }
4001 for line in &entry.lines {
4002 let category = AccountCategory::from_account(&line.gl_account);
4003 match category {
4004 AccountCategory::Revenue => {
4005 total_revenue += line.credit_amount - line.debit_amount;
4007 }
4008 AccountCategory::Cogs
4009 | AccountCategory::OperatingExpense
4010 | AccountCategory::OtherIncomeExpense
4011 | AccountCategory::Tax => {
4012 total_expenses += line.debit_amount - line.credit_amount;
4014 }
4015 _ => {}
4016 }
4017 }
4018 }
4019
4020 let pre_tax_income = total_revenue - total_expenses;
4021
4022 if pre_tax_income == Decimal::ZERO {
4024 debug!(
4025 "Company {}: no pre-tax income, skipping period close",
4026 company_code
4027 );
4028 continue;
4029 }
4030
4031 if pre_tax_income > Decimal::ZERO {
4033 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4035
4036 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4037 tax_header.document_type = "CL".to_string();
4038 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4039 tax_header.created_by = "CLOSE_ENGINE".to_string();
4040 tax_header.source = TransactionSource::Automated;
4041 tax_header.business_process = Some(BusinessProcess::R2R);
4042
4043 let doc_id = tax_header.document_id;
4044 let mut tax_je = JournalEntry::new(tax_header);
4045
4046 tax_je.add_line(JournalEntryLine::debit(
4048 doc_id,
4049 1,
4050 tax_accounts::TAX_EXPENSE.to_string(),
4051 tax_amount,
4052 ));
4053 tax_je.add_line(JournalEntryLine::credit(
4055 doc_id,
4056 2,
4057 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4058 tax_amount,
4059 ));
4060
4061 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4062 close_jes.push(tax_je);
4063 } else {
4064 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4067 if dta_amount > Decimal::ZERO {
4068 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4069 dta_header.document_type = "CL".to_string();
4070 dta_header.header_text =
4071 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4072 dta_header.created_by = "CLOSE_ENGINE".to_string();
4073 dta_header.source = TransactionSource::Automated;
4074 dta_header.business_process = Some(BusinessProcess::R2R);
4075
4076 let doc_id = dta_header.document_id;
4077 let mut dta_je = JournalEntry::new(dta_header);
4078
4079 dta_je.add_line(JournalEntryLine::debit(
4081 doc_id,
4082 1,
4083 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4084 dta_amount,
4085 ));
4086 dta_je.add_line(JournalEntryLine::credit(
4089 doc_id,
4090 2,
4091 tax_accounts::TAX_EXPENSE.to_string(),
4092 dta_amount,
4093 ));
4094
4095 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4096 close_jes.push(dta_je);
4097 debug!(
4098 "Company {}: loss year — recognised DTA of {}",
4099 company_code, dta_amount
4100 );
4101 }
4102 }
4103
4104 let tax_provision = if pre_tax_income > Decimal::ZERO {
4110 (pre_tax_income * tax_rate).round_dp(2)
4111 } else {
4112 Decimal::ZERO
4113 };
4114 let net_income = pre_tax_income - tax_provision;
4115
4116 if net_income > Decimal::ZERO {
4117 use datasynth_generators::DividendGenerator;
4118 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4120 let currency_str = self
4121 .config
4122 .companies
4123 .iter()
4124 .find(|c| c.code == *company_code)
4125 .map(|c| c.currency.as_str())
4126 .unwrap_or("USD");
4127 let div_result = div_gen.generate(
4128 company_code,
4129 close_date,
4130 Decimal::new(1, 0), dividend_amount,
4132 currency_str,
4133 );
4134 let div_je_count = div_result.journal_entries.len();
4135 close_jes.extend(div_result.journal_entries);
4136 debug!(
4137 "Company {}: declared dividend of {} ({} JEs)",
4138 company_code, dividend_amount, div_je_count
4139 );
4140 }
4141
4142 if net_income != Decimal::ZERO {
4147 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4148 close_header.document_type = "CL".to_string();
4149 close_header.header_text =
4150 Some(format!("Income statement close - {}", company_code));
4151 close_header.created_by = "CLOSE_ENGINE".to_string();
4152 close_header.source = TransactionSource::Automated;
4153 close_header.business_process = Some(BusinessProcess::R2R);
4154
4155 let doc_id = close_header.document_id;
4156 let mut close_je = JournalEntry::new(close_header);
4157
4158 let abs_net_income = net_income.abs();
4159
4160 if net_income > Decimal::ZERO {
4161 close_je.add_line(JournalEntryLine::debit(
4163 doc_id,
4164 1,
4165 equity_accounts::INCOME_SUMMARY.to_string(),
4166 abs_net_income,
4167 ));
4168 close_je.add_line(JournalEntryLine::credit(
4169 doc_id,
4170 2,
4171 equity_accounts::RETAINED_EARNINGS.to_string(),
4172 abs_net_income,
4173 ));
4174 } else {
4175 close_je.add_line(JournalEntryLine::debit(
4177 doc_id,
4178 1,
4179 equity_accounts::RETAINED_EARNINGS.to_string(),
4180 abs_net_income,
4181 ));
4182 close_je.add_line(JournalEntryLine::credit(
4183 doc_id,
4184 2,
4185 equity_accounts::INCOME_SUMMARY.to_string(),
4186 abs_net_income,
4187 ));
4188 }
4189
4190 debug_assert!(
4191 close_je.is_balanced(),
4192 "Income statement closing JE must be balanced"
4193 );
4194 close_jes.push(close_je);
4195 }
4196 }
4197
4198 let close_count = close_jes.len();
4199 if close_count > 0 {
4200 info!("Generated {} period-close journal entries", close_count);
4201 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4202 entries.extend(close_jes);
4203 stats.period_close_je_count = close_count;
4204
4205 stats.total_entries = entries.len() as u64;
4207 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4208 } else {
4209 debug!("No period-close entries generated (no income statement activity)");
4210 }
4211
4212 Ok(())
4213 }
4214
4215 fn phase_audit_data(
4217 &mut self,
4218 entries: &[JournalEntry],
4219 stats: &mut EnhancedGenerationStatistics,
4220 ) -> SynthResult<AuditSnapshot> {
4221 if self.phase_config.generate_audit {
4222 info!("Phase 8: Generating Audit Data");
4223 let audit_snapshot = self.generate_audit_data(entries)?;
4224 stats.audit_engagement_count = audit_snapshot.engagements.len();
4225 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4226 stats.audit_evidence_count = audit_snapshot.evidence.len();
4227 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4228 stats.audit_finding_count = audit_snapshot.findings.len();
4229 stats.audit_judgment_count = audit_snapshot.judgments.len();
4230 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4231 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4232 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4233 stats.audit_sample_count = audit_snapshot.samples.len();
4234 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4235 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4236 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4237 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4238 stats.audit_related_party_transaction_count =
4239 audit_snapshot.related_party_transactions.len();
4240 info!(
4241 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4242 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4243 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4244 {} RP transactions",
4245 stats.audit_engagement_count,
4246 stats.audit_workpaper_count,
4247 stats.audit_evidence_count,
4248 stats.audit_risk_count,
4249 stats.audit_finding_count,
4250 stats.audit_judgment_count,
4251 stats.audit_confirmation_count,
4252 stats.audit_procedure_step_count,
4253 stats.audit_sample_count,
4254 stats.audit_analytical_result_count,
4255 stats.audit_ia_function_count,
4256 stats.audit_ia_report_count,
4257 stats.audit_related_party_count,
4258 stats.audit_related_party_transaction_count,
4259 );
4260 self.check_resources_with_log("post-audit")?;
4261 Ok(audit_snapshot)
4262 } else {
4263 debug!("Phase 8: Skipped (audit generation disabled)");
4264 Ok(AuditSnapshot::default())
4265 }
4266 }
4267
4268 fn phase_banking_data(
4270 &mut self,
4271 stats: &mut EnhancedGenerationStatistics,
4272 ) -> SynthResult<BankingSnapshot> {
4273 if self.phase_config.generate_banking {
4274 info!("Phase 9: Generating Banking KYC/AML Data");
4275 let banking_snapshot = self.generate_banking_data()?;
4276 stats.banking_customer_count = banking_snapshot.customers.len();
4277 stats.banking_account_count = banking_snapshot.accounts.len();
4278 stats.banking_transaction_count = banking_snapshot.transactions.len();
4279 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4280 info!(
4281 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4282 stats.banking_customer_count, stats.banking_account_count,
4283 stats.banking_transaction_count, stats.banking_suspicious_count
4284 );
4285 self.check_resources_with_log("post-banking")?;
4286 Ok(banking_snapshot)
4287 } else {
4288 debug!("Phase 9: Skipped (banking generation disabled)");
4289 Ok(BankingSnapshot::default())
4290 }
4291 }
4292
4293 fn phase_graph_export(
4295 &mut self,
4296 entries: &[JournalEntry],
4297 coa: &Arc<ChartOfAccounts>,
4298 stats: &mut EnhancedGenerationStatistics,
4299 ) -> SynthResult<GraphExportSnapshot> {
4300 if self.phase_config.generate_graph_export && !entries.is_empty() {
4301 info!("Phase 10: Exporting Accounting Network Graphs");
4302 match self.export_graphs(entries, coa, stats) {
4303 Ok(snapshot) => {
4304 info!(
4305 "Graph export complete: {} graphs ({} nodes, {} edges)",
4306 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4307 );
4308 Ok(snapshot)
4309 }
4310 Err(e) => {
4311 warn!("Phase 10: Graph export failed: {}", e);
4312 Ok(GraphExportSnapshot::default())
4313 }
4314 }
4315 } else {
4316 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4317 Ok(GraphExportSnapshot::default())
4318 }
4319 }
4320
4321 #[allow(clippy::too_many_arguments)]
4323 fn phase_hypergraph_export(
4324 &self,
4325 coa: &Arc<ChartOfAccounts>,
4326 entries: &[JournalEntry],
4327 document_flows: &DocumentFlowSnapshot,
4328 sourcing: &SourcingSnapshot,
4329 hr: &HrSnapshot,
4330 manufacturing: &ManufacturingSnapshot,
4331 banking: &BankingSnapshot,
4332 audit: &AuditSnapshot,
4333 financial_reporting: &FinancialReportingSnapshot,
4334 ocpm: &OcpmSnapshot,
4335 compliance: &ComplianceRegulationsSnapshot,
4336 stats: &mut EnhancedGenerationStatistics,
4337 ) -> SynthResult<()> {
4338 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4339 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4340 match self.export_hypergraph(
4341 coa,
4342 entries,
4343 document_flows,
4344 sourcing,
4345 hr,
4346 manufacturing,
4347 banking,
4348 audit,
4349 financial_reporting,
4350 ocpm,
4351 compliance,
4352 stats,
4353 ) {
4354 Ok(info) => {
4355 info!(
4356 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4357 info.node_count, info.edge_count, info.hyperedge_count
4358 );
4359 }
4360 Err(e) => {
4361 warn!("Phase 10b: Hypergraph export failed: {}", e);
4362 }
4363 }
4364 } else {
4365 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4366 }
4367 Ok(())
4368 }
4369
4370 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4376 if !self.config.llm.enabled {
4377 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4378 return;
4379 }
4380
4381 info!("Phase 11: Starting LLM Enrichment");
4382 let start = std::time::Instant::now();
4383
4384 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4385 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4388 let schema_provider = &self.config.llm.provider;
4389 let api_key_env = match schema_provider.as_str() {
4390 "openai" => Some("OPENAI_API_KEY"),
4391 "anthropic" => Some("ANTHROPIC_API_KEY"),
4392 "custom" => Some("LLM_API_KEY"),
4393 _ => None,
4394 };
4395 if let Some(key_env) = api_key_env {
4396 if std::env::var(key_env).is_ok() {
4397 let llm_config = datasynth_core::llm::LlmConfig {
4398 model: self.config.llm.model.clone(),
4399 api_key_env: key_env.to_string(),
4400 ..datasynth_core::llm::LlmConfig::default()
4401 };
4402 match HttpLlmProvider::new(llm_config) {
4403 Ok(p) => Arc::new(p),
4404 Err(e) => {
4405 warn!(
4406 "Failed to create HttpLlmProvider: {}; falling back to mock",
4407 e
4408 );
4409 Arc::new(MockLlmProvider::new(self.seed))
4410 }
4411 }
4412 } else {
4413 Arc::new(MockLlmProvider::new(self.seed))
4414 }
4415 } else {
4416 Arc::new(MockLlmProvider::new(self.seed))
4417 }
4418 };
4419 let enricher = VendorLlmEnricher::new(provider);
4420
4421 let industry = format!("{:?}", self.config.global.industry);
4422 let max_enrichments = self
4423 .config
4424 .llm
4425 .max_vendor_enrichments
4426 .min(self.master_data.vendors.len());
4427
4428 let mut enriched_count = 0usize;
4429 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4430 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4431 Ok(name) => {
4432 vendor.name = name;
4433 enriched_count += 1;
4434 }
4435 Err(e) => {
4436 warn!(
4437 "LLM vendor enrichment failed for {}: {}",
4438 vendor.vendor_id, e
4439 );
4440 }
4441 }
4442 }
4443
4444 enriched_count
4445 }));
4446
4447 match result {
4448 Ok(enriched_count) => {
4449 stats.llm_vendors_enriched = enriched_count;
4450 let elapsed = start.elapsed();
4451 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4452 info!(
4453 "Phase 11 complete: {} vendors enriched in {}ms",
4454 enriched_count, stats.llm_enrichment_ms
4455 );
4456 }
4457 Err(_) => {
4458 let elapsed = start.elapsed();
4459 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4460 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4461 }
4462 }
4463 }
4464
4465 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4471 if !self.config.diffusion.enabled {
4472 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4473 return;
4474 }
4475
4476 info!("Phase 12: Starting Diffusion Enhancement");
4477 let start = std::time::Instant::now();
4478
4479 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4480 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4483
4484 let diffusion_config = DiffusionConfig {
4485 n_steps: self.config.diffusion.n_steps,
4486 seed: self.seed,
4487 ..Default::default()
4488 };
4489
4490 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4491
4492 let n_samples = self.config.diffusion.sample_size;
4493 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4495
4496 samples.len()
4497 }));
4498
4499 match result {
4500 Ok(sample_count) => {
4501 stats.diffusion_samples_generated = sample_count;
4502 let elapsed = start.elapsed();
4503 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4504 info!(
4505 "Phase 12 complete: {} diffusion samples generated in {}ms",
4506 sample_count, stats.diffusion_enhancement_ms
4507 );
4508 }
4509 Err(_) => {
4510 let elapsed = start.elapsed();
4511 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4512 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4513 }
4514 }
4515 }
4516
4517 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4524 if !self.config.causal.enabled {
4525 debug!("Phase 13: Skipped (causal generation disabled)");
4526 return;
4527 }
4528
4529 info!("Phase 13: Starting Causal Overlay");
4530 let start = std::time::Instant::now();
4531
4532 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4533 let graph = match self.config.causal.template.as_str() {
4535 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4536 _ => CausalGraph::fraud_detection_template(),
4537 };
4538
4539 let scm = StructuralCausalModel::new(graph.clone())
4540 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4541
4542 let n_samples = self.config.causal.sample_size;
4543 let samples = scm
4544 .generate(n_samples, self.seed)
4545 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4546
4547 let validation_passed = if self.config.causal.validate {
4549 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4550 if report.valid {
4551 info!(
4552 "Causal validation passed: all {} checks OK",
4553 report.checks.len()
4554 );
4555 } else {
4556 warn!(
4557 "Causal validation: {} violations detected: {:?}",
4558 report.violations.len(),
4559 report.violations
4560 );
4561 }
4562 Some(report.valid)
4563 } else {
4564 None
4565 };
4566
4567 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4568 }));
4569
4570 match result {
4571 Ok(Ok((sample_count, validation_passed))) => {
4572 stats.causal_samples_generated = sample_count;
4573 stats.causal_validation_passed = validation_passed;
4574 let elapsed = start.elapsed();
4575 stats.causal_generation_ms = elapsed.as_millis() as u64;
4576 info!(
4577 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4578 sample_count, stats.causal_generation_ms, validation_passed,
4579 );
4580 }
4581 Ok(Err(e)) => {
4582 let elapsed = start.elapsed();
4583 stats.causal_generation_ms = elapsed.as_millis() as u64;
4584 warn!("Phase 13: Causal generation failed: {}", e);
4585 }
4586 Err(_) => {
4587 let elapsed = start.elapsed();
4588 stats.causal_generation_ms = elapsed.as_millis() as u64;
4589 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4590 }
4591 }
4592 }
4593
4594 fn phase_sourcing_data(
4596 &mut self,
4597 stats: &mut EnhancedGenerationStatistics,
4598 ) -> SynthResult<SourcingSnapshot> {
4599 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4600 debug!("Phase 14: Skipped (sourcing generation disabled)");
4601 return Ok(SourcingSnapshot::default());
4602 }
4603 let degradation = self.check_resources()?;
4604 if degradation >= DegradationLevel::Reduced {
4605 debug!(
4606 "Phase skipped due to resource pressure (degradation: {:?})",
4607 degradation
4608 );
4609 return Ok(SourcingSnapshot::default());
4610 }
4611
4612 info!("Phase 14: Generating S2C Sourcing Data");
4613 let seed = self.seed;
4614
4615 let vendor_ids: Vec<String> = self
4617 .master_data
4618 .vendors
4619 .iter()
4620 .map(|v| v.vendor_id.clone())
4621 .collect();
4622 if vendor_ids.is_empty() {
4623 debug!("Phase 14: Skipped (no vendors available)");
4624 return Ok(SourcingSnapshot::default());
4625 }
4626
4627 let categories: Vec<(String, String)> = vec![
4628 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4629 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4630 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4631 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4632 ("CAT-LOG".to_string(), "Logistics".to_string()),
4633 ];
4634 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4635 .iter()
4636 .map(|(id, name)| {
4637 (
4638 id.clone(),
4639 name.clone(),
4640 rust_decimal::Decimal::from(100_000),
4641 )
4642 })
4643 .collect();
4644
4645 let company_code = self
4646 .config
4647 .companies
4648 .first()
4649 .map(|c| c.code.as_str())
4650 .unwrap_or("1000");
4651 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4652 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4653 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4654 let fiscal_year = start_date.year() as u16;
4655 let owner_ids: Vec<String> = self
4656 .master_data
4657 .employees
4658 .iter()
4659 .take(5)
4660 .map(|e| e.employee_id.clone())
4661 .collect();
4662 let owner_id = owner_ids
4663 .first()
4664 .map(std::string::String::as_str)
4665 .unwrap_or("BUYER-001");
4666
4667 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4669 let spend_analyses =
4670 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4671
4672 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4674 let sourcing_projects = if owner_ids.is_empty() {
4675 Vec::new()
4676 } else {
4677 project_gen.generate(
4678 company_code,
4679 &categories_with_spend,
4680 &owner_ids,
4681 start_date,
4682 self.config.global.period_months,
4683 )
4684 };
4685 stats.sourcing_project_count = sourcing_projects.len();
4686
4687 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4689 let mut qual_gen = QualificationGenerator::new(seed + 2);
4690 let qualifications = qual_gen.generate(
4691 company_code,
4692 &qual_vendor_ids,
4693 sourcing_projects.first().map(|p| p.project_id.as_str()),
4694 owner_id,
4695 start_date,
4696 );
4697
4698 let mut rfx_gen = RfxGenerator::new(seed + 3);
4700 let rfx_events: Vec<RfxEvent> = sourcing_projects
4701 .iter()
4702 .map(|proj| {
4703 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4704 rfx_gen.generate(
4705 company_code,
4706 &proj.project_id,
4707 &proj.category_id,
4708 &qualified_vids,
4709 owner_id,
4710 start_date,
4711 50000.0,
4712 )
4713 })
4714 .collect();
4715 stats.rfx_event_count = rfx_events.len();
4716
4717 let mut bid_gen = BidGenerator::new(seed + 4);
4719 let mut all_bids = Vec::new();
4720 for rfx in &rfx_events {
4721 let bidder_count = vendor_ids.len().clamp(2, 5);
4722 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4723 let bids = bid_gen.generate(rfx, &responding, start_date);
4724 all_bids.extend(bids);
4725 }
4726 stats.bid_count = all_bids.len();
4727
4728 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4730 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4731 .iter()
4732 .map(|rfx| {
4733 let rfx_bids: Vec<SupplierBid> = all_bids
4734 .iter()
4735 .filter(|b| b.rfx_id == rfx.rfx_id)
4736 .cloned()
4737 .collect();
4738 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4739 })
4740 .collect();
4741
4742 let mut contract_gen = ContractGenerator::new(seed + 6);
4744 let contracts: Vec<ProcurementContract> = bid_evaluations
4745 .iter()
4746 .zip(rfx_events.iter())
4747 .filter_map(|(eval, rfx)| {
4748 eval.ranked_bids.first().and_then(|winner| {
4749 all_bids
4750 .iter()
4751 .find(|b| b.bid_id == winner.bid_id)
4752 .map(|winning_bid| {
4753 contract_gen.generate_from_bid(
4754 winning_bid,
4755 Some(&rfx.sourcing_project_id),
4756 &rfx.category_id,
4757 owner_id,
4758 start_date,
4759 )
4760 })
4761 })
4762 })
4763 .collect();
4764 stats.contract_count = contracts.len();
4765
4766 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4768 let catalog_items = catalog_gen.generate(&contracts);
4769 stats.catalog_item_count = catalog_items.len();
4770
4771 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4773 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4774 .iter()
4775 .fold(
4776 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4777 |mut acc, c| {
4778 acc.entry(c.vendor_id.clone()).or_default().push(c);
4779 acc
4780 },
4781 )
4782 .into_iter()
4783 .collect();
4784 let scorecards = scorecard_gen.generate(
4785 company_code,
4786 &vendor_contracts,
4787 start_date,
4788 end_date,
4789 owner_id,
4790 );
4791 stats.scorecard_count = scorecards.len();
4792
4793 let mut sourcing_projects = sourcing_projects;
4796 for project in &mut sourcing_projects {
4797 project.rfx_ids = rfx_events
4799 .iter()
4800 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4801 .map(|rfx| rfx.rfx_id.clone())
4802 .collect();
4803
4804 project.contract_id = contracts
4806 .iter()
4807 .find(|c| {
4808 c.sourcing_project_id
4809 .as_deref()
4810 .is_some_and(|sp| sp == project.project_id)
4811 })
4812 .map(|c| c.contract_id.clone());
4813
4814 project.spend_analysis_id = spend_analyses
4816 .iter()
4817 .find(|sa| sa.category_id == project.category_id)
4818 .map(|sa| sa.category_id.clone());
4819 }
4820
4821 info!(
4822 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4823 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4824 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4825 );
4826 self.check_resources_with_log("post-sourcing")?;
4827
4828 Ok(SourcingSnapshot {
4829 spend_analyses,
4830 sourcing_projects,
4831 qualifications,
4832 rfx_events,
4833 bids: all_bids,
4834 bid_evaluations,
4835 contracts,
4836 catalog_items,
4837 scorecards,
4838 })
4839 }
4840
4841 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4847 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4848
4849 let parent_code = self
4850 .config
4851 .companies
4852 .first()
4853 .map(|c| c.code.clone())
4854 .unwrap_or_else(|| "PARENT".to_string());
4855
4856 let mut group = GroupStructure::new(parent_code);
4857
4858 for company in self.config.companies.iter().skip(1) {
4859 let sub =
4860 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4861 group.add_subsidiary(sub);
4862 }
4863
4864 group
4865 }
4866
4867 fn phase_intercompany(
4869 &mut self,
4870 journal_entries: &[JournalEntry],
4871 stats: &mut EnhancedGenerationStatistics,
4872 ) -> SynthResult<IntercompanySnapshot> {
4873 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4875 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4876 return Ok(IntercompanySnapshot::default());
4877 }
4878
4879 if self.config.companies.len() < 2 {
4881 debug!(
4882 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4883 self.config.companies.len()
4884 );
4885 return Ok(IntercompanySnapshot::default());
4886 }
4887
4888 info!("Phase 14b: Generating Intercompany Transactions");
4889
4890 let group_structure = self.build_group_structure();
4893 debug!(
4894 "Group structure built: parent={}, subsidiaries={}",
4895 group_structure.parent_entity,
4896 group_structure.subsidiaries.len()
4897 );
4898
4899 let seed = self.seed;
4900 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4901 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4902 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4903
4904 let parent_code = self.config.companies[0].code.clone();
4907 let mut ownership_structure =
4908 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4909
4910 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4911 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4912 format!("REL{:03}", i + 1),
4913 parent_code.clone(),
4914 company.code.clone(),
4915 rust_decimal::Decimal::from(100), start_date,
4917 );
4918 ownership_structure.add_relationship(relationship);
4919 }
4920
4921 let tp_method = match self.config.intercompany.transfer_pricing_method {
4923 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4924 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4925 }
4926 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4927 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4928 }
4929 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4930 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4931 }
4932 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4933 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4934 }
4935 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4936 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4937 }
4938 };
4939
4940 let ic_currency = self
4942 .config
4943 .companies
4944 .first()
4945 .map(|c| c.currency.clone())
4946 .unwrap_or_else(|| "USD".to_string());
4947 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4948 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4949 transfer_pricing_method: tp_method,
4950 markup_percent: rust_decimal::Decimal::from_f64_retain(
4951 self.config.intercompany.markup_percent,
4952 )
4953 .unwrap_or(rust_decimal::Decimal::from(5)),
4954 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4955 default_currency: ic_currency,
4956 ..Default::default()
4957 };
4958
4959 let mut ic_generator = datasynth_generators::ICGenerator::new(
4961 ic_gen_config,
4962 ownership_structure.clone(),
4963 seed + 50,
4964 );
4965
4966 let transactions_per_day = 3;
4969 let matched_pairs = ic_generator.generate_transactions_for_period(
4970 start_date,
4971 end_date,
4972 transactions_per_day,
4973 );
4974
4975 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4977 debug!(
4978 "Generated {} IC seller invoices, {} IC buyer POs",
4979 ic_doc_chains.seller_invoices.len(),
4980 ic_doc_chains.buyer_orders.len()
4981 );
4982
4983 let mut seller_entries = Vec::new();
4985 let mut buyer_entries = Vec::new();
4986 let fiscal_year = start_date.year();
4987
4988 for pair in &matched_pairs {
4989 let fiscal_period = pair.posting_date.month();
4990 let (seller_je, buyer_je) =
4991 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4992 seller_entries.push(seller_je);
4993 buyer_entries.push(buyer_je);
4994 }
4995
4996 let matching_config = datasynth_generators::ICMatchingConfig {
4998 base_currency: self
4999 .config
5000 .companies
5001 .first()
5002 .map(|c| c.currency.clone())
5003 .unwrap_or_else(|| "USD".to_string()),
5004 ..Default::default()
5005 };
5006 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5007 matching_engine.load_matched_pairs(&matched_pairs);
5008 let matching_result = matching_engine.run_matching(end_date);
5009
5010 let mut elimination_entries = Vec::new();
5012 if self.config.intercompany.generate_eliminations {
5013 let elim_config = datasynth_generators::EliminationConfig {
5014 consolidation_entity: "GROUP".to_string(),
5015 base_currency: self
5016 .config
5017 .companies
5018 .first()
5019 .map(|c| c.currency.clone())
5020 .unwrap_or_else(|| "USD".to_string()),
5021 ..Default::default()
5022 };
5023
5024 let mut elim_generator =
5025 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5026
5027 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5028 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5029 matching_result
5030 .matched_balances
5031 .iter()
5032 .chain(matching_result.unmatched_balances.iter())
5033 .cloned()
5034 .collect();
5035
5036 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5048 std::collections::HashMap::new();
5049 let mut equity_amounts: std::collections::HashMap<
5050 String,
5051 std::collections::HashMap<String, rust_decimal::Decimal>,
5052 > = std::collections::HashMap::new();
5053 {
5054 use rust_decimal::Decimal;
5055 let hundred = Decimal::from(100u32);
5056 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5060 for sub in &group_structure.subsidiaries {
5061 let net_assets = {
5062 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5063 if na > Decimal::ZERO {
5064 na
5065 } else {
5066 Decimal::from(1_000_000u64)
5067 }
5068 };
5069 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5071 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5072
5073 let mut eq_map = std::collections::HashMap::new();
5076 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5077 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5078 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5079 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5080 }
5081 }
5082
5083 let journal = elim_generator.generate_eliminations(
5084 &fiscal_period,
5085 end_date,
5086 &all_balances,
5087 &matched_pairs,
5088 &investment_amounts,
5089 &equity_amounts,
5090 );
5091
5092 elimination_entries = journal.entries.clone();
5093 }
5094
5095 let matched_pair_count = matched_pairs.len();
5096 let elimination_entry_count = elimination_entries.len();
5097 let match_rate = matching_result.match_rate;
5098
5099 stats.ic_matched_pair_count = matched_pair_count;
5100 stats.ic_elimination_count = elimination_entry_count;
5101 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5102
5103 info!(
5104 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5105 matched_pair_count,
5106 stats.ic_transaction_count,
5107 seller_entries.len(),
5108 buyer_entries.len(),
5109 elimination_entry_count,
5110 match_rate * 100.0
5111 );
5112 self.check_resources_with_log("post-intercompany")?;
5113
5114 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5118 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5119 use rust_decimal::Decimal;
5120
5121 let eight_pct = Decimal::new(8, 2); group_structure
5124 .subsidiaries
5125 .iter()
5126 .filter(|sub| {
5127 sub.nci_percentage > Decimal::ZERO
5128 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5129 })
5130 .map(|sub| {
5131 let net_assets_from_jes =
5135 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5136
5137 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5138 net_assets_from_jes.round_dp(2)
5139 } else {
5140 Decimal::from(1_000_000u64)
5142 };
5143
5144 let net_income = (net_assets * eight_pct).round_dp(2);
5146
5147 NciMeasurement::compute(
5148 sub.entity_code.clone(),
5149 sub.nci_percentage,
5150 net_assets,
5151 net_income,
5152 )
5153 })
5154 .collect()
5155 };
5156
5157 if !nci_measurements.is_empty() {
5158 info!(
5159 "NCI measurements: {} subsidiaries with non-controlling interests",
5160 nci_measurements.len()
5161 );
5162 }
5163
5164 Ok(IntercompanySnapshot {
5165 group_structure: Some(group_structure),
5166 matched_pairs,
5167 seller_journal_entries: seller_entries,
5168 buyer_journal_entries: buyer_entries,
5169 elimination_entries,
5170 nci_measurements,
5171 ic_document_chains: Some(ic_doc_chains),
5172 matched_pair_count,
5173 elimination_entry_count,
5174 match_rate,
5175 })
5176 }
5177
5178 fn phase_financial_reporting(
5180 &mut self,
5181 document_flows: &DocumentFlowSnapshot,
5182 journal_entries: &[JournalEntry],
5183 coa: &Arc<ChartOfAccounts>,
5184 _hr: &HrSnapshot,
5185 _audit: &AuditSnapshot,
5186 stats: &mut EnhancedGenerationStatistics,
5187 ) -> SynthResult<FinancialReportingSnapshot> {
5188 let fs_enabled = self.phase_config.generate_financial_statements
5189 || self.config.financial_reporting.enabled;
5190 let br_enabled = self.phase_config.generate_bank_reconciliation;
5191
5192 if !fs_enabled && !br_enabled {
5193 debug!("Phase 15: Skipped (financial reporting disabled)");
5194 return Ok(FinancialReportingSnapshot::default());
5195 }
5196
5197 info!("Phase 15: Generating Financial Reporting Data");
5198
5199 let seed = self.seed;
5200 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5201 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5202
5203 let mut financial_statements = Vec::new();
5204 let mut bank_reconciliations = Vec::new();
5205 let mut trial_balances = Vec::new();
5206 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5207 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5208 Vec::new();
5209 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5211 std::collections::HashMap::new();
5212 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5214 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5216
5217 if fs_enabled {
5225 let has_journal_entries = !journal_entries.is_empty();
5226
5227 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5230 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5232
5233 let elimination_entries: Vec<&JournalEntry> = journal_entries
5235 .iter()
5236 .filter(|je| je.header.is_elimination)
5237 .collect();
5238
5239 for period in 0..self.config.global.period_months {
5241 let period_start = start_date + chrono::Months::new(period);
5242 let period_end =
5243 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5244 let fiscal_year = period_end.year() as u16;
5245 let fiscal_period = period_end.month() as u8;
5246 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5247
5248 let mut entity_tb_map: std::collections::HashMap<
5251 String,
5252 std::collections::HashMap<String, rust_decimal::Decimal>,
5253 > = std::collections::HashMap::new();
5254
5255 for (company_idx, company) in self.config.companies.iter().enumerate() {
5257 let company_code = company.code.as_str();
5258 let currency = company.currency.as_str();
5259 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5262 let mut company_fs_gen =
5263 FinancialStatementGenerator::new(seed + company_seed_offset);
5264
5265 if has_journal_entries {
5266 let tb_entries = Self::build_cumulative_trial_balance(
5267 journal_entries,
5268 coa,
5269 company_code,
5270 start_date,
5271 period_end,
5272 fiscal_year,
5273 fiscal_period,
5274 );
5275
5276 let entity_cat_map =
5278 entity_tb_map.entry(company_code.to_string()).or_default();
5279 for tb_entry in &tb_entries {
5280 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5281 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5282 }
5283
5284 let stmts = company_fs_gen.generate(
5285 company_code,
5286 currency,
5287 &tb_entries,
5288 period_start,
5289 period_end,
5290 fiscal_year,
5291 fiscal_period,
5292 None,
5293 "SYS-AUTOCLOSE",
5294 );
5295
5296 let mut entity_stmts = Vec::new();
5297 for stmt in stmts {
5298 if stmt.statement_type == StatementType::CashFlowStatement {
5299 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5300 let cf_items = Self::build_cash_flow_from_trial_balances(
5301 &tb_entries,
5302 None,
5303 net_income,
5304 );
5305 entity_stmts.push(FinancialStatement {
5306 cash_flow_items: cf_items,
5307 ..stmt
5308 });
5309 } else {
5310 entity_stmts.push(stmt);
5311 }
5312 }
5313
5314 financial_statements.extend(entity_stmts.clone());
5316
5317 standalone_statements
5319 .entry(company_code.to_string())
5320 .or_default()
5321 .extend(entity_stmts);
5322
5323 if company_idx == 0 {
5326 trial_balances.push(PeriodTrialBalance {
5327 fiscal_year,
5328 fiscal_period,
5329 period_start,
5330 period_end,
5331 entries: tb_entries,
5332 });
5333 }
5334 } else {
5335 let tb_entries = Self::build_trial_balance_from_entries(
5337 journal_entries,
5338 coa,
5339 company_code,
5340 fiscal_year,
5341 fiscal_period,
5342 );
5343
5344 let stmts = company_fs_gen.generate(
5345 company_code,
5346 currency,
5347 &tb_entries,
5348 period_start,
5349 period_end,
5350 fiscal_year,
5351 fiscal_period,
5352 None,
5353 "SYS-AUTOCLOSE",
5354 );
5355 financial_statements.extend(stmts.clone());
5356 standalone_statements
5357 .entry(company_code.to_string())
5358 .or_default()
5359 .extend(stmts);
5360
5361 if company_idx == 0 && !tb_entries.is_empty() {
5362 trial_balances.push(PeriodTrialBalance {
5363 fiscal_year,
5364 fiscal_period,
5365 period_start,
5366 period_end,
5367 entries: tb_entries,
5368 });
5369 }
5370 }
5371 }
5372
5373 let group_currency = self
5376 .config
5377 .companies
5378 .first()
5379 .map(|c| c.currency.as_str())
5380 .unwrap_or("USD");
5381
5382 let period_eliminations: Vec<JournalEntry> = elimination_entries
5384 .iter()
5385 .filter(|je| {
5386 je.header.fiscal_year == fiscal_year
5387 && je.header.fiscal_period == fiscal_period
5388 })
5389 .map(|je| (*je).clone())
5390 .collect();
5391
5392 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5393 &entity_tb_map,
5394 &period_eliminations,
5395 &period_label,
5396 );
5397
5398 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5401 .line_items
5402 .iter()
5403 .map(|li| {
5404 let net = li.post_elimination_total;
5405 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5406 (net, rust_decimal::Decimal::ZERO)
5407 } else {
5408 (rust_decimal::Decimal::ZERO, -net)
5409 };
5410 datasynth_generators::TrialBalanceEntry {
5411 account_code: li.account_category.clone(),
5412 account_name: li.account_category.clone(),
5413 category: li.account_category.clone(),
5414 debit_balance: debit,
5415 credit_balance: credit,
5416 }
5417 })
5418 .collect();
5419
5420 let mut cons_stmts = cons_gen.generate(
5421 "GROUP",
5422 group_currency,
5423 &cons_tb,
5424 period_start,
5425 period_end,
5426 fiscal_year,
5427 fiscal_period,
5428 None,
5429 "SYS-AUTOCLOSE",
5430 );
5431
5432 let bs_categories: &[&str] = &[
5436 "CASH",
5437 "RECEIVABLES",
5438 "INVENTORY",
5439 "FIXEDASSETS",
5440 "PAYABLES",
5441 "ACCRUEDLIABILITIES",
5442 "LONGTERMDEBT",
5443 "EQUITY",
5444 ];
5445 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5446 cons_line_items.into_iter().partition(|li| {
5447 let upper = li.label.to_uppercase();
5448 bs_categories.iter().any(|c| upper == *c)
5449 });
5450
5451 for stmt in &mut cons_stmts {
5452 stmt.is_consolidated = true;
5453 match stmt.statement_type {
5454 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5455 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5456 _ => {} }
5458 }
5459
5460 consolidated_statements.extend(cons_stmts);
5461 consolidation_schedules.push(schedule);
5462 }
5463
5464 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5470 info!(
5471 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5472 stats.financial_statement_count,
5473 consolidated_statements.len(),
5474 has_journal_entries
5475 );
5476
5477 let entity_seeds: Vec<SegmentSeed> = self
5482 .config
5483 .companies
5484 .iter()
5485 .map(|c| SegmentSeed {
5486 code: c.code.clone(),
5487 name: c.name.clone(),
5488 currency: c.currency.clone(),
5489 })
5490 .collect();
5491
5492 let mut seg_gen = SegmentGenerator::new(seed + 30);
5493
5494 for period in 0..self.config.global.period_months {
5499 let period_end =
5500 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5501 let fiscal_year = period_end.year() as u16;
5502 let fiscal_period = period_end.month() as u8;
5503 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5504
5505 use datasynth_core::models::StatementType;
5506
5507 let cons_is = consolidated_statements.iter().find(|s| {
5509 s.fiscal_year == fiscal_year
5510 && s.fiscal_period == fiscal_period
5511 && s.statement_type == StatementType::IncomeStatement
5512 });
5513 let cons_bs = consolidated_statements.iter().find(|s| {
5514 s.fiscal_year == fiscal_year
5515 && s.fiscal_period == fiscal_period
5516 && s.statement_type == StatementType::BalanceSheet
5517 });
5518
5519 let is_stmt = cons_is.or_else(|| {
5521 financial_statements.iter().find(|s| {
5522 s.fiscal_year == fiscal_year
5523 && s.fiscal_period == fiscal_period
5524 && s.statement_type == StatementType::IncomeStatement
5525 })
5526 });
5527 let bs_stmt = cons_bs.or_else(|| {
5528 financial_statements.iter().find(|s| {
5529 s.fiscal_year == fiscal_year
5530 && s.fiscal_period == fiscal_period
5531 && s.statement_type == StatementType::BalanceSheet
5532 })
5533 });
5534
5535 let consolidated_revenue = is_stmt
5536 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5537 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5539
5540 let consolidated_profit = is_stmt
5541 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5542 .map(|li| li.amount)
5543 .unwrap_or(rust_decimal::Decimal::ZERO);
5544
5545 let consolidated_assets = bs_stmt
5546 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5547 .map(|li| li.amount)
5548 .unwrap_or(rust_decimal::Decimal::ZERO);
5549
5550 if consolidated_revenue == rust_decimal::Decimal::ZERO
5552 && consolidated_assets == rust_decimal::Decimal::ZERO
5553 {
5554 continue;
5555 }
5556
5557 let group_code = self
5558 .config
5559 .companies
5560 .first()
5561 .map(|c| c.code.as_str())
5562 .unwrap_or("GROUP");
5563
5564 let total_depr: rust_decimal::Decimal = journal_entries
5567 .iter()
5568 .filter(|je| je.header.document_type == "CL")
5569 .flat_map(|je| je.lines.iter())
5570 .filter(|l| l.gl_account.starts_with("6000"))
5571 .map(|l| l.debit_amount)
5572 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5573 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5574 Some(total_depr)
5575 } else {
5576 None
5577 };
5578
5579 let (segs, recon) = seg_gen.generate(
5580 group_code,
5581 &period_label,
5582 consolidated_revenue,
5583 consolidated_profit,
5584 consolidated_assets,
5585 &entity_seeds,
5586 depr_param,
5587 );
5588 segment_reports.extend(segs);
5589 segment_reconciliations.push(recon);
5590 }
5591
5592 info!(
5593 "Segment reports generated: {} segments, {} reconciliations",
5594 segment_reports.len(),
5595 segment_reconciliations.len()
5596 );
5597 }
5598
5599 if br_enabled && !document_flows.payments.is_empty() {
5601 let employee_ids: Vec<String> = self
5602 .master_data
5603 .employees
5604 .iter()
5605 .map(|e| e.employee_id.clone())
5606 .collect();
5607 let mut br_gen =
5608 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5609
5610 for company in &self.config.companies {
5612 let company_payments: Vec<PaymentReference> = document_flows
5613 .payments
5614 .iter()
5615 .filter(|p| p.header.company_code == company.code)
5616 .map(|p| PaymentReference {
5617 id: p.header.document_id.clone(),
5618 amount: if p.is_vendor { p.amount } else { -p.amount },
5619 date: p.header.document_date,
5620 reference: p
5621 .check_number
5622 .clone()
5623 .or_else(|| p.wire_reference.clone())
5624 .unwrap_or_else(|| p.header.document_id.clone()),
5625 })
5626 .collect();
5627
5628 if company_payments.is_empty() {
5629 continue;
5630 }
5631
5632 let bank_account_id = format!("{}-MAIN", company.code);
5633
5634 for period in 0..self.config.global.period_months {
5636 let period_start = start_date + chrono::Months::new(period);
5637 let period_end =
5638 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5639
5640 let period_payments: Vec<PaymentReference> = company_payments
5641 .iter()
5642 .filter(|p| p.date >= period_start && p.date <= period_end)
5643 .cloned()
5644 .collect();
5645
5646 let recon = br_gen.generate(
5647 &company.code,
5648 &bank_account_id,
5649 period_start,
5650 period_end,
5651 &company.currency,
5652 &period_payments,
5653 );
5654 bank_reconciliations.push(recon);
5655 }
5656 }
5657 info!(
5658 "Bank reconciliations generated: {} reconciliations",
5659 bank_reconciliations.len()
5660 );
5661 }
5662
5663 stats.bank_reconciliation_count = bank_reconciliations.len();
5664 self.check_resources_with_log("post-financial-reporting")?;
5665
5666 if !trial_balances.is_empty() {
5667 info!(
5668 "Period-close trial balances captured: {} periods",
5669 trial_balances.len()
5670 );
5671 }
5672
5673 let notes_to_financial_statements = Vec::new();
5677
5678 Ok(FinancialReportingSnapshot {
5679 financial_statements,
5680 standalone_statements,
5681 consolidated_statements,
5682 consolidation_schedules,
5683 bank_reconciliations,
5684 trial_balances,
5685 segment_reports,
5686 segment_reconciliations,
5687 notes_to_financial_statements,
5688 })
5689 }
5690
5691 fn generate_notes_to_financial_statements(
5698 &self,
5699 financial_reporting: &mut FinancialReportingSnapshot,
5700 accounting_standards: &AccountingStandardsSnapshot,
5701 tax: &TaxSnapshot,
5702 hr: &HrSnapshot,
5703 audit: &AuditSnapshot,
5704 treasury: &TreasurySnapshot,
5705 ) {
5706 use datasynth_config::schema::AccountingFrameworkConfig;
5707 use datasynth_core::models::StatementType;
5708 use datasynth_generators::period_close::notes_generator::{
5709 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5710 };
5711
5712 let seed = self.seed;
5713 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5714 {
5715 Ok(d) => d,
5716 Err(_) => return,
5717 };
5718
5719 let mut notes_gen = NotesGenerator::new(seed + 4235);
5720
5721 for company in &self.config.companies {
5722 let last_period_end = start_date
5723 + chrono::Months::new(self.config.global.period_months)
5724 - chrono::Days::new(1);
5725 let fiscal_year = last_period_end.year() as u16;
5726
5727 let entity_is = financial_reporting
5729 .standalone_statements
5730 .get(&company.code)
5731 .and_then(|stmts| {
5732 stmts.iter().find(|s| {
5733 s.fiscal_year == fiscal_year
5734 && s.statement_type == StatementType::IncomeStatement
5735 })
5736 });
5737 let entity_bs = financial_reporting
5738 .standalone_statements
5739 .get(&company.code)
5740 .and_then(|stmts| {
5741 stmts.iter().find(|s| {
5742 s.fiscal_year == fiscal_year
5743 && s.statement_type == StatementType::BalanceSheet
5744 })
5745 });
5746
5747 let revenue_amount = entity_is
5749 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5750 .map(|li| li.amount);
5751 let ppe_gross = entity_bs
5752 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5753 .map(|li| li.amount);
5754
5755 let framework = match self
5756 .config
5757 .accounting_standards
5758 .framework
5759 .unwrap_or_default()
5760 {
5761 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5762 "IFRS".to_string()
5763 }
5764 _ => "US GAAP".to_string(),
5765 };
5766
5767 let (entity_dta, entity_dtl) = {
5770 let mut dta = rust_decimal::Decimal::ZERO;
5771 let mut dtl = rust_decimal::Decimal::ZERO;
5772 for rf in &tax.deferred_tax.rollforwards {
5773 if rf.entity_code == company.code {
5774 dta += rf.closing_dta;
5775 dtl += rf.closing_dtl;
5776 }
5777 }
5778 (
5779 if dta > rust_decimal::Decimal::ZERO {
5780 Some(dta)
5781 } else {
5782 None
5783 },
5784 if dtl > rust_decimal::Decimal::ZERO {
5785 Some(dtl)
5786 } else {
5787 None
5788 },
5789 )
5790 };
5791
5792 let entity_provisions: Vec<_> = accounting_standards
5795 .provisions
5796 .iter()
5797 .filter(|p| p.entity_code == company.code)
5798 .collect();
5799 let provision_count = entity_provisions.len();
5800 let total_provisions = if provision_count > 0 {
5801 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5802 } else {
5803 None
5804 };
5805
5806 let entity_pension_plan_count = hr
5808 .pension_plans
5809 .iter()
5810 .filter(|p| p.entity_code == company.code)
5811 .count();
5812 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5813 let sum: rust_decimal::Decimal = hr
5814 .pension_disclosures
5815 .iter()
5816 .filter(|d| {
5817 hr.pension_plans
5818 .iter()
5819 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5820 })
5821 .map(|d| d.net_pension_liability)
5822 .sum();
5823 let plan_assets_sum: rust_decimal::Decimal = hr
5824 .pension_plan_assets
5825 .iter()
5826 .filter(|a| {
5827 hr.pension_plans
5828 .iter()
5829 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5830 })
5831 .map(|a| a.fair_value_closing)
5832 .sum();
5833 if entity_pension_plan_count > 0 {
5834 Some(sum + plan_assets_sum)
5835 } else {
5836 None
5837 }
5838 };
5839 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5840 let sum: rust_decimal::Decimal = hr
5841 .pension_plan_assets
5842 .iter()
5843 .filter(|a| {
5844 hr.pension_plans
5845 .iter()
5846 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5847 })
5848 .map(|a| a.fair_value_closing)
5849 .sum();
5850 if entity_pension_plan_count > 0 {
5851 Some(sum)
5852 } else {
5853 None
5854 }
5855 };
5856
5857 let rp_count = audit.related_party_transactions.len();
5860 let se_count = audit.subsequent_events.len();
5861 let adjusting_count = audit
5862 .subsequent_events
5863 .iter()
5864 .filter(|e| {
5865 matches!(
5866 e.classification,
5867 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5868 )
5869 })
5870 .count();
5871
5872 let ctx = NotesGeneratorContext {
5873 entity_code: company.code.clone(),
5874 framework,
5875 period: format!("FY{}", fiscal_year),
5876 period_end: last_period_end,
5877 currency: company.currency.clone(),
5878 revenue_amount,
5879 total_ppe_gross: ppe_gross,
5880 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5881 deferred_tax_asset: entity_dta,
5883 deferred_tax_liability: entity_dtl,
5884 provision_count,
5886 total_provisions,
5887 pension_plan_count: entity_pension_plan_count,
5889 total_dbo: entity_total_dbo,
5890 total_plan_assets: entity_total_plan_assets,
5891 related_party_transaction_count: rp_count,
5893 subsequent_event_count: se_count,
5894 adjusting_event_count: adjusting_count,
5895 ..NotesGeneratorContext::default()
5896 };
5897
5898 let entity_notes = notes_gen.generate(&ctx);
5899 let standard_note_count = entity_notes.len() as u32;
5900 info!(
5901 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5902 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5903 );
5904 financial_reporting
5905 .notes_to_financial_statements
5906 .extend(entity_notes);
5907
5908 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5910 .debt_instruments
5911 .iter()
5912 .filter(|d| d.entity_id == company.code)
5913 .map(|d| {
5914 (
5915 format!("{:?}", d.instrument_type),
5916 d.principal,
5917 d.maturity_date.to_string(),
5918 )
5919 })
5920 .collect();
5921
5922 let hedge_count = treasury.hedge_relationships.len();
5923 let effective_hedges = treasury
5924 .hedge_relationships
5925 .iter()
5926 .filter(|h| h.is_effective)
5927 .count();
5928 let total_notional: rust_decimal::Decimal = treasury
5929 .hedging_instruments
5930 .iter()
5931 .map(|h| h.notional_amount)
5932 .sum();
5933 let total_fair_value: rust_decimal::Decimal = treasury
5934 .hedging_instruments
5935 .iter()
5936 .map(|h| h.fair_value)
5937 .sum();
5938
5939 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5941 .provisions
5942 .iter()
5943 .filter(|p| p.entity_code == company.code)
5944 .map(|p| p.id.as_str())
5945 .collect();
5946 let provision_movements: Vec<(
5947 String,
5948 rust_decimal::Decimal,
5949 rust_decimal::Decimal,
5950 rust_decimal::Decimal,
5951 )> = accounting_standards
5952 .provision_movements
5953 .iter()
5954 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5955 .map(|m| {
5956 let prov_type = accounting_standards
5957 .provisions
5958 .iter()
5959 .find(|p| p.id == m.provision_id)
5960 .map(|p| format!("{:?}", p.provision_type))
5961 .unwrap_or_else(|| "Unknown".to_string());
5962 (prov_type, m.opening, m.additions, m.closing)
5963 })
5964 .collect();
5965
5966 let enhanced_ctx = EnhancedNotesContext {
5967 entity_code: company.code.clone(),
5968 period: format!("FY{}", fiscal_year),
5969 currency: company.currency.clone(),
5970 finished_goods_value: rust_decimal::Decimal::ZERO,
5972 wip_value: rust_decimal::Decimal::ZERO,
5973 raw_materials_value: rust_decimal::Decimal::ZERO,
5974 debt_instruments,
5975 hedge_count,
5976 effective_hedges,
5977 total_notional,
5978 total_fair_value,
5979 provision_movements,
5980 };
5981
5982 let enhanced_notes =
5983 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5984 if !enhanced_notes.is_empty() {
5985 info!(
5986 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5987 company.code,
5988 enhanced_notes.len(),
5989 enhanced_ctx.debt_instruments.len(),
5990 hedge_count,
5991 enhanced_ctx.provision_movements.len(),
5992 );
5993 financial_reporting
5994 .notes_to_financial_statements
5995 .extend(enhanced_notes);
5996 }
5997 }
5998 }
5999
6000 fn build_trial_balance_from_entries(
6006 journal_entries: &[JournalEntry],
6007 coa: &ChartOfAccounts,
6008 company_code: &str,
6009 fiscal_year: u16,
6010 fiscal_period: u8,
6011 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6012 use rust_decimal::Decimal;
6013
6014 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6016 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6017
6018 for je in journal_entries {
6019 if je.header.company_code != company_code
6021 || je.header.fiscal_year != fiscal_year
6022 || je.header.fiscal_period != fiscal_period
6023 {
6024 continue;
6025 }
6026
6027 for line in &je.lines {
6028 let acct = &line.gl_account;
6029 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6030 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6031 }
6032 }
6033
6034 let mut all_accounts: Vec<&String> = account_debits
6036 .keys()
6037 .chain(account_credits.keys())
6038 .collect::<std::collections::HashSet<_>>()
6039 .into_iter()
6040 .collect();
6041 all_accounts.sort();
6042
6043 let mut entries = Vec::new();
6044
6045 for acct_number in all_accounts {
6046 let debit = account_debits
6047 .get(acct_number)
6048 .copied()
6049 .unwrap_or(Decimal::ZERO);
6050 let credit = account_credits
6051 .get(acct_number)
6052 .copied()
6053 .unwrap_or(Decimal::ZERO);
6054
6055 if debit.is_zero() && credit.is_zero() {
6056 continue;
6057 }
6058
6059 let account_name = coa
6061 .get_account(acct_number)
6062 .map(|gl| gl.short_description.clone())
6063 .unwrap_or_else(|| format!("Account {acct_number}"));
6064
6065 let category = Self::category_from_account_code(acct_number);
6070
6071 entries.push(datasynth_generators::TrialBalanceEntry {
6072 account_code: acct_number.clone(),
6073 account_name,
6074 category,
6075 debit_balance: debit,
6076 credit_balance: credit,
6077 });
6078 }
6079
6080 entries
6081 }
6082
6083 fn build_cumulative_trial_balance(
6090 journal_entries: &[JournalEntry],
6091 coa: &ChartOfAccounts,
6092 company_code: &str,
6093 start_date: NaiveDate,
6094 period_end: NaiveDate,
6095 fiscal_year: u16,
6096 fiscal_period: u8,
6097 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6098 use rust_decimal::Decimal;
6099
6100 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6102 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6103
6104 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6106 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6107
6108 for je in journal_entries {
6109 if je.header.company_code != company_code {
6110 continue;
6111 }
6112
6113 for line in &je.lines {
6114 let acct = &line.gl_account;
6115 let category = Self::category_from_account_code(acct);
6116 let is_bs_account = matches!(
6117 category.as_str(),
6118 "Cash"
6119 | "Receivables"
6120 | "Inventory"
6121 | "FixedAssets"
6122 | "Payables"
6123 | "AccruedLiabilities"
6124 | "LongTermDebt"
6125 | "Equity"
6126 );
6127
6128 if is_bs_account {
6129 if je.header.document_date <= period_end
6131 && je.header.document_date >= start_date
6132 {
6133 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6134 line.debit_amount;
6135 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6136 line.credit_amount;
6137 }
6138 } else {
6139 if je.header.fiscal_year == fiscal_year
6141 && je.header.fiscal_period == fiscal_period
6142 {
6143 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6144 line.debit_amount;
6145 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6146 line.credit_amount;
6147 }
6148 }
6149 }
6150 }
6151
6152 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6154 all_accounts.extend(bs_debits.keys().cloned());
6155 all_accounts.extend(bs_credits.keys().cloned());
6156 all_accounts.extend(is_debits.keys().cloned());
6157 all_accounts.extend(is_credits.keys().cloned());
6158
6159 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6160 sorted_accounts.sort();
6161
6162 let mut entries = Vec::new();
6163
6164 for acct_number in &sorted_accounts {
6165 let category = Self::category_from_account_code(acct_number);
6166 let is_bs_account = matches!(
6167 category.as_str(),
6168 "Cash"
6169 | "Receivables"
6170 | "Inventory"
6171 | "FixedAssets"
6172 | "Payables"
6173 | "AccruedLiabilities"
6174 | "LongTermDebt"
6175 | "Equity"
6176 );
6177
6178 let (debit, credit) = if is_bs_account {
6179 (
6180 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6181 bs_credits
6182 .get(acct_number)
6183 .copied()
6184 .unwrap_or(Decimal::ZERO),
6185 )
6186 } else {
6187 (
6188 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6189 is_credits
6190 .get(acct_number)
6191 .copied()
6192 .unwrap_or(Decimal::ZERO),
6193 )
6194 };
6195
6196 if debit.is_zero() && credit.is_zero() {
6197 continue;
6198 }
6199
6200 let account_name = coa
6201 .get_account(acct_number)
6202 .map(|gl| gl.short_description.clone())
6203 .unwrap_or_else(|| format!("Account {acct_number}"));
6204
6205 entries.push(datasynth_generators::TrialBalanceEntry {
6206 account_code: acct_number.clone(),
6207 account_name,
6208 category,
6209 debit_balance: debit,
6210 credit_balance: credit,
6211 });
6212 }
6213
6214 entries
6215 }
6216
6217 fn build_cash_flow_from_trial_balances(
6222 current_tb: &[datasynth_generators::TrialBalanceEntry],
6223 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6224 net_income: rust_decimal::Decimal,
6225 ) -> Vec<CashFlowItem> {
6226 use rust_decimal::Decimal;
6227
6228 let aggregate =
6230 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6231 let mut map: HashMap<String, Decimal> = HashMap::new();
6232 for entry in tb {
6233 let net = entry.debit_balance - entry.credit_balance;
6234 *map.entry(entry.category.clone()).or_default() += net;
6235 }
6236 map
6237 };
6238
6239 let current = aggregate(current_tb);
6240 let prior = prior_tb.map(aggregate);
6241
6242 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6244 *map.get(key).unwrap_or(&Decimal::ZERO)
6245 };
6246
6247 let change = |key: &str| -> Decimal {
6249 let curr = get(¤t, key);
6250 match &prior {
6251 Some(p) => curr - get(p, key),
6252 None => curr,
6253 }
6254 };
6255
6256 let fixed_asset_change = change("FixedAssets");
6259 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6260 -fixed_asset_change
6261 } else {
6262 Decimal::ZERO
6263 };
6264
6265 let ar_change = change("Receivables");
6267 let inventory_change = change("Inventory");
6268 let ap_change = change("Payables");
6270 let accrued_change = change("AccruedLiabilities");
6271
6272 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6273 + (-ap_change)
6274 + (-accrued_change);
6275
6276 let capex = if fixed_asset_change > Decimal::ZERO {
6278 -fixed_asset_change
6279 } else {
6280 Decimal::ZERO
6281 };
6282 let investing_cf = capex;
6283
6284 let debt_change = -change("LongTermDebt");
6286 let equity_change = -change("Equity");
6287 let financing_cf = debt_change + equity_change;
6288
6289 let net_change = operating_cf + investing_cf + financing_cf;
6290
6291 vec![
6292 CashFlowItem {
6293 item_code: "CF-NI".to_string(),
6294 label: "Net Income".to_string(),
6295 category: CashFlowCategory::Operating,
6296 amount: net_income,
6297 amount_prior: None,
6298 sort_order: 1,
6299 is_total: false,
6300 },
6301 CashFlowItem {
6302 item_code: "CF-DEP".to_string(),
6303 label: "Depreciation & Amortization".to_string(),
6304 category: CashFlowCategory::Operating,
6305 amount: depreciation_addback,
6306 amount_prior: None,
6307 sort_order: 2,
6308 is_total: false,
6309 },
6310 CashFlowItem {
6311 item_code: "CF-AR".to_string(),
6312 label: "Change in Accounts Receivable".to_string(),
6313 category: CashFlowCategory::Operating,
6314 amount: -ar_change,
6315 amount_prior: None,
6316 sort_order: 3,
6317 is_total: false,
6318 },
6319 CashFlowItem {
6320 item_code: "CF-AP".to_string(),
6321 label: "Change in Accounts Payable".to_string(),
6322 category: CashFlowCategory::Operating,
6323 amount: -ap_change,
6324 amount_prior: None,
6325 sort_order: 4,
6326 is_total: false,
6327 },
6328 CashFlowItem {
6329 item_code: "CF-INV".to_string(),
6330 label: "Change in Inventory".to_string(),
6331 category: CashFlowCategory::Operating,
6332 amount: -inventory_change,
6333 amount_prior: None,
6334 sort_order: 5,
6335 is_total: false,
6336 },
6337 CashFlowItem {
6338 item_code: "CF-OP".to_string(),
6339 label: "Net Cash from Operating Activities".to_string(),
6340 category: CashFlowCategory::Operating,
6341 amount: operating_cf,
6342 amount_prior: None,
6343 sort_order: 6,
6344 is_total: true,
6345 },
6346 CashFlowItem {
6347 item_code: "CF-CAPEX".to_string(),
6348 label: "Capital Expenditures".to_string(),
6349 category: CashFlowCategory::Investing,
6350 amount: capex,
6351 amount_prior: None,
6352 sort_order: 7,
6353 is_total: false,
6354 },
6355 CashFlowItem {
6356 item_code: "CF-INV-T".to_string(),
6357 label: "Net Cash from Investing Activities".to_string(),
6358 category: CashFlowCategory::Investing,
6359 amount: investing_cf,
6360 amount_prior: None,
6361 sort_order: 8,
6362 is_total: true,
6363 },
6364 CashFlowItem {
6365 item_code: "CF-DEBT".to_string(),
6366 label: "Net Borrowings / (Repayments)".to_string(),
6367 category: CashFlowCategory::Financing,
6368 amount: debt_change,
6369 amount_prior: None,
6370 sort_order: 9,
6371 is_total: false,
6372 },
6373 CashFlowItem {
6374 item_code: "CF-EQ".to_string(),
6375 label: "Equity Changes".to_string(),
6376 category: CashFlowCategory::Financing,
6377 amount: equity_change,
6378 amount_prior: None,
6379 sort_order: 10,
6380 is_total: false,
6381 },
6382 CashFlowItem {
6383 item_code: "CF-FIN-T".to_string(),
6384 label: "Net Cash from Financing Activities".to_string(),
6385 category: CashFlowCategory::Financing,
6386 amount: financing_cf,
6387 amount_prior: None,
6388 sort_order: 11,
6389 is_total: true,
6390 },
6391 CashFlowItem {
6392 item_code: "CF-NET".to_string(),
6393 label: "Net Change in Cash".to_string(),
6394 category: CashFlowCategory::Operating,
6395 amount: net_change,
6396 amount_prior: None,
6397 sort_order: 12,
6398 is_total: true,
6399 },
6400 ]
6401 }
6402
6403 fn calculate_net_income_from_tb(
6407 tb: &[datasynth_generators::TrialBalanceEntry],
6408 ) -> rust_decimal::Decimal {
6409 use rust_decimal::Decimal;
6410
6411 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6412 for entry in tb {
6413 let net = entry.debit_balance - entry.credit_balance;
6414 *aggregated.entry(entry.category.clone()).or_default() += net;
6415 }
6416
6417 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6418 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6419 let opex = *aggregated
6420 .get("OperatingExpenses")
6421 .unwrap_or(&Decimal::ZERO);
6422 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6423 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6424
6425 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6428 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6430 operating_income - tax
6431 }
6432
6433 fn category_from_account_code(code: &str) -> String {
6440 let prefix: String = code.chars().take(2).collect();
6441 match prefix.as_str() {
6442 "10" => "Cash",
6443 "11" => "Receivables",
6444 "12" | "13" | "14" => "Inventory",
6445 "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6446 "20" => "Payables",
6447 "21" | "22" | "23" | "24" => "AccruedLiabilities",
6448 "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6449 "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6450 "40" | "41" | "42" | "43" | "44" => "Revenue",
6451 "50" | "51" | "52" => "CostOfSales",
6452 "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6453 "OperatingExpenses"
6454 }
6455 "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6456 "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6457 _ => "OperatingExpenses",
6458 }
6459 .to_string()
6460 }
6461
6462 fn phase_hr_data(
6464 &mut self,
6465 stats: &mut EnhancedGenerationStatistics,
6466 ) -> SynthResult<HrSnapshot> {
6467 if !self.phase_config.generate_hr {
6468 debug!("Phase 16: Skipped (HR generation disabled)");
6469 return Ok(HrSnapshot::default());
6470 }
6471
6472 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6473
6474 let seed = self.seed;
6475 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6476 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6477 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6478 let company_code = self
6479 .config
6480 .companies
6481 .first()
6482 .map(|c| c.code.as_str())
6483 .unwrap_or("1000");
6484 let currency = self
6485 .config
6486 .companies
6487 .first()
6488 .map(|c| c.currency.as_str())
6489 .unwrap_or("USD");
6490
6491 let employee_ids: Vec<String> = self
6492 .master_data
6493 .employees
6494 .iter()
6495 .map(|e| e.employee_id.clone())
6496 .collect();
6497
6498 if employee_ids.is_empty() {
6499 debug!("Phase 16: Skipped (no employees available)");
6500 return Ok(HrSnapshot::default());
6501 }
6502
6503 let cost_center_ids: Vec<String> = self
6506 .master_data
6507 .employees
6508 .iter()
6509 .filter_map(|e| e.cost_center.clone())
6510 .collect::<std::collections::HashSet<_>>()
6511 .into_iter()
6512 .collect();
6513
6514 let mut snapshot = HrSnapshot::default();
6515
6516 if self.config.hr.payroll.enabled {
6518 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6519 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6520
6521 let payroll_pack = self.primary_pack();
6523
6524 payroll_gen.set_country_pack(payroll_pack.clone());
6527
6528 let employees_with_salary: Vec<(
6529 String,
6530 rust_decimal::Decimal,
6531 Option<String>,
6532 Option<String>,
6533 )> = self
6534 .master_data
6535 .employees
6536 .iter()
6537 .map(|e| {
6538 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6541 e.base_salary
6542 } else {
6543 rust_decimal::Decimal::from(60_000)
6544 };
6545 (
6546 e.employee_id.clone(),
6547 annual, e.cost_center.clone(),
6549 e.department_id.clone(),
6550 )
6551 })
6552 .collect();
6553
6554 let change_history = &self.master_data.employee_change_history;
6557 let has_changes = !change_history.is_empty();
6558 if has_changes {
6559 debug!(
6560 "Payroll will incorporate {} employee change events",
6561 change_history.len()
6562 );
6563 }
6564
6565 for month in 0..self.config.global.period_months {
6566 let period_start = start_date + chrono::Months::new(month);
6567 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6568 let (run, items) = if has_changes {
6569 payroll_gen.generate_with_changes(
6570 company_code,
6571 &employees_with_salary,
6572 period_start,
6573 period_end,
6574 currency,
6575 change_history,
6576 )
6577 } else {
6578 payroll_gen.generate(
6579 company_code,
6580 &employees_with_salary,
6581 period_start,
6582 period_end,
6583 currency,
6584 )
6585 };
6586 snapshot.payroll_runs.push(run);
6587 snapshot.payroll_run_count += 1;
6588 snapshot.payroll_line_item_count += items.len();
6589 snapshot.payroll_line_items.extend(items);
6590 }
6591 }
6592
6593 if self.config.hr.time_attendance.enabled {
6595 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6596 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6597 let entries = time_gen.generate(
6598 &employee_ids,
6599 start_date,
6600 end_date,
6601 &self.config.hr.time_attendance,
6602 );
6603 snapshot.time_entry_count = entries.len();
6604 snapshot.time_entries = entries;
6605 }
6606
6607 if self.config.hr.expenses.enabled {
6609 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6610 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6611 expense_gen.set_country_pack(self.primary_pack().clone());
6612 let company_currency = self
6613 .config
6614 .companies
6615 .first()
6616 .map(|c| c.currency.as_str())
6617 .unwrap_or("USD");
6618 let reports = expense_gen.generate_with_currency(
6619 &employee_ids,
6620 start_date,
6621 end_date,
6622 &self.config.hr.expenses,
6623 company_currency,
6624 );
6625 snapshot.expense_report_count = reports.len();
6626 snapshot.expense_reports = reports;
6627 }
6628
6629 if self.config.hr.payroll.enabled {
6631 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6632 let employee_pairs: Vec<(String, String)> = self
6633 .master_data
6634 .employees
6635 .iter()
6636 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6637 .collect();
6638 let enrollments =
6639 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6640 snapshot.benefit_enrollment_count = enrollments.len();
6641 snapshot.benefit_enrollments = enrollments;
6642 }
6643
6644 if self.phase_config.generate_hr {
6646 let entity_name = self
6647 .config
6648 .companies
6649 .first()
6650 .map(|c| c.name.as_str())
6651 .unwrap_or("Entity");
6652 let period_months = self.config.global.period_months;
6653 let period_label = {
6654 let y = start_date.year();
6655 let m = start_date.month();
6656 if period_months >= 12 {
6657 format!("FY{y}")
6658 } else {
6659 format!("{y}-{m:02}")
6660 }
6661 };
6662 let reporting_date =
6663 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6664
6665 let avg_salary: Option<rust_decimal::Decimal> = {
6670 let employee_count = employee_ids.len();
6671 if self.config.hr.payroll.enabled
6672 && employee_count > 0
6673 && !snapshot.payroll_runs.is_empty()
6674 {
6675 let total_gross: rust_decimal::Decimal = snapshot
6677 .payroll_runs
6678 .iter()
6679 .filter(|r| r.company_code == company_code)
6680 .map(|r| r.total_gross)
6681 .sum();
6682 if total_gross > rust_decimal::Decimal::ZERO {
6683 let annual_total = if period_months > 0 && period_months < 12 {
6685 total_gross * rust_decimal::Decimal::from(12u32)
6686 / rust_decimal::Decimal::from(period_months)
6687 } else {
6688 total_gross
6689 };
6690 Some(
6691 (annual_total / rust_decimal::Decimal::from(employee_count))
6692 .round_dp(2),
6693 )
6694 } else {
6695 None
6696 }
6697 } else {
6698 None
6699 }
6700 };
6701
6702 let mut pension_gen =
6703 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6704 let pension_snap = pension_gen.generate(
6705 company_code,
6706 entity_name,
6707 &period_label,
6708 reporting_date,
6709 employee_ids.len(),
6710 currency,
6711 avg_salary,
6712 period_months,
6713 );
6714 snapshot.pension_plan_count = pension_snap.plans.len();
6715 snapshot.pension_plans = pension_snap.plans;
6716 snapshot.pension_obligations = pension_snap.obligations;
6717 snapshot.pension_plan_assets = pension_snap.plan_assets;
6718 snapshot.pension_disclosures = pension_snap.disclosures;
6719 snapshot.pension_journal_entries = pension_snap.journal_entries;
6724 }
6725
6726 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6728 let period_months = self.config.global.period_months;
6729 let period_label = {
6730 let y = start_date.year();
6731 let m = start_date.month();
6732 if period_months >= 12 {
6733 format!("FY{y}")
6734 } else {
6735 format!("{y}-{m:02}")
6736 }
6737 };
6738 let reporting_date =
6739 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6740
6741 let mut stock_comp_gen =
6742 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6743 let stock_snap = stock_comp_gen.generate(
6744 company_code,
6745 &employee_ids,
6746 start_date,
6747 &period_label,
6748 reporting_date,
6749 currency,
6750 );
6751 snapshot.stock_grant_count = stock_snap.grants.len();
6752 snapshot.stock_grants = stock_snap.grants;
6753 snapshot.stock_comp_expenses = stock_snap.expenses;
6754 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6755 }
6756
6757 stats.payroll_run_count = snapshot.payroll_run_count;
6758 stats.time_entry_count = snapshot.time_entry_count;
6759 stats.expense_report_count = snapshot.expense_report_count;
6760 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6761 stats.pension_plan_count = snapshot.pension_plan_count;
6762 stats.stock_grant_count = snapshot.stock_grant_count;
6763
6764 info!(
6765 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6766 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6767 snapshot.time_entry_count, snapshot.expense_report_count,
6768 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6769 snapshot.stock_grant_count
6770 );
6771 self.check_resources_with_log("post-hr")?;
6772
6773 Ok(snapshot)
6774 }
6775
6776 fn phase_accounting_standards(
6778 &mut self,
6779 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6780 journal_entries: &[JournalEntry],
6781 stats: &mut EnhancedGenerationStatistics,
6782 ) -> SynthResult<AccountingStandardsSnapshot> {
6783 if !self.phase_config.generate_accounting_standards {
6784 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6785 return Ok(AccountingStandardsSnapshot::default());
6786 }
6787 info!("Phase 17: Generating Accounting Standards Data");
6788
6789 let seed = self.seed;
6790 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6791 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6792 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6793 let company_code = self
6794 .config
6795 .companies
6796 .first()
6797 .map(|c| c.code.as_str())
6798 .unwrap_or("1000");
6799 let currency = self
6800 .config
6801 .companies
6802 .first()
6803 .map(|c| c.currency.as_str())
6804 .unwrap_or("USD");
6805
6806 let framework = match self.config.accounting_standards.framework {
6811 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6812 datasynth_standards::framework::AccountingFramework::UsGaap
6813 }
6814 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6815 datasynth_standards::framework::AccountingFramework::Ifrs
6816 }
6817 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6818 datasynth_standards::framework::AccountingFramework::DualReporting
6819 }
6820 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6821 datasynth_standards::framework::AccountingFramework::FrenchGaap
6822 }
6823 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6824 datasynth_standards::framework::AccountingFramework::GermanGaap
6825 }
6826 None => {
6827 let pack = self.primary_pack();
6829 let pack_fw = pack.accounting.framework.as_str();
6830 match pack_fw {
6831 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6832 "dual_reporting" => {
6833 datasynth_standards::framework::AccountingFramework::DualReporting
6834 }
6835 "french_gaap" => {
6836 datasynth_standards::framework::AccountingFramework::FrenchGaap
6837 }
6838 "german_gaap" | "hgb" => {
6839 datasynth_standards::framework::AccountingFramework::GermanGaap
6840 }
6841 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6843 }
6844 }
6845 };
6846
6847 let mut snapshot = AccountingStandardsSnapshot::default();
6848
6849 if self.config.accounting_standards.revenue_recognition.enabled {
6851 let customer_ids: Vec<String> = self
6852 .master_data
6853 .customers
6854 .iter()
6855 .map(|c| c.customer_id.clone())
6856 .collect();
6857
6858 if !customer_ids.is_empty() {
6859 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6860 let contracts = rev_gen.generate(
6861 company_code,
6862 &customer_ids,
6863 start_date,
6864 end_date,
6865 currency,
6866 &self.config.accounting_standards.revenue_recognition,
6867 framework,
6868 );
6869 snapshot.revenue_contract_count = contracts.len();
6870 snapshot.contracts = contracts;
6871 }
6872 }
6873
6874 if self.config.accounting_standards.impairment.enabled {
6876 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6877 .master_data
6878 .assets
6879 .iter()
6880 .map(|a| {
6881 (
6882 a.asset_id.clone(),
6883 a.description.clone(),
6884 a.acquisition_cost,
6885 )
6886 })
6887 .collect();
6888
6889 if !asset_data.is_empty() {
6890 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6891 let tests = imp_gen.generate(
6892 company_code,
6893 &asset_data,
6894 end_date,
6895 &self.config.accounting_standards.impairment,
6896 framework,
6897 );
6898 snapshot.impairment_test_count = tests.len();
6899 snapshot.impairment_tests = tests;
6900 }
6901 }
6902
6903 if self
6905 .config
6906 .accounting_standards
6907 .business_combinations
6908 .enabled
6909 {
6910 let bc_config = &self.config.accounting_standards.business_combinations;
6911 let framework_str = match framework {
6912 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6913 _ => "US_GAAP",
6914 };
6915 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6916 let bc_snap = bc_gen.generate(
6917 company_code,
6918 currency,
6919 start_date,
6920 end_date,
6921 bc_config.acquisition_count,
6922 framework_str,
6923 );
6924 snapshot.business_combination_count = bc_snap.combinations.len();
6925 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6926 snapshot.business_combinations = bc_snap.combinations;
6927 }
6928
6929 if self
6931 .config
6932 .accounting_standards
6933 .expected_credit_loss
6934 .enabled
6935 {
6936 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6937 let framework_str = match framework {
6938 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6939 _ => "ASC_326",
6940 };
6941
6942 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6945
6946 let mut ecl_gen = EclGenerator::new(seed + 43);
6947
6948 let bucket_exposures: Vec<(
6950 datasynth_core::models::subledger::ar::AgingBucket,
6951 rust_decimal::Decimal,
6952 )> = if ar_aging_reports.is_empty() {
6953 use datasynth_core::models::subledger::ar::AgingBucket;
6955 vec![
6956 (
6957 AgingBucket::Current,
6958 rust_decimal::Decimal::from(500_000_u32),
6959 ),
6960 (
6961 AgingBucket::Days1To30,
6962 rust_decimal::Decimal::from(120_000_u32),
6963 ),
6964 (
6965 AgingBucket::Days31To60,
6966 rust_decimal::Decimal::from(45_000_u32),
6967 ),
6968 (
6969 AgingBucket::Days61To90,
6970 rust_decimal::Decimal::from(15_000_u32),
6971 ),
6972 (
6973 AgingBucket::Over90Days,
6974 rust_decimal::Decimal::from(8_000_u32),
6975 ),
6976 ]
6977 } else {
6978 use datasynth_core::models::subledger::ar::AgingBucket;
6979 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6981 std::collections::HashMap::new();
6982 for report in ar_aging_reports {
6983 for (bucket, amount) in &report.bucket_totals {
6984 *totals.entry(*bucket).or_default() += amount;
6985 }
6986 }
6987 AgingBucket::all()
6988 .into_iter()
6989 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6990 .collect()
6991 };
6992
6993 let ecl_snap = ecl_gen.generate(
6994 company_code,
6995 end_date,
6996 &bucket_exposures,
6997 ecl_config,
6998 &period_label,
6999 framework_str,
7000 );
7001
7002 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7003 snapshot.ecl_models = ecl_snap.ecl_models;
7004 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7005 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7006 }
7007
7008 {
7010 let framework_str = match framework {
7011 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7012 _ => "US_GAAP",
7013 };
7014
7015 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7020 .max(rust_decimal::Decimal::from(100_000_u32));
7021
7022 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7023
7024 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7025 let prov_snap = prov_gen.generate(
7026 company_code,
7027 currency,
7028 revenue_proxy,
7029 end_date,
7030 &period_label,
7031 framework_str,
7032 None, );
7034
7035 snapshot.provision_count = prov_snap.provisions.len();
7036 snapshot.provisions = prov_snap.provisions;
7037 snapshot.provision_movements = prov_snap.movements;
7038 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7039 snapshot.provision_journal_entries = prov_snap.journal_entries;
7040 }
7041
7042 {
7046 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7047
7048 let presentation_currency = self
7049 .config
7050 .global
7051 .presentation_currency
7052 .clone()
7053 .unwrap_or_else(|| self.config.global.group_currency.clone());
7054
7055 let mut rate_table = FxRateTable::new(&presentation_currency);
7058
7059 let base_rates = base_rates_usd();
7063 for (ccy, rate) in &base_rates {
7064 rate_table.add_rate(FxRate::new(
7065 ccy,
7066 "USD",
7067 RateType::Closing,
7068 end_date,
7069 *rate,
7070 "SYNTHETIC",
7071 ));
7072 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7075 rate_table.add_rate(FxRate::new(
7076 ccy,
7077 "USD",
7078 RateType::Average,
7079 end_date,
7080 avg,
7081 "SYNTHETIC",
7082 ));
7083 }
7084
7085 let mut translation_results = Vec::new();
7086 for company in &self.config.companies {
7087 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7090 .max(rust_decimal::Decimal::from(100_000_u32));
7091
7092 let func_ccy = company
7093 .functional_currency
7094 .clone()
7095 .unwrap_or_else(|| company.currency.clone());
7096
7097 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7098 &company.code,
7099 &func_ccy,
7100 &presentation_currency,
7101 &ias21_period_label,
7102 end_date,
7103 company_revenue,
7104 &rate_table,
7105 );
7106 translation_results.push(result);
7107 }
7108
7109 snapshot.currency_translation_count = translation_results.len();
7110 snapshot.currency_translation_results = translation_results;
7111 }
7112
7113 stats.revenue_contract_count = snapshot.revenue_contract_count;
7114 stats.impairment_test_count = snapshot.impairment_test_count;
7115 stats.business_combination_count = snapshot.business_combination_count;
7116 stats.ecl_model_count = snapshot.ecl_model_count;
7117 stats.provision_count = snapshot.provision_count;
7118
7119 info!(
7120 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
7121 snapshot.revenue_contract_count,
7122 snapshot.impairment_test_count,
7123 snapshot.business_combination_count,
7124 snapshot.ecl_model_count,
7125 snapshot.provision_count,
7126 snapshot.currency_translation_count
7127 );
7128 self.check_resources_with_log("post-accounting-standards")?;
7129
7130 Ok(snapshot)
7131 }
7132
7133 fn phase_manufacturing(
7135 &mut self,
7136 stats: &mut EnhancedGenerationStatistics,
7137 ) -> SynthResult<ManufacturingSnapshot> {
7138 if !self.phase_config.generate_manufacturing {
7139 debug!("Phase 18: Skipped (manufacturing generation disabled)");
7140 return Ok(ManufacturingSnapshot::default());
7141 }
7142 info!("Phase 18: Generating Manufacturing Data");
7143
7144 let seed = self.seed;
7145 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7146 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7147 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7148 let company_code = self
7149 .config
7150 .companies
7151 .first()
7152 .map(|c| c.code.as_str())
7153 .unwrap_or("1000");
7154
7155 let material_data: Vec<(String, String)> = self
7156 .master_data
7157 .materials
7158 .iter()
7159 .map(|m| (m.material_id.clone(), m.description.clone()))
7160 .collect();
7161
7162 if material_data.is_empty() {
7163 debug!("Phase 18: Skipped (no materials available)");
7164 return Ok(ManufacturingSnapshot::default());
7165 }
7166
7167 let mut snapshot = ManufacturingSnapshot::default();
7168
7169 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7171 let production_orders = prod_gen.generate(
7172 company_code,
7173 &material_data,
7174 start_date,
7175 end_date,
7176 &self.config.manufacturing.production_orders,
7177 &self.config.manufacturing.costing,
7178 &self.config.manufacturing.routing,
7179 );
7180 snapshot.production_order_count = production_orders.len();
7181
7182 let inspection_data: Vec<(String, String, String)> = production_orders
7184 .iter()
7185 .map(|po| {
7186 (
7187 po.order_id.clone(),
7188 po.material_id.clone(),
7189 po.material_description.clone(),
7190 )
7191 })
7192 .collect();
7193
7194 snapshot.production_orders = production_orders;
7195
7196 if !inspection_data.is_empty() {
7197 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7198 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7199 snapshot.quality_inspection_count = inspections.len();
7200 snapshot.quality_inspections = inspections;
7201 }
7202
7203 let storage_locations: Vec<(String, String)> = material_data
7205 .iter()
7206 .enumerate()
7207 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7208 .collect();
7209
7210 let employee_ids: Vec<String> = self
7211 .master_data
7212 .employees
7213 .iter()
7214 .map(|e| e.employee_id.clone())
7215 .collect();
7216 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7217 .with_employee_pool(employee_ids);
7218 let mut cycle_count_total = 0usize;
7219 for month in 0..self.config.global.period_months {
7220 let count_date = start_date + chrono::Months::new(month);
7221 let items_per_count = storage_locations.len().clamp(10, 50);
7222 let cc = cc_gen.generate(
7223 company_code,
7224 &storage_locations,
7225 count_date,
7226 items_per_count,
7227 );
7228 snapshot.cycle_counts.push(cc);
7229 cycle_count_total += 1;
7230 }
7231 snapshot.cycle_count_count = cycle_count_total;
7232
7233 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7235 let bom_components = bom_gen.generate(company_code, &material_data);
7236 snapshot.bom_component_count = bom_components.len();
7237 snapshot.bom_components = bom_components;
7238
7239 let currency = self
7241 .config
7242 .companies
7243 .first()
7244 .map(|c| c.currency.as_str())
7245 .unwrap_or("USD");
7246 let production_order_ids: Vec<String> = snapshot
7247 .production_orders
7248 .iter()
7249 .map(|po| po.order_id.clone())
7250 .collect();
7251 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7252 let inventory_movements = inv_mov_gen.generate_with_production_orders(
7253 company_code,
7254 &material_data,
7255 start_date,
7256 end_date,
7257 2,
7258 currency,
7259 &production_order_ids,
7260 );
7261 snapshot.inventory_movement_count = inventory_movements.len();
7262 snapshot.inventory_movements = inventory_movements;
7263
7264 stats.production_order_count = snapshot.production_order_count;
7265 stats.quality_inspection_count = snapshot.quality_inspection_count;
7266 stats.cycle_count_count = snapshot.cycle_count_count;
7267 stats.bom_component_count = snapshot.bom_component_count;
7268 stats.inventory_movement_count = snapshot.inventory_movement_count;
7269
7270 info!(
7271 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7272 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7273 snapshot.bom_component_count, snapshot.inventory_movement_count
7274 );
7275 self.check_resources_with_log("post-manufacturing")?;
7276
7277 Ok(snapshot)
7278 }
7279
7280 fn phase_sales_kpi_budgets(
7282 &mut self,
7283 coa: &Arc<ChartOfAccounts>,
7284 financial_reporting: &FinancialReportingSnapshot,
7285 stats: &mut EnhancedGenerationStatistics,
7286 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7287 if !self.phase_config.generate_sales_kpi_budgets {
7288 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7289 return Ok(SalesKpiBudgetsSnapshot::default());
7290 }
7291 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7292
7293 let seed = self.seed;
7294 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7295 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7296 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7297 let company_code = self
7298 .config
7299 .companies
7300 .first()
7301 .map(|c| c.code.as_str())
7302 .unwrap_or("1000");
7303
7304 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7305
7306 if self.config.sales_quotes.enabled {
7308 let customer_data: Vec<(String, String)> = self
7309 .master_data
7310 .customers
7311 .iter()
7312 .map(|c| (c.customer_id.clone(), c.name.clone()))
7313 .collect();
7314 let material_data: Vec<(String, String)> = self
7315 .master_data
7316 .materials
7317 .iter()
7318 .map(|m| (m.material_id.clone(), m.description.clone()))
7319 .collect();
7320
7321 if !customer_data.is_empty() && !material_data.is_empty() {
7322 let employee_ids: Vec<String> = self
7323 .master_data
7324 .employees
7325 .iter()
7326 .map(|e| e.employee_id.clone())
7327 .collect();
7328 let customer_ids: Vec<String> = self
7329 .master_data
7330 .customers
7331 .iter()
7332 .map(|c| c.customer_id.clone())
7333 .collect();
7334 let company_currency = self
7335 .config
7336 .companies
7337 .first()
7338 .map(|c| c.currency.as_str())
7339 .unwrap_or("USD");
7340
7341 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7342 .with_pools(employee_ids, customer_ids);
7343 let quotes = quote_gen.generate_with_currency(
7344 company_code,
7345 &customer_data,
7346 &material_data,
7347 start_date,
7348 end_date,
7349 &self.config.sales_quotes,
7350 company_currency,
7351 );
7352 snapshot.sales_quote_count = quotes.len();
7353 snapshot.sales_quotes = quotes;
7354 }
7355 }
7356
7357 if self.config.financial_reporting.management_kpis.enabled {
7359 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7360 let mut kpis = kpi_gen.generate(
7361 company_code,
7362 start_date,
7363 end_date,
7364 &self.config.financial_reporting.management_kpis,
7365 );
7366
7367 {
7369 use rust_decimal::Decimal;
7370
7371 if let Some(income_stmt) =
7372 financial_reporting.financial_statements.iter().find(|fs| {
7373 fs.statement_type == StatementType::IncomeStatement
7374 && fs.company_code == company_code
7375 })
7376 {
7377 let total_revenue: Decimal = income_stmt
7379 .line_items
7380 .iter()
7381 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7382 .map(|li| li.amount)
7383 .sum();
7384 let total_cogs: Decimal = income_stmt
7385 .line_items
7386 .iter()
7387 .filter(|li| {
7388 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7389 && !li.is_total
7390 })
7391 .map(|li| li.amount.abs())
7392 .sum();
7393 let total_opex: Decimal = income_stmt
7394 .line_items
7395 .iter()
7396 .filter(|li| {
7397 li.section.contains("Expense")
7398 && !li.is_total
7399 && !li.section.contains("Cost")
7400 })
7401 .map(|li| li.amount.abs())
7402 .sum();
7403
7404 if total_revenue > Decimal::ZERO {
7405 let hundred = Decimal::from(100);
7406 let gross_margin_pct =
7407 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7408 let operating_income = total_revenue - total_cogs - total_opex;
7409 let op_margin_pct =
7410 (operating_income * hundred / total_revenue).round_dp(2);
7411
7412 for kpi in &mut kpis {
7414 if kpi.name == "Gross Margin" {
7415 kpi.value = gross_margin_pct;
7416 } else if kpi.name == "Operating Margin" {
7417 kpi.value = op_margin_pct;
7418 }
7419 }
7420 }
7421 }
7422
7423 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7425 fs.statement_type == StatementType::BalanceSheet
7426 && fs.company_code == company_code
7427 }) {
7428 let current_assets: Decimal = bs
7429 .line_items
7430 .iter()
7431 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7432 .map(|li| li.amount)
7433 .sum();
7434 let current_liabilities: Decimal = bs
7435 .line_items
7436 .iter()
7437 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7438 .map(|li| li.amount.abs())
7439 .sum();
7440
7441 if current_liabilities > Decimal::ZERO {
7442 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7443 for kpi in &mut kpis {
7444 if kpi.name == "Current Ratio" {
7445 kpi.value = current_ratio;
7446 }
7447 }
7448 }
7449 }
7450 }
7451
7452 snapshot.kpi_count = kpis.len();
7453 snapshot.kpis = kpis;
7454 }
7455
7456 if self.config.financial_reporting.budgets.enabled {
7458 let account_data: Vec<(String, String)> = coa
7459 .accounts
7460 .iter()
7461 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7462 .collect();
7463
7464 if !account_data.is_empty() {
7465 let fiscal_year = start_date.year() as u32;
7466 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7467 let budget = budget_gen.generate(
7468 company_code,
7469 fiscal_year,
7470 &account_data,
7471 &self.config.financial_reporting.budgets,
7472 );
7473 snapshot.budget_line_count = budget.line_items.len();
7474 snapshot.budgets.push(budget);
7475 }
7476 }
7477
7478 stats.sales_quote_count = snapshot.sales_quote_count;
7479 stats.kpi_count = snapshot.kpi_count;
7480 stats.budget_line_count = snapshot.budget_line_count;
7481
7482 info!(
7483 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7484 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7485 );
7486 self.check_resources_with_log("post-sales-kpi-budgets")?;
7487
7488 Ok(snapshot)
7489 }
7490
7491 fn compute_pre_tax_income(
7498 company_code: &str,
7499 journal_entries: &[JournalEntry],
7500 ) -> rust_decimal::Decimal {
7501 use datasynth_core::accounts::AccountCategory;
7502 use rust_decimal::Decimal;
7503
7504 let mut total_revenue = Decimal::ZERO;
7505 let mut total_expenses = Decimal::ZERO;
7506
7507 for je in journal_entries {
7508 if je.header.company_code != company_code {
7509 continue;
7510 }
7511 for line in &je.lines {
7512 let cat = AccountCategory::from_account(&line.gl_account);
7513 match cat {
7514 AccountCategory::Revenue => {
7515 total_revenue += line.credit_amount - line.debit_amount;
7516 }
7517 AccountCategory::Cogs
7518 | AccountCategory::OperatingExpense
7519 | AccountCategory::OtherIncomeExpense => {
7520 total_expenses += line.debit_amount - line.credit_amount;
7521 }
7522 _ => {}
7523 }
7524 }
7525 }
7526
7527 let pti = (total_revenue - total_expenses).round_dp(2);
7528 if pti == rust_decimal::Decimal::ZERO {
7529 rust_decimal::Decimal::from(1_000_000u32)
7532 } else {
7533 pti
7534 }
7535 }
7536
7537 fn phase_tax_generation(
7539 &mut self,
7540 document_flows: &DocumentFlowSnapshot,
7541 journal_entries: &[JournalEntry],
7542 stats: &mut EnhancedGenerationStatistics,
7543 ) -> SynthResult<TaxSnapshot> {
7544 if !self.phase_config.generate_tax {
7545 debug!("Phase 20: Skipped (tax generation disabled)");
7546 return Ok(TaxSnapshot::default());
7547 }
7548 info!("Phase 20: Generating Tax Data");
7549
7550 let seed = self.seed;
7551 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7552 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7553 let fiscal_year = start_date.year();
7554 let company_code = self
7555 .config
7556 .companies
7557 .first()
7558 .map(|c| c.code.as_str())
7559 .unwrap_or("1000");
7560
7561 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7562 seed + 370,
7563 self.config.tax.clone(),
7564 );
7565
7566 let pack = self.primary_pack().clone();
7567 let (jurisdictions, codes) =
7568 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7569
7570 let mut provisions = Vec::new();
7572 if self.config.tax.provisions.enabled {
7573 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7574 for company in &self.config.companies {
7575 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7576 let statutory_rate = rust_decimal::Decimal::new(
7577 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7578 2,
7579 );
7580 let provision = provision_gen.generate(
7581 &company.code,
7582 start_date,
7583 pre_tax_income,
7584 statutory_rate,
7585 );
7586 provisions.push(provision);
7587 }
7588 }
7589
7590 let mut tax_lines = Vec::new();
7592 if !codes.is_empty() {
7593 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7594 datasynth_generators::TaxLineGeneratorConfig::default(),
7595 codes.clone(),
7596 seed + 372,
7597 );
7598
7599 let buyer_country = self
7602 .config
7603 .companies
7604 .first()
7605 .map(|c| c.country.as_str())
7606 .unwrap_or("US");
7607 for vi in &document_flows.vendor_invoices {
7608 let lines = tax_line_gen.generate_for_document(
7609 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7610 &vi.header.document_id,
7611 buyer_country, buyer_country,
7613 vi.payable_amount,
7614 vi.header.document_date,
7615 None,
7616 );
7617 tax_lines.extend(lines);
7618 }
7619
7620 for ci in &document_flows.customer_invoices {
7622 let lines = tax_line_gen.generate_for_document(
7623 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7624 &ci.header.document_id,
7625 buyer_country, buyer_country,
7627 ci.total_gross_amount,
7628 ci.header.document_date,
7629 None,
7630 );
7631 tax_lines.extend(lines);
7632 }
7633 }
7634
7635 let deferred_tax = {
7637 let companies: Vec<(&str, &str)> = self
7638 .config
7639 .companies
7640 .iter()
7641 .map(|c| (c.code.as_str(), c.country.as_str()))
7642 .collect();
7643 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7644 deferred_gen.generate(&companies, start_date, journal_entries)
7645 };
7646
7647 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7650 std::collections::HashMap::new();
7651 for vi in &document_flows.vendor_invoices {
7652 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7653 }
7654 for ci in &document_flows.customer_invoices {
7655 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7656 }
7657
7658 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7660 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7661 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7662 &tax_lines,
7663 company_code,
7664 &doc_dates,
7665 end_date,
7666 );
7667 debug!("Generated {} tax posting JEs", jes.len());
7668 jes
7669 } else {
7670 Vec::new()
7671 };
7672
7673 let snapshot = TaxSnapshot {
7674 jurisdiction_count: jurisdictions.len(),
7675 code_count: codes.len(),
7676 jurisdictions,
7677 codes,
7678 tax_provisions: provisions,
7679 tax_lines,
7680 tax_returns: Vec::new(),
7681 withholding_records: Vec::new(),
7682 tax_anomaly_labels: Vec::new(),
7683 deferred_tax,
7684 tax_posting_journal_entries,
7685 };
7686
7687 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7688 stats.tax_code_count = snapshot.code_count;
7689 stats.tax_provision_count = snapshot.tax_provisions.len();
7690 stats.tax_line_count = snapshot.tax_lines.len();
7691
7692 info!(
7693 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7694 snapshot.jurisdiction_count,
7695 snapshot.code_count,
7696 snapshot.tax_provisions.len(),
7697 snapshot.deferred_tax.temporary_differences.len(),
7698 snapshot.deferred_tax.journal_entries.len(),
7699 snapshot.tax_posting_journal_entries.len(),
7700 );
7701 self.check_resources_with_log("post-tax")?;
7702
7703 Ok(snapshot)
7704 }
7705
7706 fn phase_esg_generation(
7708 &mut self,
7709 document_flows: &DocumentFlowSnapshot,
7710 manufacturing: &ManufacturingSnapshot,
7711 stats: &mut EnhancedGenerationStatistics,
7712 ) -> SynthResult<EsgSnapshot> {
7713 if !self.phase_config.generate_esg {
7714 debug!("Phase 21: Skipped (ESG generation disabled)");
7715 return Ok(EsgSnapshot::default());
7716 }
7717 let degradation = self.check_resources()?;
7718 if degradation >= DegradationLevel::Reduced {
7719 debug!(
7720 "Phase skipped due to resource pressure (degradation: {:?})",
7721 degradation
7722 );
7723 return Ok(EsgSnapshot::default());
7724 }
7725 info!("Phase 21: Generating ESG Data");
7726
7727 let seed = self.seed;
7728 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7729 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7730 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7731 let entity_id = self
7732 .config
7733 .companies
7734 .first()
7735 .map(|c| c.code.as_str())
7736 .unwrap_or("1000");
7737
7738 let esg_cfg = &self.config.esg;
7739 let mut snapshot = EsgSnapshot::default();
7740
7741 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7743 esg_cfg.environmental.energy.clone(),
7744 seed + 80,
7745 );
7746 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7747
7748 let facility_count = esg_cfg.environmental.energy.facility_count;
7750 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7751 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7752
7753 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7755 seed + 82,
7756 esg_cfg.environmental.waste.diversion_target,
7757 facility_count,
7758 );
7759 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7760
7761 let mut emission_gen =
7763 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7764
7765 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7767 .iter()
7768 .map(|e| datasynth_generators::EnergyInput {
7769 facility_id: e.facility_id.clone(),
7770 energy_type: match e.energy_source {
7771 EnergySourceType::NaturalGas => {
7772 datasynth_generators::EnergyInputType::NaturalGas
7773 }
7774 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7775 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7776 _ => datasynth_generators::EnergyInputType::Electricity,
7777 },
7778 consumption_kwh: e.consumption_kwh,
7779 period: e.period,
7780 })
7781 .collect();
7782
7783 if !manufacturing.production_orders.is_empty() {
7785 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7786 &manufacturing.production_orders,
7787 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7790 if !mfg_energy.is_empty() {
7791 info!(
7792 "ESG: {} energy inputs derived from {} production orders",
7793 mfg_energy.len(),
7794 manufacturing.production_orders.len(),
7795 );
7796 energy_inputs.extend(mfg_energy);
7797 }
7798 }
7799
7800 let mut emissions = Vec::new();
7801 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7802 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7803
7804 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7806 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7807 for payment in &document_flows.payments {
7808 if payment.is_vendor {
7809 *totals
7810 .entry(payment.business_partner_id.clone())
7811 .or_default() += payment.amount;
7812 }
7813 }
7814 totals
7815 };
7816 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7817 .master_data
7818 .vendors
7819 .iter()
7820 .map(|v| {
7821 let spend = vendor_payment_totals
7822 .get(&v.vendor_id)
7823 .copied()
7824 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7825 datasynth_generators::VendorSpendInput {
7826 vendor_id: v.vendor_id.clone(),
7827 category: format!("{:?}", v.vendor_type).to_lowercase(),
7828 spend,
7829 country: v.country.clone(),
7830 }
7831 })
7832 .collect();
7833 if !vendor_spend.is_empty() {
7834 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7835 entity_id,
7836 &vendor_spend,
7837 start_date,
7838 end_date,
7839 ));
7840 }
7841
7842 let headcount = self.master_data.employees.len() as u32;
7844 if headcount > 0 {
7845 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7846 emissions.extend(emission_gen.generate_scope3_business_travel(
7847 entity_id,
7848 travel_spend,
7849 start_date,
7850 ));
7851 emissions
7852 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7853 }
7854
7855 snapshot.emission_count = emissions.len();
7856 snapshot.emissions = emissions;
7857 snapshot.energy = energy_records;
7858
7859 let mut workforce_gen =
7861 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7862 let total_headcount = headcount.max(100);
7863 snapshot.diversity =
7864 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7865 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7866
7867 if !self.master_data.employees.is_empty() {
7869 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7870 entity_id,
7871 &self.master_data.employees,
7872 end_date,
7873 );
7874 if !hr_diversity.is_empty() {
7875 info!(
7876 "ESG: {} diversity metrics derived from {} actual employees",
7877 hr_diversity.len(),
7878 self.master_data.employees.len(),
7879 );
7880 snapshot.diversity.extend(hr_diversity);
7881 }
7882 }
7883
7884 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7885 entity_id,
7886 facility_count,
7887 start_date,
7888 end_date,
7889 );
7890
7891 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7894 entity_id,
7895 &snapshot.safety_incidents,
7896 total_hours,
7897 start_date,
7898 );
7899 snapshot.safety_metrics = vec![safety_metric];
7900
7901 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7903 seed + 85,
7904 esg_cfg.governance.board_size,
7905 esg_cfg.governance.independence_target,
7906 );
7907 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7908
7909 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7911 esg_cfg.supply_chain_esg.clone(),
7912 seed + 86,
7913 );
7914 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7915 .master_data
7916 .vendors
7917 .iter()
7918 .map(|v| datasynth_generators::VendorInput {
7919 vendor_id: v.vendor_id.clone(),
7920 country: v.country.clone(),
7921 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7922 quality_score: None,
7923 })
7924 .collect();
7925 snapshot.supplier_assessments =
7926 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7927
7928 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7930 seed + 87,
7931 esg_cfg.reporting.clone(),
7932 esg_cfg.climate_scenarios.clone(),
7933 );
7934 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7935 snapshot.disclosures = disclosure_gen.generate_disclosures(
7936 entity_id,
7937 &snapshot.materiality,
7938 start_date,
7939 end_date,
7940 );
7941 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7942 snapshot.disclosure_count = snapshot.disclosures.len();
7943
7944 if esg_cfg.anomaly_rate > 0.0 {
7946 let mut anomaly_injector =
7947 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7948 let mut labels = Vec::new();
7949 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7950 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7951 labels.extend(
7952 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7953 );
7954 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7955 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7956 snapshot.anomaly_labels = labels;
7957 }
7958
7959 stats.esg_emission_count = snapshot.emission_count;
7960 stats.esg_disclosure_count = snapshot.disclosure_count;
7961
7962 info!(
7963 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7964 snapshot.emission_count,
7965 snapshot.disclosure_count,
7966 snapshot.supplier_assessments.len()
7967 );
7968 self.check_resources_with_log("post-esg")?;
7969
7970 Ok(snapshot)
7971 }
7972
7973 fn phase_treasury_data(
7975 &mut self,
7976 document_flows: &DocumentFlowSnapshot,
7977 subledger: &SubledgerSnapshot,
7978 intercompany: &IntercompanySnapshot,
7979 stats: &mut EnhancedGenerationStatistics,
7980 ) -> SynthResult<TreasurySnapshot> {
7981 if !self.phase_config.generate_treasury {
7982 debug!("Phase 22: Skipped (treasury generation disabled)");
7983 return Ok(TreasurySnapshot::default());
7984 }
7985 let degradation = self.check_resources()?;
7986 if degradation >= DegradationLevel::Reduced {
7987 debug!(
7988 "Phase skipped due to resource pressure (degradation: {:?})",
7989 degradation
7990 );
7991 return Ok(TreasurySnapshot::default());
7992 }
7993 info!("Phase 22: Generating Treasury Data");
7994
7995 let seed = self.seed;
7996 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7997 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7998 let currency = self
7999 .config
8000 .companies
8001 .first()
8002 .map(|c| c.currency.as_str())
8003 .unwrap_or("USD");
8004 let entity_id = self
8005 .config
8006 .companies
8007 .first()
8008 .map(|c| c.code.as_str())
8009 .unwrap_or("1000");
8010
8011 let mut snapshot = TreasurySnapshot::default();
8012
8013 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8015 self.config.treasury.debt.clone(),
8016 seed + 90,
8017 );
8018 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8019
8020 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8022 self.config.treasury.hedging.clone(),
8023 seed + 91,
8024 );
8025 for debt in &snapshot.debt_instruments {
8026 if debt.rate_type == InterestRateType::Variable {
8027 let swap = hedge_gen.generate_ir_swap(
8028 currency,
8029 debt.principal,
8030 debt.origination_date,
8031 debt.maturity_date,
8032 );
8033 snapshot.hedging_instruments.push(swap);
8034 }
8035 }
8036
8037 {
8040 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8041 for payment in &document_flows.payments {
8042 if payment.currency != currency {
8043 let entry = fx_map
8044 .entry(payment.currency.clone())
8045 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8046 entry.0 += payment.amount;
8047 if payment.header.document_date > entry.1 {
8049 entry.1 = payment.header.document_date;
8050 }
8051 }
8052 }
8053 if !fx_map.is_empty() {
8054 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8055 .into_iter()
8056 .map(|(foreign_ccy, (net_amount, settlement_date))| {
8057 datasynth_generators::treasury::FxExposure {
8058 currency_pair: format!("{foreign_ccy}/{currency}"),
8059 foreign_currency: foreign_ccy,
8060 net_amount,
8061 settlement_date,
8062 description: "AP payment FX exposure".to_string(),
8063 }
8064 })
8065 .collect();
8066 let (fx_instruments, fx_relationships) =
8067 hedge_gen.generate(start_date, &fx_exposures);
8068 snapshot.hedging_instruments.extend(fx_instruments);
8069 snapshot.hedge_relationships.extend(fx_relationships);
8070 }
8071 }
8072
8073 if self.config.treasury.anomaly_rate > 0.0 {
8075 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8076 seed + 92,
8077 self.config.treasury.anomaly_rate,
8078 );
8079 let mut labels = Vec::new();
8080 labels.extend(
8081 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8082 );
8083 snapshot.treasury_anomaly_labels = labels;
8084 }
8085
8086 if self.config.treasury.cash_positioning.enabled {
8088 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8089
8090 for payment in &document_flows.payments {
8092 cash_flows.push(datasynth_generators::treasury::CashFlow {
8093 date: payment.header.document_date,
8094 account_id: format!("{entity_id}-MAIN"),
8095 amount: payment.amount,
8096 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8097 });
8098 }
8099
8100 for chain in &document_flows.o2c_chains {
8102 if let Some(ref receipt) = chain.customer_receipt {
8103 cash_flows.push(datasynth_generators::treasury::CashFlow {
8104 date: receipt.header.document_date,
8105 account_id: format!("{entity_id}-MAIN"),
8106 amount: receipt.amount,
8107 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8108 });
8109 }
8110 for receipt in &chain.remainder_receipts {
8112 cash_flows.push(datasynth_generators::treasury::CashFlow {
8113 date: receipt.header.document_date,
8114 account_id: format!("{entity_id}-MAIN"),
8115 amount: receipt.amount,
8116 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8117 });
8118 }
8119 }
8120
8121 if !cash_flows.is_empty() {
8122 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8123 self.config.treasury.cash_positioning.clone(),
8124 seed + 93,
8125 );
8126 let account_id = format!("{entity_id}-MAIN");
8127 snapshot.cash_positions = cash_gen.generate(
8128 entity_id,
8129 &account_id,
8130 currency,
8131 &cash_flows,
8132 start_date,
8133 start_date + chrono::Months::new(self.config.global.period_months),
8134 rust_decimal::Decimal::new(1_000_000, 0), );
8136 }
8137 }
8138
8139 if self.config.treasury.cash_forecasting.enabled {
8141 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8142
8143 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8145 .ar_invoices
8146 .iter()
8147 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8148 .map(|inv| {
8149 let days_past_due = if inv.due_date < end_date {
8150 (end_date - inv.due_date).num_days().max(0) as u32
8151 } else {
8152 0
8153 };
8154 datasynth_generators::treasury::ArAgingItem {
8155 expected_date: inv.due_date,
8156 amount: inv.amount_remaining,
8157 days_past_due,
8158 document_id: inv.invoice_number.clone(),
8159 }
8160 })
8161 .collect();
8162
8163 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8165 .ap_invoices
8166 .iter()
8167 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8168 .map(|inv| datasynth_generators::treasury::ApAgingItem {
8169 payment_date: inv.due_date,
8170 amount: inv.amount_remaining,
8171 document_id: inv.invoice_number.clone(),
8172 })
8173 .collect();
8174
8175 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8176 self.config.treasury.cash_forecasting.clone(),
8177 seed + 94,
8178 );
8179 let forecast = forecast_gen.generate(
8180 entity_id,
8181 currency,
8182 end_date,
8183 &ar_items,
8184 &ap_items,
8185 &[], );
8187 snapshot.cash_forecasts.push(forecast);
8188 }
8189
8190 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8192 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8193 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8194 self.config.treasury.cash_pooling.clone(),
8195 seed + 95,
8196 );
8197
8198 let account_ids: Vec<String> = snapshot
8200 .cash_positions
8201 .iter()
8202 .map(|cp| cp.bank_account_id.clone())
8203 .collect::<std::collections::HashSet<_>>()
8204 .into_iter()
8205 .collect();
8206
8207 if let Some(pool) =
8208 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8209 {
8210 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8212 for cp in &snapshot.cash_positions {
8213 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8214 }
8215
8216 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8217 latest_balances
8218 .into_iter()
8219 .filter(|(id, _)| pool.participant_accounts.contains(id))
8220 .map(
8221 |(id, balance)| datasynth_generators::treasury::AccountBalance {
8222 account_id: id,
8223 balance,
8224 },
8225 )
8226 .collect();
8227
8228 let sweeps =
8229 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8230 snapshot.cash_pool_sweeps = sweeps;
8231 snapshot.cash_pools.push(pool);
8232 }
8233 }
8234
8235 if self.config.treasury.bank_guarantees.enabled {
8237 let vendor_names: Vec<String> = self
8238 .master_data
8239 .vendors
8240 .iter()
8241 .map(|v| v.name.clone())
8242 .collect();
8243 if !vendor_names.is_empty() {
8244 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8245 self.config.treasury.bank_guarantees.clone(),
8246 seed + 96,
8247 );
8248 snapshot.bank_guarantees =
8249 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8250 }
8251 }
8252
8253 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8255 let entity_ids: Vec<String> = self
8256 .config
8257 .companies
8258 .iter()
8259 .map(|c| c.code.clone())
8260 .collect();
8261 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8262 .matched_pairs
8263 .iter()
8264 .map(|mp| {
8265 (
8266 mp.seller_company.clone(),
8267 mp.buyer_company.clone(),
8268 mp.amount,
8269 )
8270 })
8271 .collect();
8272 if entity_ids.len() >= 2 {
8273 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8274 self.config.treasury.netting.clone(),
8275 seed + 97,
8276 );
8277 snapshot.netting_runs = netting_gen.generate(
8278 &entity_ids,
8279 currency,
8280 start_date,
8281 self.config.global.period_months,
8282 &ic_amounts,
8283 );
8284 }
8285 }
8286
8287 {
8289 use datasynth_generators::treasury::TreasuryAccounting;
8290
8291 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8292 let mut treasury_jes = Vec::new();
8293
8294 if !snapshot.debt_instruments.is_empty() {
8296 let debt_jes =
8297 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8298 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8299 treasury_jes.extend(debt_jes);
8300 }
8301
8302 if !snapshot.hedging_instruments.is_empty() {
8304 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8305 &snapshot.hedging_instruments,
8306 &snapshot.hedge_relationships,
8307 end_date,
8308 entity_id,
8309 );
8310 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8311 treasury_jes.extend(hedge_jes);
8312 }
8313
8314 if !snapshot.cash_pool_sweeps.is_empty() {
8316 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8317 &snapshot.cash_pool_sweeps,
8318 entity_id,
8319 );
8320 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8321 treasury_jes.extend(sweep_jes);
8322 }
8323
8324 if !treasury_jes.is_empty() {
8325 debug!("Total treasury journal entries: {}", treasury_jes.len());
8326 }
8327 snapshot.journal_entries = treasury_jes;
8328 }
8329
8330 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8331 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8332 stats.cash_position_count = snapshot.cash_positions.len();
8333 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8334 stats.cash_pool_count = snapshot.cash_pools.len();
8335
8336 info!(
8337 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8338 snapshot.debt_instruments.len(),
8339 snapshot.hedging_instruments.len(),
8340 snapshot.cash_positions.len(),
8341 snapshot.cash_forecasts.len(),
8342 snapshot.cash_pools.len(),
8343 snapshot.bank_guarantees.len(),
8344 snapshot.netting_runs.len(),
8345 snapshot.journal_entries.len(),
8346 );
8347 self.check_resources_with_log("post-treasury")?;
8348
8349 Ok(snapshot)
8350 }
8351
8352 fn phase_project_accounting(
8354 &mut self,
8355 document_flows: &DocumentFlowSnapshot,
8356 hr: &HrSnapshot,
8357 stats: &mut EnhancedGenerationStatistics,
8358 ) -> SynthResult<ProjectAccountingSnapshot> {
8359 if !self.phase_config.generate_project_accounting {
8360 debug!("Phase 23: Skipped (project accounting disabled)");
8361 return Ok(ProjectAccountingSnapshot::default());
8362 }
8363 let degradation = self.check_resources()?;
8364 if degradation >= DegradationLevel::Reduced {
8365 debug!(
8366 "Phase skipped due to resource pressure (degradation: {:?})",
8367 degradation
8368 );
8369 return Ok(ProjectAccountingSnapshot::default());
8370 }
8371 info!("Phase 23: Generating Project Accounting Data");
8372
8373 let seed = self.seed;
8374 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8375 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8376 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8377 let company_code = self
8378 .config
8379 .companies
8380 .first()
8381 .map(|c| c.code.as_str())
8382 .unwrap_or("1000");
8383
8384 let mut snapshot = ProjectAccountingSnapshot::default();
8385
8386 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8388 self.config.project_accounting.clone(),
8389 seed + 95,
8390 );
8391 let pool = project_gen.generate(company_code, start_date, end_date);
8392 snapshot.projects = pool.projects.clone();
8393
8394 {
8396 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8397 Vec::new();
8398
8399 for te in &hr.time_entries {
8401 let total_hours = te.hours_regular + te.hours_overtime;
8402 if total_hours > 0.0 {
8403 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8404 id: te.entry_id.clone(),
8405 entity_id: company_code.to_string(),
8406 date: te.date,
8407 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8408 .unwrap_or(rust_decimal::Decimal::ZERO),
8409 source_type: CostSourceType::TimeEntry,
8410 hours: Some(
8411 rust_decimal::Decimal::from_f64_retain(total_hours)
8412 .unwrap_or(rust_decimal::Decimal::ZERO),
8413 ),
8414 });
8415 }
8416 }
8417
8418 for er in &hr.expense_reports {
8420 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8421 id: er.report_id.clone(),
8422 entity_id: company_code.to_string(),
8423 date: er.submission_date,
8424 amount: er.total_amount,
8425 source_type: CostSourceType::ExpenseReport,
8426 hours: None,
8427 });
8428 }
8429
8430 for po in &document_flows.purchase_orders {
8432 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8433 id: po.header.document_id.clone(),
8434 entity_id: company_code.to_string(),
8435 date: po.header.document_date,
8436 amount: po.total_net_amount,
8437 source_type: CostSourceType::PurchaseOrder,
8438 hours: None,
8439 });
8440 }
8441
8442 for vi in &document_flows.vendor_invoices {
8444 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8445 id: vi.header.document_id.clone(),
8446 entity_id: company_code.to_string(),
8447 date: vi.header.document_date,
8448 amount: vi.payable_amount,
8449 source_type: CostSourceType::VendorInvoice,
8450 hours: None,
8451 });
8452 }
8453
8454 if !source_docs.is_empty() && !pool.projects.is_empty() {
8455 let mut cost_gen =
8456 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8457 self.config.project_accounting.cost_allocation.clone(),
8458 seed + 99,
8459 );
8460 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8461 }
8462 }
8463
8464 if self.config.project_accounting.change_orders.enabled {
8466 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8467 self.config.project_accounting.change_orders.clone(),
8468 seed + 96,
8469 );
8470 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8471 }
8472
8473 if self.config.project_accounting.milestones.enabled {
8475 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8476 self.config.project_accounting.milestones.clone(),
8477 seed + 97,
8478 );
8479 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8480 }
8481
8482 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8484 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8485 self.config.project_accounting.earned_value.clone(),
8486 seed + 98,
8487 );
8488 snapshot.earned_value_metrics =
8489 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8490 }
8491
8492 if self.config.project_accounting.revenue_recognition.enabled
8494 && !snapshot.projects.is_empty()
8495 && !snapshot.cost_lines.is_empty()
8496 {
8497 use datasynth_generators::project_accounting::RevenueGenerator;
8498 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8499 let avg_contract_value =
8500 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8501 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8502
8503 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8506 snapshot
8507 .projects
8508 .iter()
8509 .filter(|p| {
8510 matches!(
8511 p.project_type,
8512 datasynth_core::models::ProjectType::Customer
8513 )
8514 })
8515 .map(|p| {
8516 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8517 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8518 } else {
8520 avg_contract_value
8521 };
8522 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8524 })
8525 .collect();
8526
8527 if !contract_values.is_empty() {
8528 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8529 snapshot.revenue_records = rev_gen.generate(
8530 &snapshot.projects,
8531 &snapshot.cost_lines,
8532 &contract_values,
8533 start_date,
8534 end_date,
8535 );
8536 debug!(
8537 "Generated {} revenue recognition records for {} customer projects",
8538 snapshot.revenue_records.len(),
8539 contract_values.len()
8540 );
8541 }
8542 }
8543
8544 stats.project_count = snapshot.projects.len();
8545 stats.project_change_order_count = snapshot.change_orders.len();
8546 stats.project_cost_line_count = snapshot.cost_lines.len();
8547
8548 info!(
8549 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8550 snapshot.projects.len(),
8551 snapshot.change_orders.len(),
8552 snapshot.milestones.len(),
8553 snapshot.earned_value_metrics.len()
8554 );
8555 self.check_resources_with_log("post-project-accounting")?;
8556
8557 Ok(snapshot)
8558 }
8559
8560 fn phase_evolution_events(
8562 &mut self,
8563 stats: &mut EnhancedGenerationStatistics,
8564 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8565 if !self.phase_config.generate_evolution_events {
8566 debug!("Phase 24: Skipped (evolution events disabled)");
8567 return Ok((Vec::new(), Vec::new()));
8568 }
8569 info!("Phase 24: Generating Process Evolution + Organizational Events");
8570
8571 let seed = self.seed;
8572 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8573 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8574 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8575
8576 let mut proc_gen =
8578 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8579 seed + 100,
8580 );
8581 let process_events = proc_gen.generate_events(start_date, end_date);
8582
8583 let company_codes: Vec<String> = self
8585 .config
8586 .companies
8587 .iter()
8588 .map(|c| c.code.clone())
8589 .collect();
8590 let mut org_gen =
8591 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8592 seed + 101,
8593 );
8594 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8595
8596 stats.process_evolution_event_count = process_events.len();
8597 stats.organizational_event_count = org_events.len();
8598
8599 info!(
8600 "Evolution events generated: {} process evolution, {} organizational",
8601 process_events.len(),
8602 org_events.len()
8603 );
8604 self.check_resources_with_log("post-evolution-events")?;
8605
8606 Ok((process_events, org_events))
8607 }
8608
8609 fn phase_disruption_events(
8612 &self,
8613 stats: &mut EnhancedGenerationStatistics,
8614 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8615 if !self.config.organizational_events.enabled {
8616 debug!("Phase 24b: Skipped (organizational events disabled)");
8617 return Ok(Vec::new());
8618 }
8619 info!("Phase 24b: Generating Disruption Events");
8620
8621 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8622 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8623 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8624
8625 let company_codes: Vec<String> = self
8626 .config
8627 .companies
8628 .iter()
8629 .map(|c| c.code.clone())
8630 .collect();
8631
8632 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8633 let events = gen.generate(start_date, end_date, &company_codes);
8634
8635 stats.disruption_event_count = events.len();
8636 info!("Disruption events generated: {} events", events.len());
8637 self.check_resources_with_log("post-disruption-events")?;
8638
8639 Ok(events)
8640 }
8641
8642 fn phase_counterfactuals(
8649 &self,
8650 journal_entries: &[JournalEntry],
8651 stats: &mut EnhancedGenerationStatistics,
8652 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8653 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8654 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8655 return Ok(Vec::new());
8656 }
8657 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8658
8659 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8660
8661 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8662
8663 let specs = [
8665 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8666 CounterfactualSpec::ShiftDate { days: -14 },
8667 CounterfactualSpec::SelfApprove,
8668 CounterfactualSpec::SplitTransaction { split_count: 3 },
8669 ];
8670
8671 let pairs: Vec<_> = journal_entries
8672 .iter()
8673 .enumerate()
8674 .map(|(i, je)| {
8675 let spec = &specs[i % specs.len()];
8676 gen.generate(je, spec)
8677 })
8678 .collect();
8679
8680 stats.counterfactual_pair_count = pairs.len();
8681 info!(
8682 "Counterfactual pairs generated: {} pairs from {} journal entries",
8683 pairs.len(),
8684 journal_entries.len()
8685 );
8686 self.check_resources_with_log("post-counterfactuals")?;
8687
8688 Ok(pairs)
8689 }
8690
8691 fn phase_red_flags(
8698 &self,
8699 anomaly_labels: &AnomalyLabels,
8700 document_flows: &DocumentFlowSnapshot,
8701 stats: &mut EnhancedGenerationStatistics,
8702 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8703 if !self.config.fraud.enabled {
8704 debug!("Phase 26: Skipped (fraud generation disabled)");
8705 return Ok(Vec::new());
8706 }
8707 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8708
8709 use datasynth_generators::fraud::RedFlagGenerator;
8710
8711 let generator = RedFlagGenerator::new();
8712 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8713
8714 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8716 .labels
8717 .iter()
8718 .filter(|label| label.anomaly_type.is_intentional())
8719 .map(|label| label.document_id.as_str())
8720 .collect();
8721
8722 let mut flags = Vec::new();
8723
8724 for chain in &document_flows.p2p_chains {
8726 let doc_id = &chain.purchase_order.header.document_id;
8727 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8728 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8729 }
8730
8731 for chain in &document_flows.o2c_chains {
8733 let doc_id = &chain.sales_order.header.document_id;
8734 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8735 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8736 }
8737
8738 stats.red_flag_count = flags.len();
8739 info!(
8740 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8741 flags.len(),
8742 document_flows.p2p_chains.len(),
8743 document_flows.o2c_chains.len(),
8744 fraud_doc_ids.len()
8745 );
8746 self.check_resources_with_log("post-red-flags")?;
8747
8748 Ok(flags)
8749 }
8750
8751 fn phase_collusion_rings(
8757 &mut self,
8758 stats: &mut EnhancedGenerationStatistics,
8759 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8760 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8761 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8762 return Ok(Vec::new());
8763 }
8764 info!("Phase 26b: Generating Collusion Rings");
8765
8766 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8767 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8768 let months = self.config.global.period_months;
8769
8770 let employee_ids: Vec<String> = self
8771 .master_data
8772 .employees
8773 .iter()
8774 .map(|e| e.employee_id.clone())
8775 .collect();
8776 let vendor_ids: Vec<String> = self
8777 .master_data
8778 .vendors
8779 .iter()
8780 .map(|v| v.vendor_id.clone())
8781 .collect();
8782
8783 let mut generator =
8784 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8785 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8786
8787 stats.collusion_ring_count = rings.len();
8788 info!(
8789 "Collusion rings generated: {} rings, total members: {}",
8790 rings.len(),
8791 rings
8792 .iter()
8793 .map(datasynth_generators::fraud::CollusionRing::size)
8794 .sum::<usize>()
8795 );
8796 self.check_resources_with_log("post-collusion-rings")?;
8797
8798 Ok(rings)
8799 }
8800
8801 fn phase_temporal_attributes(
8806 &mut self,
8807 stats: &mut EnhancedGenerationStatistics,
8808 ) -> SynthResult<
8809 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8810 > {
8811 if !self.config.temporal_attributes.enabled {
8812 debug!("Phase 27: Skipped (temporal attributes disabled)");
8813 return Ok(Vec::new());
8814 }
8815 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8816
8817 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8818 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8819
8820 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8824 || self.config.temporal_attributes.enabled;
8825 let temporal_config = {
8826 let ta = &self.config.temporal_attributes;
8827 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8828 .enabled(ta.enabled)
8829 .closed_probability(ta.valid_time.closed_probability)
8830 .avg_validity_days(ta.valid_time.avg_validity_days)
8831 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8832 .with_version_chains(if generate_version_chains {
8833 ta.avg_versions_per_entity
8834 } else {
8835 1.0
8836 })
8837 .build()
8838 };
8839 let temporal_config = if self
8841 .config
8842 .temporal_attributes
8843 .transaction_time
8844 .allow_backdating
8845 {
8846 let mut c = temporal_config;
8847 c.transaction_time.allow_backdating = true;
8848 c.transaction_time.backdating_probability = self
8849 .config
8850 .temporal_attributes
8851 .transaction_time
8852 .backdating_probability;
8853 c.transaction_time.max_backdate_days = self
8854 .config
8855 .temporal_attributes
8856 .transaction_time
8857 .max_backdate_days;
8858 c
8859 } else {
8860 temporal_config
8861 };
8862 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8863 temporal_config,
8864 self.seed + 130,
8865 start_date,
8866 );
8867
8868 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8869 self.seed + 130,
8870 datasynth_core::GeneratorType::Vendor,
8871 );
8872
8873 let chains: Vec<_> = self
8874 .master_data
8875 .vendors
8876 .iter()
8877 .map(|vendor| {
8878 let id = uuid_factory.next();
8879 gen.generate_version_chain(vendor.clone(), id)
8880 })
8881 .collect();
8882
8883 stats.temporal_version_chain_count = chains.len();
8884 info!("Temporal version chains generated: {} chains", chains.len());
8885 self.check_resources_with_log("post-temporal-attributes")?;
8886
8887 Ok(chains)
8888 }
8889
8890 fn phase_entity_relationships(
8900 &self,
8901 journal_entries: &[JournalEntry],
8902 document_flows: &DocumentFlowSnapshot,
8903 stats: &mut EnhancedGenerationStatistics,
8904 ) -> SynthResult<(
8905 Option<datasynth_core::models::EntityGraph>,
8906 Vec<datasynth_core::models::CrossProcessLink>,
8907 )> {
8908 use datasynth_generators::relationships::{
8909 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8910 TransactionSummary,
8911 };
8912
8913 let rs_enabled = self.config.relationship_strength.enabled;
8914 let cpl_enabled = self.config.cross_process_links.enabled
8915 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8916
8917 if !rs_enabled && !cpl_enabled {
8918 debug!(
8919 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8920 );
8921 return Ok((None, Vec::new()));
8922 }
8923
8924 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8925
8926 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8927 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8928
8929 let company_code = self
8930 .config
8931 .companies
8932 .first()
8933 .map(|c| c.code.as_str())
8934 .unwrap_or("1000");
8935
8936 let gen_config = EntityGraphConfig {
8938 enabled: rs_enabled,
8939 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8940 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8941 enable_return_flows: false,
8942 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8943 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8944 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8946 1.0
8947 } else {
8948 0.30
8949 },
8950 ..Default::default()
8951 },
8952 strength_config: datasynth_generators::relationships::StrengthConfig {
8953 transaction_volume_weight: self
8954 .config
8955 .relationship_strength
8956 .calculation
8957 .transaction_volume_weight,
8958 transaction_count_weight: self
8959 .config
8960 .relationship_strength
8961 .calculation
8962 .transaction_count_weight,
8963 duration_weight: self
8964 .config
8965 .relationship_strength
8966 .calculation
8967 .relationship_duration_weight,
8968 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8969 mutual_connections_weight: self
8970 .config
8971 .relationship_strength
8972 .calculation
8973 .mutual_connections_weight,
8974 recency_half_life_days: self
8975 .config
8976 .relationship_strength
8977 .calculation
8978 .recency_half_life_days,
8979 },
8980 ..Default::default()
8981 };
8982
8983 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8984
8985 let entity_graph = if rs_enabled {
8987 let vendor_summaries: Vec<EntitySummary> = self
8989 .master_data
8990 .vendors
8991 .iter()
8992 .map(|v| {
8993 EntitySummary::new(
8994 &v.vendor_id,
8995 &v.name,
8996 datasynth_core::models::GraphEntityType::Vendor,
8997 start_date,
8998 )
8999 })
9000 .collect();
9001
9002 let customer_summaries: Vec<EntitySummary> = self
9003 .master_data
9004 .customers
9005 .iter()
9006 .map(|c| {
9007 EntitySummary::new(
9008 &c.customer_id,
9009 &c.name,
9010 datasynth_core::models::GraphEntityType::Customer,
9011 start_date,
9012 )
9013 })
9014 .collect();
9015
9016 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9021 std::collections::HashMap::new();
9022
9023 for je in journal_entries {
9024 let cc = je.header.company_code.clone();
9025 let posting_date = je.header.posting_date;
9026 for line in &je.lines {
9027 if let Some(ref tp) = line.trading_partner {
9028 let amount = if line.debit_amount > line.credit_amount {
9029 line.debit_amount
9030 } else {
9031 line.credit_amount
9032 };
9033 let entry = txn_summaries
9034 .entry((cc.clone(), tp.clone()))
9035 .or_insert_with(|| TransactionSummary {
9036 total_volume: rust_decimal::Decimal::ZERO,
9037 transaction_count: 0,
9038 first_transaction_date: posting_date,
9039 last_transaction_date: posting_date,
9040 related_entities: std::collections::HashSet::new(),
9041 });
9042 entry.total_volume += amount;
9043 entry.transaction_count += 1;
9044 if posting_date < entry.first_transaction_date {
9045 entry.first_transaction_date = posting_date;
9046 }
9047 if posting_date > entry.last_transaction_date {
9048 entry.last_transaction_date = posting_date;
9049 }
9050 entry.related_entities.insert(cc.clone());
9051 }
9052 }
9053 }
9054
9055 for chain in &document_flows.p2p_chains {
9058 let cc = chain.purchase_order.header.company_code.clone();
9059 let vendor_id = chain.purchase_order.vendor_id.clone();
9060 let po_date = chain.purchase_order.header.document_date;
9061 let amount = chain.purchase_order.total_net_amount;
9062
9063 let entry = txn_summaries
9064 .entry((cc.clone(), vendor_id))
9065 .or_insert_with(|| TransactionSummary {
9066 total_volume: rust_decimal::Decimal::ZERO,
9067 transaction_count: 0,
9068 first_transaction_date: po_date,
9069 last_transaction_date: po_date,
9070 related_entities: std::collections::HashSet::new(),
9071 });
9072 entry.total_volume += amount;
9073 entry.transaction_count += 1;
9074 if po_date < entry.first_transaction_date {
9075 entry.first_transaction_date = po_date;
9076 }
9077 if po_date > entry.last_transaction_date {
9078 entry.last_transaction_date = po_date;
9079 }
9080 entry.related_entities.insert(cc);
9081 }
9082
9083 for chain in &document_flows.o2c_chains {
9085 let cc = chain.sales_order.header.company_code.clone();
9086 let customer_id = chain.sales_order.customer_id.clone();
9087 let so_date = chain.sales_order.header.document_date;
9088 let amount = chain.sales_order.total_net_amount;
9089
9090 let entry = txn_summaries
9091 .entry((cc.clone(), customer_id))
9092 .or_insert_with(|| TransactionSummary {
9093 total_volume: rust_decimal::Decimal::ZERO,
9094 transaction_count: 0,
9095 first_transaction_date: so_date,
9096 last_transaction_date: so_date,
9097 related_entities: std::collections::HashSet::new(),
9098 });
9099 entry.total_volume += amount;
9100 entry.transaction_count += 1;
9101 if so_date < entry.first_transaction_date {
9102 entry.first_transaction_date = so_date;
9103 }
9104 if so_date > entry.last_transaction_date {
9105 entry.last_transaction_date = so_date;
9106 }
9107 entry.related_entities.insert(cc);
9108 }
9109
9110 let as_of_date = journal_entries
9111 .last()
9112 .map(|je| je.header.posting_date)
9113 .unwrap_or(start_date);
9114
9115 let graph = gen.generate_entity_graph(
9116 company_code,
9117 as_of_date,
9118 &vendor_summaries,
9119 &customer_summaries,
9120 &txn_summaries,
9121 );
9122
9123 info!(
9124 "Entity relationship graph: {} nodes, {} edges",
9125 graph.nodes.len(),
9126 graph.edges.len()
9127 );
9128 stats.entity_relationship_node_count = graph.nodes.len();
9129 stats.entity_relationship_edge_count = graph.edges.len();
9130 Some(graph)
9131 } else {
9132 None
9133 };
9134
9135 let cross_process_links = if cpl_enabled {
9137 let gr_refs: Vec<GoodsReceiptRef> = document_flows
9139 .p2p_chains
9140 .iter()
9141 .flat_map(|chain| {
9142 let vendor_id = chain.purchase_order.vendor_id.clone();
9143 let cc = chain.purchase_order.header.company_code.clone();
9144 chain.goods_receipts.iter().flat_map(move |gr| {
9145 gr.items.iter().filter_map({
9146 let doc_id = gr.header.document_id.clone();
9147 let v_id = vendor_id.clone();
9148 let company = cc.clone();
9149 let receipt_date = gr.header.document_date;
9150 move |item| {
9151 item.base
9152 .material_id
9153 .as_ref()
9154 .map(|mat_id| GoodsReceiptRef {
9155 document_id: doc_id.clone(),
9156 material_id: mat_id.clone(),
9157 quantity: item.base.quantity,
9158 receipt_date,
9159 vendor_id: v_id.clone(),
9160 company_code: company.clone(),
9161 })
9162 }
9163 })
9164 })
9165 })
9166 .collect();
9167
9168 let del_refs: Vec<DeliveryRef> = document_flows
9170 .o2c_chains
9171 .iter()
9172 .flat_map(|chain| {
9173 let customer_id = chain.sales_order.customer_id.clone();
9174 let cc = chain.sales_order.header.company_code.clone();
9175 chain.deliveries.iter().flat_map(move |del| {
9176 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9177 del.items.iter().filter_map({
9178 let doc_id = del.header.document_id.clone();
9179 let c_id = customer_id.clone();
9180 let company = cc.clone();
9181 move |item| {
9182 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9183 document_id: doc_id.clone(),
9184 material_id: mat_id.clone(),
9185 quantity: item.base.quantity,
9186 delivery_date,
9187 customer_id: c_id.clone(),
9188 company_code: company.clone(),
9189 })
9190 }
9191 })
9192 })
9193 })
9194 .collect();
9195
9196 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9197 info!("Cross-process links generated: {} links", links.len());
9198 stats.cross_process_link_count = links.len();
9199 links
9200 } else {
9201 Vec::new()
9202 };
9203
9204 self.check_resources_with_log("post-entity-relationships")?;
9205 Ok((entity_graph, cross_process_links))
9206 }
9207
9208 fn phase_industry_data(
9210 &self,
9211 stats: &mut EnhancedGenerationStatistics,
9212 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9213 if !self.config.industry_specific.enabled {
9214 return None;
9215 }
9216 info!("Phase 29: Generating industry-specific data");
9217 let output = datasynth_generators::industry::factory::generate_industry_output(
9218 self.config.global.industry,
9219 );
9220 stats.industry_gl_account_count = output.gl_accounts.len();
9221 info!(
9222 "Industry data generated: {} GL accounts for {:?}",
9223 output.gl_accounts.len(),
9224 self.config.global.industry
9225 );
9226 Some(output)
9227 }
9228
9229 fn phase_opening_balances(
9231 &mut self,
9232 coa: &Arc<ChartOfAccounts>,
9233 stats: &mut EnhancedGenerationStatistics,
9234 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9235 if !self.config.balance.generate_opening_balances {
9236 debug!("Phase 3b: Skipped (opening balance generation disabled)");
9237 return Ok(Vec::new());
9238 }
9239 info!("Phase 3b: Generating Opening Balances");
9240
9241 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9242 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9243 let fiscal_year = start_date.year();
9244
9245 let industry = match self.config.global.industry {
9246 IndustrySector::Manufacturing => IndustryType::Manufacturing,
9247 IndustrySector::Retail => IndustryType::Retail,
9248 IndustrySector::FinancialServices => IndustryType::Financial,
9249 IndustrySector::Healthcare => IndustryType::Healthcare,
9250 IndustrySector::Technology => IndustryType::Technology,
9251 _ => IndustryType::Manufacturing,
9252 };
9253
9254 let config = datasynth_generators::OpeningBalanceConfig {
9255 industry,
9256 ..Default::default()
9257 };
9258 let mut gen =
9259 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9260
9261 let mut results = Vec::new();
9262 for company in &self.config.companies {
9263 let spec = OpeningBalanceSpec::new(
9264 company.code.clone(),
9265 start_date,
9266 fiscal_year,
9267 company.currency.clone(),
9268 rust_decimal::Decimal::new(10_000_000, 0),
9269 industry,
9270 );
9271 let ob = gen.generate(&spec, coa, start_date, &company.code);
9272 results.push(ob);
9273 }
9274
9275 stats.opening_balance_count = results.len();
9276 info!("Opening balances generated: {} companies", results.len());
9277 self.check_resources_with_log("post-opening-balances")?;
9278
9279 Ok(results)
9280 }
9281
9282 fn phase_subledger_reconciliation(
9284 &mut self,
9285 subledger: &SubledgerSnapshot,
9286 entries: &[JournalEntry],
9287 stats: &mut EnhancedGenerationStatistics,
9288 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9289 if !self.config.balance.reconcile_subledgers {
9290 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9291 return Ok(Vec::new());
9292 }
9293 info!("Phase 9b: Reconciling GL to subledger balances");
9294
9295 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9296 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9297 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9298
9299 let tracker_config = BalanceTrackerConfig {
9301 validate_on_each_entry: false,
9302 track_history: false,
9303 fail_on_validation_error: false,
9304 ..Default::default()
9305 };
9306 let recon_currency = self
9307 .config
9308 .companies
9309 .first()
9310 .map(|c| c.currency.clone())
9311 .unwrap_or_else(|| "USD".to_string());
9312 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9313 let validation_errors = tracker.apply_entries(entries);
9314 if !validation_errors.is_empty() {
9315 warn!(
9316 error_count = validation_errors.len(),
9317 "Balance tracker encountered validation errors during subledger reconciliation"
9318 );
9319 for err in &validation_errors {
9320 debug!("Balance validation error: {:?}", err);
9321 }
9322 }
9323
9324 let mut engine = datasynth_generators::ReconciliationEngine::new(
9325 datasynth_generators::ReconciliationConfig::default(),
9326 );
9327
9328 let mut results = Vec::new();
9329 let company_code = self
9330 .config
9331 .companies
9332 .first()
9333 .map(|c| c.code.as_str())
9334 .unwrap_or("1000");
9335
9336 if !subledger.ar_invoices.is_empty() {
9338 let gl_balance = tracker
9339 .get_account_balance(
9340 company_code,
9341 datasynth_core::accounts::control_accounts::AR_CONTROL,
9342 )
9343 .map(|b| b.closing_balance)
9344 .unwrap_or_default();
9345 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9346 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9347 }
9348
9349 if !subledger.ap_invoices.is_empty() {
9351 let gl_balance = tracker
9352 .get_account_balance(
9353 company_code,
9354 datasynth_core::accounts::control_accounts::AP_CONTROL,
9355 )
9356 .map(|b| b.closing_balance)
9357 .unwrap_or_default();
9358 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9359 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9360 }
9361
9362 if !subledger.fa_records.is_empty() {
9364 let gl_asset_balance = tracker
9365 .get_account_balance(
9366 company_code,
9367 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9368 )
9369 .map(|b| b.closing_balance)
9370 .unwrap_or_default();
9371 let gl_accum_depr_balance = tracker
9372 .get_account_balance(
9373 company_code,
9374 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9375 )
9376 .map(|b| b.closing_balance)
9377 .unwrap_or_default();
9378 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9379 subledger.fa_records.iter().collect();
9380 let (asset_recon, depr_recon) = engine.reconcile_fa(
9381 company_code,
9382 end_date,
9383 gl_asset_balance,
9384 gl_accum_depr_balance,
9385 &fa_refs,
9386 );
9387 results.push(asset_recon);
9388 results.push(depr_recon);
9389 }
9390
9391 if !subledger.inventory_positions.is_empty() {
9393 let gl_balance = tracker
9394 .get_account_balance(
9395 company_code,
9396 datasynth_core::accounts::control_accounts::INVENTORY,
9397 )
9398 .map(|b| b.closing_balance)
9399 .unwrap_or_default();
9400 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9401 subledger.inventory_positions.iter().collect();
9402 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9403 }
9404
9405 stats.subledger_reconciliation_count = results.len();
9406 let passed = results.iter().filter(|r| r.is_balanced()).count();
9407 let failed = results.len() - passed;
9408 info!(
9409 "Subledger reconciliation: {} checks, {} passed, {} failed",
9410 results.len(),
9411 passed,
9412 failed
9413 );
9414 self.check_resources_with_log("post-subledger-reconciliation")?;
9415
9416 Ok(results)
9417 }
9418
9419 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9421 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9422
9423 let coa_framework = self.resolve_coa_framework();
9424
9425 let mut gen = ChartOfAccountsGenerator::new(
9426 self.config.chart_of_accounts.complexity,
9427 self.config.global.industry,
9428 self.seed,
9429 )
9430 .with_coa_framework(coa_framework);
9431
9432 let coa = Arc::new(gen.generate());
9433 self.coa = Some(Arc::clone(&coa));
9434
9435 if let Some(pb) = pb {
9436 pb.finish_with_message("Chart of Accounts complete");
9437 }
9438
9439 Ok(coa)
9440 }
9441
9442 fn generate_master_data(&mut self) -> SynthResult<()> {
9444 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9445 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9446 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9447
9448 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9450
9451 let pack = self.primary_pack().clone();
9453
9454 let vendors_per_company = self.phase_config.vendors_per_company;
9456 let customers_per_company = self.phase_config.customers_per_company;
9457 let materials_per_company = self.phase_config.materials_per_company;
9458 let assets_per_company = self.phase_config.assets_per_company;
9459 let coa_framework = self.resolve_coa_framework();
9460
9461 let per_company_results: Vec<_> = self
9464 .config
9465 .companies
9466 .par_iter()
9467 .enumerate()
9468 .map(|(i, company)| {
9469 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9470 let pack = pack.clone();
9471
9472 let mut vendor_gen = VendorGenerator::new(company_seed);
9474 vendor_gen.set_country_pack(pack.clone());
9475 vendor_gen.set_coa_framework(coa_framework);
9476 vendor_gen.set_counter_offset(i * vendors_per_company);
9477 if self.config.vendor_network.enabled {
9479 let vn = &self.config.vendor_network;
9480 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9481 enabled: true,
9482 depth: vn.depth,
9483 tier1_count: datasynth_generators::TierCountConfig::new(
9484 vn.tier1.min,
9485 vn.tier1.max,
9486 ),
9487 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9488 vn.tier2_per_parent.min,
9489 vn.tier2_per_parent.max,
9490 ),
9491 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9492 vn.tier3_per_parent.min,
9493 vn.tier3_per_parent.max,
9494 ),
9495 cluster_distribution: datasynth_generators::ClusterDistribution {
9496 reliable_strategic: vn.clusters.reliable_strategic,
9497 standard_operational: vn.clusters.standard_operational,
9498 transactional: vn.clusters.transactional,
9499 problematic: vn.clusters.problematic,
9500 },
9501 concentration_limits: datasynth_generators::ConcentrationLimits {
9502 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9503 max_top5: vn.dependencies.top_5_concentration,
9504 },
9505 ..datasynth_generators::VendorNetworkConfig::default()
9506 });
9507 }
9508 let vendor_pool =
9509 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9510
9511 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9513 customer_gen.set_country_pack(pack.clone());
9514 customer_gen.set_coa_framework(coa_framework);
9515 customer_gen.set_counter_offset(i * customers_per_company);
9516 if self.config.customer_segmentation.enabled {
9518 let cs = &self.config.customer_segmentation;
9519 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9520 enabled: true,
9521 segment_distribution: datasynth_generators::SegmentDistribution {
9522 enterprise: cs.value_segments.enterprise.customer_share,
9523 mid_market: cs.value_segments.mid_market.customer_share,
9524 smb: cs.value_segments.smb.customer_share,
9525 consumer: cs.value_segments.consumer.customer_share,
9526 },
9527 referral_config: datasynth_generators::ReferralConfig {
9528 enabled: cs.networks.referrals.enabled,
9529 referral_rate: cs.networks.referrals.referral_rate,
9530 ..Default::default()
9531 },
9532 hierarchy_config: datasynth_generators::HierarchyConfig {
9533 enabled: cs.networks.corporate_hierarchies.enabled,
9534 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9535 ..Default::default()
9536 },
9537 ..Default::default()
9538 };
9539 customer_gen.set_segmentation_config(seg_cfg);
9540 }
9541 let customer_pool = customer_gen.generate_customer_pool(
9542 customers_per_company,
9543 &company.code,
9544 start_date,
9545 );
9546
9547 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9549 material_gen.set_country_pack(pack.clone());
9550 material_gen.set_counter_offset(i * materials_per_company);
9551 let material_pool = material_gen.generate_material_pool(
9552 materials_per_company,
9553 &company.code,
9554 start_date,
9555 );
9556
9557 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9559 let asset_pool = asset_gen.generate_asset_pool(
9560 assets_per_company,
9561 &company.code,
9562 (start_date, end_date),
9563 );
9564
9565 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9567 employee_gen.set_country_pack(pack);
9568 let employee_pool =
9569 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9570
9571 let employee_change_history =
9573 employee_gen.generate_all_change_history(&employee_pool, end_date);
9574
9575 let employee_ids: Vec<String> = employee_pool
9577 .employees
9578 .iter()
9579 .map(|e| e.employee_id.clone())
9580 .collect();
9581 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9582 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9583
9584 (
9585 vendor_pool.vendors,
9586 customer_pool.customers,
9587 material_pool.materials,
9588 asset_pool.assets,
9589 employee_pool.employees,
9590 employee_change_history,
9591 cost_centers,
9592 )
9593 })
9594 .collect();
9595
9596 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9598 per_company_results
9599 {
9600 self.master_data.vendors.extend(vendors);
9601 self.master_data.customers.extend(customers);
9602 self.master_data.materials.extend(materials);
9603 self.master_data.assets.extend(assets);
9604 self.master_data.employees.extend(employees);
9605 self.master_data.cost_centers.extend(cost_centers);
9606 self.master_data
9607 .employee_change_history
9608 .extend(change_history);
9609 }
9610
9611 if let Some(pb) = &pb {
9612 pb.inc(total);
9613 }
9614 if let Some(pb) = pb {
9615 pb.finish_with_message("Master data generation complete");
9616 }
9617
9618 Ok(())
9619 }
9620
9621 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9623 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9624 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9625
9626 let months = (self.config.global.period_months as usize).max(1);
9629 let p2p_count = self
9630 .phase_config
9631 .p2p_chains
9632 .min(self.master_data.vendors.len() * 2 * months);
9633 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9634
9635 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9637 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9638 p2p_gen.set_country_pack(self.primary_pack().clone());
9639
9640 for i in 0..p2p_count {
9641 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9642 let materials: Vec<&Material> = self
9643 .master_data
9644 .materials
9645 .iter()
9646 .skip(i % self.master_data.materials.len().max(1))
9647 .take(2.min(self.master_data.materials.len()))
9648 .collect();
9649
9650 if materials.is_empty() {
9651 continue;
9652 }
9653
9654 let company = &self.config.companies[i % self.config.companies.len()];
9655 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9656 let fiscal_period = po_date.month() as u8;
9657 let created_by = if self.master_data.employees.is_empty() {
9658 "SYSTEM"
9659 } else {
9660 self.master_data.employees[i % self.master_data.employees.len()]
9661 .user_id
9662 .as_str()
9663 };
9664
9665 let chain = p2p_gen.generate_chain(
9666 &company.code,
9667 vendor,
9668 &materials,
9669 po_date,
9670 start_date.year() as u16,
9671 fiscal_period,
9672 created_by,
9673 );
9674
9675 flows.purchase_orders.push(chain.purchase_order.clone());
9677 flows.goods_receipts.extend(chain.goods_receipts.clone());
9678 if let Some(vi) = &chain.vendor_invoice {
9679 flows.vendor_invoices.push(vi.clone());
9680 }
9681 if let Some(payment) = &chain.payment {
9682 flows.payments.push(payment.clone());
9683 }
9684 for remainder in &chain.remainder_payments {
9685 flows.payments.push(remainder.clone());
9686 }
9687 flows.p2p_chains.push(chain);
9688
9689 if let Some(pb) = &pb {
9690 pb.inc(1);
9691 }
9692 }
9693
9694 if let Some(pb) = pb {
9695 pb.finish_with_message("P2P document flows complete");
9696 }
9697
9698 let o2c_count = self
9701 .phase_config
9702 .o2c_chains
9703 .min(self.master_data.customers.len() * 2 * months);
9704 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9705
9706 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9708 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9709 o2c_gen.set_country_pack(self.primary_pack().clone());
9710
9711 for i in 0..o2c_count {
9712 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9713 let materials: Vec<&Material> = self
9714 .master_data
9715 .materials
9716 .iter()
9717 .skip(i % self.master_data.materials.len().max(1))
9718 .take(2.min(self.master_data.materials.len()))
9719 .collect();
9720
9721 if materials.is_empty() {
9722 continue;
9723 }
9724
9725 let company = &self.config.companies[i % self.config.companies.len()];
9726 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9727 let fiscal_period = so_date.month() as u8;
9728 let created_by = if self.master_data.employees.is_empty() {
9729 "SYSTEM"
9730 } else {
9731 self.master_data.employees[i % self.master_data.employees.len()]
9732 .user_id
9733 .as_str()
9734 };
9735
9736 let chain = o2c_gen.generate_chain(
9737 &company.code,
9738 customer,
9739 &materials,
9740 so_date,
9741 start_date.year() as u16,
9742 fiscal_period,
9743 created_by,
9744 );
9745
9746 flows.sales_orders.push(chain.sales_order.clone());
9748 flows.deliveries.extend(chain.deliveries.clone());
9749 if let Some(ci) = &chain.customer_invoice {
9750 flows.customer_invoices.push(ci.clone());
9751 }
9752 if let Some(receipt) = &chain.customer_receipt {
9753 flows.payments.push(receipt.clone());
9754 }
9755 for receipt in &chain.remainder_receipts {
9757 flows.payments.push(receipt.clone());
9758 }
9759 flows.o2c_chains.push(chain);
9760
9761 if let Some(pb) = &pb {
9762 pb.inc(1);
9763 }
9764 }
9765
9766 if let Some(pb) = pb {
9767 pb.finish_with_message("O2C document flows complete");
9768 }
9769
9770 {
9774 let mut refs = Vec::new();
9775 for doc in &flows.purchase_orders {
9776 refs.extend(doc.header.document_references.iter().cloned());
9777 }
9778 for doc in &flows.goods_receipts {
9779 refs.extend(doc.header.document_references.iter().cloned());
9780 }
9781 for doc in &flows.vendor_invoices {
9782 refs.extend(doc.header.document_references.iter().cloned());
9783 }
9784 for doc in &flows.sales_orders {
9785 refs.extend(doc.header.document_references.iter().cloned());
9786 }
9787 for doc in &flows.deliveries {
9788 refs.extend(doc.header.document_references.iter().cloned());
9789 }
9790 for doc in &flows.customer_invoices {
9791 refs.extend(doc.header.document_references.iter().cloned());
9792 }
9793 for doc in &flows.payments {
9794 refs.extend(doc.header.document_references.iter().cloned());
9795 }
9796 debug!(
9797 "Collected {} document cross-references from document headers",
9798 refs.len()
9799 );
9800 flows.document_references = refs;
9801 }
9802
9803 Ok(())
9804 }
9805
9806 fn generate_journal_entries(
9808 &mut self,
9809 coa: &Arc<ChartOfAccounts>,
9810 ) -> SynthResult<Vec<JournalEntry>> {
9811 use datasynth_core::traits::ParallelGenerator;
9812
9813 let total = self.calculate_total_transactions();
9814 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9815
9816 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9817 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9818 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9819
9820 let company_codes: Vec<String> = self
9821 .config
9822 .companies
9823 .iter()
9824 .map(|c| c.code.clone())
9825 .collect();
9826
9827 let mut generator = JournalEntryGenerator::new_with_params(
9828 self.config.transactions.clone(),
9829 Arc::clone(coa),
9830 company_codes,
9831 start_date,
9832 end_date,
9833 self.seed,
9834 );
9835 let bp = &self.config.business_processes;
9838 generator.set_business_process_weights(
9839 bp.o2c_weight,
9840 bp.p2p_weight,
9841 bp.r2r_weight,
9842 bp.h2r_weight,
9843 bp.a2r_weight,
9844 );
9845 let generator = generator;
9846
9847 let je_pack = self.primary_pack();
9851
9852 let mut generator = generator
9853 .with_master_data(
9854 &self.master_data.vendors,
9855 &self.master_data.customers,
9856 &self.master_data.materials,
9857 )
9858 .with_country_pack_names(je_pack)
9859 .with_country_pack_temporal(
9860 self.config.temporal_patterns.clone(),
9861 self.seed + 200,
9862 je_pack,
9863 )
9864 .with_persona_errors(true)
9865 .with_fraud_config(self.config.fraud.clone());
9866
9867 if self.config.temporal.enabled {
9869 let drift_config = self.config.temporal.to_core_config();
9870 generator = generator.with_drift_config(drift_config, self.seed + 100);
9871 }
9872
9873 self.check_memory_limit()?;
9875
9876 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9878
9879 let entries = if total >= 10_000 && num_threads > 1 {
9883 let sub_generators = generator.split(num_threads);
9886 let entries_per_thread = total as usize / num_threads;
9887 let remainder = total as usize % num_threads;
9888
9889 let batches: Vec<Vec<JournalEntry>> = sub_generators
9890 .into_par_iter()
9891 .enumerate()
9892 .map(|(i, mut gen)| {
9893 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9894 gen.generate_batch(count)
9895 })
9896 .collect();
9897
9898 let entries = JournalEntryGenerator::merge_results(batches);
9900
9901 if let Some(pb) = &pb {
9902 pb.inc(total);
9903 }
9904 entries
9905 } else {
9906 let mut entries = Vec::with_capacity(total as usize);
9908 for _ in 0..total {
9909 let entry = generator.generate();
9910 entries.push(entry);
9911 if let Some(pb) = &pb {
9912 pb.inc(1);
9913 }
9914 }
9915 entries
9916 };
9917
9918 if let Some(pb) = pb {
9919 pb.finish_with_message("Journal entries complete");
9920 }
9921
9922 Ok(entries)
9923 }
9924
9925 fn generate_jes_from_document_flows(
9930 &mut self,
9931 flows: &DocumentFlowSnapshot,
9932 ) -> SynthResult<Vec<JournalEntry>> {
9933 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9934 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9935
9936 let je_config = match self.resolve_coa_framework() {
9937 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9938 CoAFramework::GermanSkr04 => {
9939 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9940 DocumentFlowJeConfig::from(&fa)
9941 }
9942 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9943 };
9944
9945 let populate_fec = je_config.populate_fec_fields;
9946 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9947
9948 if populate_fec {
9952 let mut aux_lookup = std::collections::HashMap::new();
9953 for vendor in &self.master_data.vendors {
9954 if let Some(ref aux) = vendor.auxiliary_gl_account {
9955 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9956 }
9957 }
9958 for customer in &self.master_data.customers {
9959 if let Some(ref aux) = customer.auxiliary_gl_account {
9960 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9961 }
9962 }
9963 if !aux_lookup.is_empty() {
9964 generator.set_auxiliary_account_lookup(aux_lookup);
9965 }
9966 }
9967
9968 let mut entries = Vec::new();
9969
9970 for chain in &flows.p2p_chains {
9972 let chain_entries = generator.generate_from_p2p_chain(chain);
9973 entries.extend(chain_entries);
9974 if let Some(pb) = &pb {
9975 pb.inc(1);
9976 }
9977 }
9978
9979 for chain in &flows.o2c_chains {
9981 let chain_entries = generator.generate_from_o2c_chain(chain);
9982 entries.extend(chain_entries);
9983 if let Some(pb) = &pb {
9984 pb.inc(1);
9985 }
9986 }
9987
9988 if let Some(pb) = pb {
9989 pb.finish_with_message(format!(
9990 "Generated {} JEs from document flows",
9991 entries.len()
9992 ));
9993 }
9994
9995 Ok(entries)
9996 }
9997
9998 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10004 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10005
10006 let mut jes = Vec::with_capacity(payroll_runs.len());
10007
10008 for run in payroll_runs {
10009 let mut je = JournalEntry::new_simple(
10010 format!("JE-PAYROLL-{}", run.payroll_id),
10011 run.company_code.clone(),
10012 run.run_date,
10013 format!("Payroll {}", run.payroll_id),
10014 );
10015
10016 je.add_line(JournalEntryLine {
10018 line_number: 1,
10019 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10020 debit_amount: run.total_gross,
10021 reference: Some(run.payroll_id.clone()),
10022 text: Some(format!(
10023 "Payroll {} ({} employees)",
10024 run.payroll_id, run.employee_count
10025 )),
10026 ..Default::default()
10027 });
10028
10029 je.add_line(JournalEntryLine {
10031 line_number: 2,
10032 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10033 credit_amount: run.total_gross,
10034 reference: Some(run.payroll_id.clone()),
10035 ..Default::default()
10036 });
10037
10038 jes.push(je);
10039 }
10040
10041 jes
10042 }
10043
10044 fn link_document_flows_to_subledgers(
10049 &mut self,
10050 flows: &DocumentFlowSnapshot,
10051 ) -> SynthResult<SubledgerSnapshot> {
10052 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10053 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10054
10055 let vendor_names: std::collections::HashMap<String, String> = self
10057 .master_data
10058 .vendors
10059 .iter()
10060 .map(|v| (v.vendor_id.clone(), v.name.clone()))
10061 .collect();
10062 let customer_names: std::collections::HashMap<String, String> = self
10063 .master_data
10064 .customers
10065 .iter()
10066 .map(|c| (c.customer_id.clone(), c.name.clone()))
10067 .collect();
10068
10069 let mut linker = DocumentFlowLinker::new()
10070 .with_vendor_names(vendor_names)
10071 .with_customer_names(customer_names);
10072
10073 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10075 if let Some(pb) = &pb {
10076 pb.inc(flows.vendor_invoices.len() as u64);
10077 }
10078
10079 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10081 if let Some(pb) = &pb {
10082 pb.inc(flows.customer_invoices.len() as u64);
10083 }
10084
10085 if let Some(pb) = pb {
10086 pb.finish_with_message(format!(
10087 "Linked {} AP and {} AR invoices",
10088 ap_invoices.len(),
10089 ar_invoices.len()
10090 ));
10091 }
10092
10093 Ok(SubledgerSnapshot {
10094 ap_invoices,
10095 ar_invoices,
10096 fa_records: Vec::new(),
10097 inventory_positions: Vec::new(),
10098 inventory_movements: Vec::new(),
10099 ar_aging_reports: Vec::new(),
10101 ap_aging_reports: Vec::new(),
10102 depreciation_runs: Vec::new(),
10104 inventory_valuations: Vec::new(),
10105 dunning_runs: Vec::new(),
10107 dunning_letters: Vec::new(),
10108 })
10109 }
10110
10111 #[allow(clippy::too_many_arguments)]
10116 fn generate_ocpm_events(
10117 &mut self,
10118 flows: &DocumentFlowSnapshot,
10119 sourcing: &SourcingSnapshot,
10120 hr: &HrSnapshot,
10121 manufacturing: &ManufacturingSnapshot,
10122 banking: &BankingSnapshot,
10123 audit: &AuditSnapshot,
10124 financial_reporting: &FinancialReportingSnapshot,
10125 ) -> SynthResult<OcpmSnapshot> {
10126 let total_chains = flows.p2p_chains.len()
10127 + flows.o2c_chains.len()
10128 + sourcing.sourcing_projects.len()
10129 + hr.payroll_runs.len()
10130 + manufacturing.production_orders.len()
10131 + banking.customers.len()
10132 + audit.engagements.len()
10133 + financial_reporting.bank_reconciliations.len();
10134 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10135
10136 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10138 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10139
10140 let ocpm_config = OcpmGeneratorConfig {
10142 generate_p2p: true,
10143 generate_o2c: true,
10144 generate_s2c: !sourcing.sourcing_projects.is_empty(),
10145 generate_h2r: !hr.payroll_runs.is_empty(),
10146 generate_mfg: !manufacturing.production_orders.is_empty(),
10147 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10148 generate_bank: !banking.customers.is_empty(),
10149 generate_audit: !audit.engagements.is_empty(),
10150 happy_path_rate: 0.75,
10151 exception_path_rate: 0.20,
10152 error_path_rate: 0.05,
10153 add_duration_variability: true,
10154 duration_std_dev_factor: 0.3,
10155 };
10156 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10157 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10158
10159 let available_users: Vec<String> = self
10161 .master_data
10162 .employees
10163 .iter()
10164 .take(20)
10165 .map(|e| e.user_id.clone())
10166 .collect();
10167
10168 let fallback_date =
10170 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10171 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10172 .unwrap_or(fallback_date);
10173 let base_midnight = base_date
10174 .and_hms_opt(0, 0, 0)
10175 .expect("midnight is always valid");
10176 let base_datetime =
10177 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10178
10179 let add_result = |event_log: &mut OcpmEventLog,
10181 result: datasynth_ocpm::CaseGenerationResult| {
10182 for event in result.events {
10183 event_log.add_event(event);
10184 }
10185 for object in result.objects {
10186 event_log.add_object(object);
10187 }
10188 for relationship in result.relationships {
10189 event_log.add_relationship(relationship);
10190 }
10191 for corr in result.correlation_events {
10192 event_log.add_correlation_event(corr);
10193 }
10194 event_log.add_case(result.case_trace);
10195 };
10196
10197 for chain in &flows.p2p_chains {
10199 let po = &chain.purchase_order;
10200 let documents = P2pDocuments::new(
10201 &po.header.document_id,
10202 &po.vendor_id,
10203 &po.header.company_code,
10204 po.total_net_amount,
10205 &po.header.currency,
10206 &ocpm_uuid_factory,
10207 )
10208 .with_goods_receipt(
10209 chain
10210 .goods_receipts
10211 .first()
10212 .map(|gr| gr.header.document_id.as_str())
10213 .unwrap_or(""),
10214 &ocpm_uuid_factory,
10215 )
10216 .with_invoice(
10217 chain
10218 .vendor_invoice
10219 .as_ref()
10220 .map(|vi| vi.header.document_id.as_str())
10221 .unwrap_or(""),
10222 &ocpm_uuid_factory,
10223 )
10224 .with_payment(
10225 chain
10226 .payment
10227 .as_ref()
10228 .map(|p| p.header.document_id.as_str())
10229 .unwrap_or(""),
10230 &ocpm_uuid_factory,
10231 );
10232
10233 let start_time =
10234 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10235 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10236 add_result(&mut event_log, result);
10237
10238 if let Some(pb) = &pb {
10239 pb.inc(1);
10240 }
10241 }
10242
10243 for chain in &flows.o2c_chains {
10245 let so = &chain.sales_order;
10246 let documents = O2cDocuments::new(
10247 &so.header.document_id,
10248 &so.customer_id,
10249 &so.header.company_code,
10250 so.total_net_amount,
10251 &so.header.currency,
10252 &ocpm_uuid_factory,
10253 )
10254 .with_delivery(
10255 chain
10256 .deliveries
10257 .first()
10258 .map(|d| d.header.document_id.as_str())
10259 .unwrap_or(""),
10260 &ocpm_uuid_factory,
10261 )
10262 .with_invoice(
10263 chain
10264 .customer_invoice
10265 .as_ref()
10266 .map(|ci| ci.header.document_id.as_str())
10267 .unwrap_or(""),
10268 &ocpm_uuid_factory,
10269 )
10270 .with_receipt(
10271 chain
10272 .customer_receipt
10273 .as_ref()
10274 .map(|r| r.header.document_id.as_str())
10275 .unwrap_or(""),
10276 &ocpm_uuid_factory,
10277 );
10278
10279 let start_time =
10280 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10281 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10282 add_result(&mut event_log, result);
10283
10284 if let Some(pb) = &pb {
10285 pb.inc(1);
10286 }
10287 }
10288
10289 for project in &sourcing.sourcing_projects {
10291 let vendor_id = sourcing
10293 .contracts
10294 .iter()
10295 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10296 .map(|c| c.vendor_id.clone())
10297 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10298 .or_else(|| {
10299 self.master_data
10300 .vendors
10301 .first()
10302 .map(|v| v.vendor_id.clone())
10303 })
10304 .unwrap_or_else(|| "V000".to_string());
10305 let mut docs = S2cDocuments::new(
10306 &project.project_id,
10307 &vendor_id,
10308 &project.company_code,
10309 project.estimated_annual_spend,
10310 &ocpm_uuid_factory,
10311 );
10312 if let Some(rfx) = sourcing
10314 .rfx_events
10315 .iter()
10316 .find(|r| r.sourcing_project_id == project.project_id)
10317 {
10318 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10319 if let Some(bid) = sourcing.bids.iter().find(|b| {
10321 b.rfx_id == rfx.rfx_id
10322 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10323 }) {
10324 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10325 }
10326 }
10327 if let Some(contract) = sourcing
10329 .contracts
10330 .iter()
10331 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10332 {
10333 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10334 }
10335 let start_time = base_datetime - chrono::Duration::days(90);
10336 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10337 add_result(&mut event_log, result);
10338
10339 if let Some(pb) = &pb {
10340 pb.inc(1);
10341 }
10342 }
10343
10344 for run in &hr.payroll_runs {
10346 let employee_id = hr
10348 .payroll_line_items
10349 .iter()
10350 .find(|li| li.payroll_id == run.payroll_id)
10351 .map(|li| li.employee_id.as_str())
10352 .unwrap_or("EMP000");
10353 let docs = H2rDocuments::new(
10354 &run.payroll_id,
10355 employee_id,
10356 &run.company_code,
10357 run.total_gross,
10358 &ocpm_uuid_factory,
10359 )
10360 .with_time_entries(
10361 hr.time_entries
10362 .iter()
10363 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10364 .take(5)
10365 .map(|t| t.entry_id.as_str())
10366 .collect(),
10367 );
10368 let start_time = base_datetime - chrono::Duration::days(30);
10369 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10370 add_result(&mut event_log, result);
10371
10372 if let Some(pb) = &pb {
10373 pb.inc(1);
10374 }
10375 }
10376
10377 for order in &manufacturing.production_orders {
10379 let mut docs = MfgDocuments::new(
10380 &order.order_id,
10381 &order.material_id,
10382 &order.company_code,
10383 order.planned_quantity,
10384 &ocpm_uuid_factory,
10385 )
10386 .with_operations(
10387 order
10388 .operations
10389 .iter()
10390 .map(|o| format!("OP-{:04}", o.operation_number))
10391 .collect::<Vec<_>>()
10392 .iter()
10393 .map(std::string::String::as_str)
10394 .collect(),
10395 );
10396 if let Some(insp) = manufacturing
10398 .quality_inspections
10399 .iter()
10400 .find(|i| i.reference_id == order.order_id)
10401 {
10402 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10403 }
10404 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10406 cc.items
10407 .iter()
10408 .any(|item| item.material_id == order.material_id)
10409 }) {
10410 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10411 }
10412 let start_time = base_datetime - chrono::Duration::days(60);
10413 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10414 add_result(&mut event_log, result);
10415
10416 if let Some(pb) = &pb {
10417 pb.inc(1);
10418 }
10419 }
10420
10421 for customer in &banking.customers {
10423 let customer_id_str = customer.customer_id.to_string();
10424 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10425 if let Some(account) = banking
10427 .accounts
10428 .iter()
10429 .find(|a| a.primary_owner_id == customer.customer_id)
10430 {
10431 let account_id_str = account.account_id.to_string();
10432 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10433 let txn_strs: Vec<String> = banking
10435 .transactions
10436 .iter()
10437 .filter(|t| t.account_id == account.account_id)
10438 .take(10)
10439 .map(|t| t.transaction_id.to_string())
10440 .collect();
10441 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10442 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10443 .transactions
10444 .iter()
10445 .filter(|t| t.account_id == account.account_id)
10446 .take(10)
10447 .map(|t| t.amount)
10448 .collect();
10449 if !txn_ids.is_empty() {
10450 docs = docs.with_transactions(txn_ids, txn_amounts);
10451 }
10452 }
10453 let start_time = base_datetime - chrono::Duration::days(180);
10454 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10455 add_result(&mut event_log, result);
10456
10457 if let Some(pb) = &pb {
10458 pb.inc(1);
10459 }
10460 }
10461
10462 for engagement in &audit.engagements {
10464 let engagement_id_str = engagement.engagement_id.to_string();
10465 let docs = AuditDocuments::new(
10466 &engagement_id_str,
10467 &engagement.client_entity_id,
10468 &ocpm_uuid_factory,
10469 )
10470 .with_workpapers(
10471 audit
10472 .workpapers
10473 .iter()
10474 .filter(|w| w.engagement_id == engagement.engagement_id)
10475 .take(10)
10476 .map(|w| w.workpaper_id.to_string())
10477 .collect::<Vec<_>>()
10478 .iter()
10479 .map(std::string::String::as_str)
10480 .collect(),
10481 )
10482 .with_evidence(
10483 audit
10484 .evidence
10485 .iter()
10486 .filter(|e| e.engagement_id == engagement.engagement_id)
10487 .take(10)
10488 .map(|e| e.evidence_id.to_string())
10489 .collect::<Vec<_>>()
10490 .iter()
10491 .map(std::string::String::as_str)
10492 .collect(),
10493 )
10494 .with_risks(
10495 audit
10496 .risk_assessments
10497 .iter()
10498 .filter(|r| r.engagement_id == engagement.engagement_id)
10499 .take(5)
10500 .map(|r| r.risk_id.to_string())
10501 .collect::<Vec<_>>()
10502 .iter()
10503 .map(std::string::String::as_str)
10504 .collect(),
10505 )
10506 .with_findings(
10507 audit
10508 .findings
10509 .iter()
10510 .filter(|f| f.engagement_id == engagement.engagement_id)
10511 .take(5)
10512 .map(|f| f.finding_id.to_string())
10513 .collect::<Vec<_>>()
10514 .iter()
10515 .map(std::string::String::as_str)
10516 .collect(),
10517 )
10518 .with_judgments(
10519 audit
10520 .judgments
10521 .iter()
10522 .filter(|j| j.engagement_id == engagement.engagement_id)
10523 .take(5)
10524 .map(|j| j.judgment_id.to_string())
10525 .collect::<Vec<_>>()
10526 .iter()
10527 .map(std::string::String::as_str)
10528 .collect(),
10529 );
10530 let start_time = base_datetime - chrono::Duration::days(120);
10531 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10532 add_result(&mut event_log, result);
10533
10534 if let Some(pb) = &pb {
10535 pb.inc(1);
10536 }
10537 }
10538
10539 for recon in &financial_reporting.bank_reconciliations {
10541 let docs = BankReconDocuments::new(
10542 &recon.reconciliation_id,
10543 &recon.bank_account_id,
10544 &recon.company_code,
10545 recon.bank_ending_balance,
10546 &ocpm_uuid_factory,
10547 )
10548 .with_statement_lines(
10549 recon
10550 .statement_lines
10551 .iter()
10552 .take(20)
10553 .map(|l| l.line_id.as_str())
10554 .collect(),
10555 )
10556 .with_reconciling_items(
10557 recon
10558 .reconciling_items
10559 .iter()
10560 .take(10)
10561 .map(|i| i.item_id.as_str())
10562 .collect(),
10563 );
10564 let start_time = base_datetime - chrono::Duration::days(30);
10565 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10566 add_result(&mut event_log, result);
10567
10568 if let Some(pb) = &pb {
10569 pb.inc(1);
10570 }
10571 }
10572
10573 event_log.compute_variants();
10575
10576 let summary = event_log.summary();
10577
10578 if let Some(pb) = pb {
10579 pb.finish_with_message(format!(
10580 "Generated {} OCPM events, {} objects",
10581 summary.event_count, summary.object_count
10582 ));
10583 }
10584
10585 Ok(OcpmSnapshot {
10586 event_count: summary.event_count,
10587 object_count: summary.object_count,
10588 case_count: summary.case_count,
10589 event_log: Some(event_log),
10590 })
10591 }
10592
10593 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10595 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10596
10597 let total_rate = if self.config.anomaly_injection.enabled {
10600 self.config.anomaly_injection.rates.total_rate
10601 } else if self.config.fraud.enabled {
10602 self.config.fraud.fraud_rate
10603 } else {
10604 0.02
10605 };
10606
10607 let fraud_rate = if self.config.anomaly_injection.enabled {
10608 self.config.anomaly_injection.rates.fraud_rate
10609 } else {
10610 AnomalyRateConfig::default().fraud_rate
10611 };
10612
10613 let error_rate = if self.config.anomaly_injection.enabled {
10614 self.config.anomaly_injection.rates.error_rate
10615 } else {
10616 AnomalyRateConfig::default().error_rate
10617 };
10618
10619 let process_issue_rate = if self.config.anomaly_injection.enabled {
10620 self.config.anomaly_injection.rates.process_rate
10621 } else {
10622 AnomalyRateConfig::default().process_issue_rate
10623 };
10624
10625 let anomaly_config = AnomalyInjectorConfig {
10626 rates: AnomalyRateConfig {
10627 total_rate,
10628 fraud_rate,
10629 error_rate,
10630 process_issue_rate,
10631 ..Default::default()
10632 },
10633 seed: self.seed + 5000,
10634 ..Default::default()
10635 };
10636
10637 let mut injector = AnomalyInjector::new(anomaly_config);
10638 let result = injector.process_entries(entries);
10639
10640 if let Some(pb) = &pb {
10641 pb.inc(entries.len() as u64);
10642 pb.finish_with_message("Anomaly injection complete");
10643 }
10644
10645 let mut by_type = HashMap::new();
10646 for label in &result.labels {
10647 *by_type
10648 .entry(format!("{:?}", label.anomaly_type))
10649 .or_insert(0) += 1;
10650 }
10651
10652 Ok(AnomalyLabels {
10653 labels: result.labels,
10654 summary: Some(result.summary),
10655 by_type,
10656 })
10657 }
10658
10659 fn validate_journal_entries(
10668 &mut self,
10669 entries: &[JournalEntry],
10670 ) -> SynthResult<BalanceValidationResult> {
10671 let clean_entries: Vec<&JournalEntry> = entries
10673 .iter()
10674 .filter(|e| {
10675 e.header
10676 .header_text
10677 .as_ref()
10678 .map(|t| !t.contains("[HUMAN_ERROR:"))
10679 .unwrap_or(true)
10680 })
10681 .collect();
10682
10683 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10684
10685 let config = BalanceTrackerConfig {
10687 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10691 };
10692 let validation_currency = self
10693 .config
10694 .companies
10695 .first()
10696 .map(|c| c.currency.clone())
10697 .unwrap_or_else(|| "USD".to_string());
10698
10699 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10700
10701 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10703 let errors = tracker.apply_entries(&clean_refs);
10704
10705 if let Some(pb) = &pb {
10706 pb.inc(entries.len() as u64);
10707 }
10708
10709 let has_unbalanced = tracker
10712 .get_validation_errors()
10713 .iter()
10714 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10715
10716 let mut all_errors = errors;
10719 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10720 let company_codes: Vec<String> = self
10721 .config
10722 .companies
10723 .iter()
10724 .map(|c| c.code.clone())
10725 .collect();
10726
10727 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10728 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10729 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10730
10731 for company_code in &company_codes {
10732 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10733 all_errors.push(e);
10734 }
10735 }
10736
10737 let stats = tracker.get_statistics();
10739
10740 let is_balanced = all_errors.is_empty();
10742
10743 if let Some(pb) = pb {
10744 let msg = if is_balanced {
10745 "Balance validation passed"
10746 } else {
10747 "Balance validation completed with errors"
10748 };
10749 pb.finish_with_message(msg);
10750 }
10751
10752 Ok(BalanceValidationResult {
10753 validated: true,
10754 is_balanced,
10755 entries_processed: stats.entries_processed,
10756 total_debits: stats.total_debits,
10757 total_credits: stats.total_credits,
10758 accounts_tracked: stats.accounts_tracked,
10759 companies_tracked: stats.companies_tracked,
10760 validation_errors: all_errors,
10761 has_unbalanced_entries: has_unbalanced,
10762 })
10763 }
10764
10765 fn inject_data_quality(
10770 &mut self,
10771 entries: &mut [JournalEntry],
10772 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10773 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10774
10775 let config = if self.config.data_quality.enabled {
10778 let dq = &self.config.data_quality;
10779 DataQualityConfig {
10780 enable_missing_values: dq.missing_values.enabled,
10781 missing_values: datasynth_generators::MissingValueConfig {
10782 global_rate: dq.effective_missing_rate(),
10783 ..Default::default()
10784 },
10785 enable_format_variations: dq.format_variations.enabled,
10786 format_variations: datasynth_generators::FormatVariationConfig {
10787 date_variation_rate: dq.format_variations.dates.rate,
10788 amount_variation_rate: dq.format_variations.amounts.rate,
10789 identifier_variation_rate: dq.format_variations.identifiers.rate,
10790 ..Default::default()
10791 },
10792 enable_duplicates: dq.duplicates.enabled,
10793 duplicates: datasynth_generators::DuplicateConfig {
10794 duplicate_rate: dq.effective_duplicate_rate(),
10795 ..Default::default()
10796 },
10797 enable_typos: dq.typos.enabled,
10798 typos: datasynth_generators::TypoConfig {
10799 char_error_rate: dq.effective_typo_rate(),
10800 ..Default::default()
10801 },
10802 enable_encoding_issues: dq.encoding_issues.enabled,
10803 encoding_issue_rate: dq.encoding_issues.rate,
10804 seed: self.seed.wrapping_add(77), track_statistics: true,
10806 }
10807 } else {
10808 DataQualityConfig::minimal()
10809 };
10810 let mut injector = DataQualityInjector::new(config);
10811
10812 injector.set_country_pack(self.primary_pack().clone());
10814
10815 let context = HashMap::new();
10817
10818 for entry in entries.iter_mut() {
10819 if let Some(text) = &entry.header.header_text {
10821 let processed = injector.process_text_field(
10822 "header_text",
10823 text,
10824 &entry.header.document_id.to_string(),
10825 &context,
10826 );
10827 match processed {
10828 Some(new_text) if new_text != *text => {
10829 entry.header.header_text = Some(new_text);
10830 }
10831 None => {
10832 entry.header.header_text = None; }
10834 _ => {}
10835 }
10836 }
10837
10838 if let Some(ref_text) = &entry.header.reference {
10840 let processed = injector.process_text_field(
10841 "reference",
10842 ref_text,
10843 &entry.header.document_id.to_string(),
10844 &context,
10845 );
10846 match processed {
10847 Some(new_text) if new_text != *ref_text => {
10848 entry.header.reference = Some(new_text);
10849 }
10850 None => {
10851 entry.header.reference = None;
10852 }
10853 _ => {}
10854 }
10855 }
10856
10857 let user_persona = entry.header.user_persona.clone();
10859 if let Some(processed) = injector.process_text_field(
10860 "user_persona",
10861 &user_persona,
10862 &entry.header.document_id.to_string(),
10863 &context,
10864 ) {
10865 if processed != user_persona {
10866 entry.header.user_persona = processed;
10867 }
10868 }
10869
10870 for line in &mut entry.lines {
10872 if let Some(ref text) = line.line_text {
10874 let processed = injector.process_text_field(
10875 "line_text",
10876 text,
10877 &entry.header.document_id.to_string(),
10878 &context,
10879 );
10880 match processed {
10881 Some(new_text) if new_text != *text => {
10882 line.line_text = Some(new_text);
10883 }
10884 None => {
10885 line.line_text = None;
10886 }
10887 _ => {}
10888 }
10889 }
10890
10891 if let Some(cc) = &line.cost_center {
10893 let processed = injector.process_text_field(
10894 "cost_center",
10895 cc,
10896 &entry.header.document_id.to_string(),
10897 &context,
10898 );
10899 match processed {
10900 Some(new_cc) if new_cc != *cc => {
10901 line.cost_center = Some(new_cc);
10902 }
10903 None => {
10904 line.cost_center = None;
10905 }
10906 _ => {}
10907 }
10908 }
10909 }
10910
10911 if let Some(pb) = &pb {
10912 pb.inc(1);
10913 }
10914 }
10915
10916 if let Some(pb) = pb {
10917 pb.finish_with_message("Data quality injection complete");
10918 }
10919
10920 let quality_issues = injector.issues().to_vec();
10921 Ok((injector.stats().clone(), quality_issues))
10922 }
10923
10924 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10935 let use_fsm = self
10937 .config
10938 .audit
10939 .fsm
10940 .as_ref()
10941 .map(|f| f.enabled)
10942 .unwrap_or(false);
10943
10944 if use_fsm {
10945 return self.generate_audit_data_with_fsm(entries);
10946 }
10947
10948 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10950 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10951 let fiscal_year = start_date.year() as u16;
10952 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10953
10954 let total_revenue: rust_decimal::Decimal = entries
10956 .iter()
10957 .flat_map(|e| e.lines.iter())
10958 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10959 .map(|l| l.credit_amount)
10960 .sum();
10961
10962 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10964
10965 let mut snapshot = AuditSnapshot::default();
10966
10967 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10969 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10970 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10971 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10972 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10973 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10974 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10975 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10976 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10977 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10978 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10979 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10980
10981 let accounts: Vec<String> = self
10983 .coa
10984 .as_ref()
10985 .map(|coa| {
10986 coa.get_postable_accounts()
10987 .iter()
10988 .map(|acc| acc.account_code().to_string())
10989 .collect()
10990 })
10991 .unwrap_or_default();
10992
10993 for (i, company) in self.config.companies.iter().enumerate() {
10995 let company_revenue = total_revenue
10997 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10998
10999 let engagements_for_company =
11001 self.phase_config.audit_engagements / self.config.companies.len().max(1);
11002 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11003 1
11004 } else {
11005 0
11006 };
11007
11008 for _eng_idx in 0..(engagements_for_company + extra) {
11009 let mut engagement = engagement_gen.generate_engagement(
11011 &company.code,
11012 &company.name,
11013 fiscal_year,
11014 period_end,
11015 company_revenue,
11016 None, );
11018
11019 if !self.master_data.employees.is_empty() {
11021 let emp_count = self.master_data.employees.len();
11022 let base = (i * 10 + _eng_idx) % emp_count;
11024 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11025 .employee_id
11026 .clone();
11027 engagement.engagement_manager_id = self.master_data.employees
11028 [(base + 1) % emp_count]
11029 .employee_id
11030 .clone();
11031 let real_team: Vec<String> = engagement
11032 .team_member_ids
11033 .iter()
11034 .enumerate()
11035 .map(|(j, _)| {
11036 self.master_data.employees[(base + 2 + j) % emp_count]
11037 .employee_id
11038 .clone()
11039 })
11040 .collect();
11041 engagement.team_member_ids = real_team;
11042 }
11043
11044 if let Some(pb) = &pb {
11045 pb.inc(1);
11046 }
11047
11048 let team_members: Vec<String> = engagement.team_member_ids.clone();
11050
11051 let workpapers =
11053 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
11054
11055 for wp in &workpapers {
11056 if let Some(pb) = &pb {
11057 pb.inc(1);
11058 }
11059
11060 let evidence = evidence_gen.generate_evidence_for_workpaper(
11062 wp,
11063 &team_members,
11064 wp.preparer_date,
11065 );
11066
11067 for _ in &evidence {
11068 if let Some(pb) = &pb {
11069 pb.inc(1);
11070 }
11071 }
11072
11073 snapshot.evidence.extend(evidence);
11074 }
11075
11076 let risks =
11078 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11079
11080 for _ in &risks {
11081 if let Some(pb) = &pb {
11082 pb.inc(1);
11083 }
11084 }
11085 snapshot.risk_assessments.extend(risks);
11086
11087 let findings = finding_gen.generate_findings_for_engagement(
11089 &engagement,
11090 &workpapers,
11091 &team_members,
11092 );
11093
11094 for _ in &findings {
11095 if let Some(pb) = &pb {
11096 pb.inc(1);
11097 }
11098 }
11099 snapshot.findings.extend(findings);
11100
11101 let judgments =
11103 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11104
11105 for _ in &judgments {
11106 if let Some(pb) = &pb {
11107 pb.inc(1);
11108 }
11109 }
11110 snapshot.judgments.extend(judgments);
11111
11112 let (confs, resps) =
11114 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11115 snapshot.confirmations.extend(confs);
11116 snapshot.confirmation_responses.extend(resps);
11117
11118 let team_pairs: Vec<(String, String)> = team_members
11120 .iter()
11121 .map(|id| {
11122 let name = self
11123 .master_data
11124 .employees
11125 .iter()
11126 .find(|e| e.employee_id == *id)
11127 .map(|e| e.display_name.clone())
11128 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11129 (id.clone(), name)
11130 })
11131 .collect();
11132 for wp in &workpapers {
11133 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11134 snapshot.procedure_steps.extend(steps);
11135 }
11136
11137 for wp in &workpapers {
11139 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11140 snapshot.samples.push(sample);
11141 }
11142 }
11143
11144 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11146 snapshot.analytical_results.extend(analytical);
11147
11148 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11150 snapshot.ia_functions.push(ia_func);
11151 snapshot.ia_reports.extend(ia_reports);
11152
11153 let vendor_names: Vec<String> = self
11155 .master_data
11156 .vendors
11157 .iter()
11158 .map(|v| v.name.clone())
11159 .collect();
11160 let customer_names: Vec<String> = self
11161 .master_data
11162 .customers
11163 .iter()
11164 .map(|c| c.name.clone())
11165 .collect();
11166 let (parties, rp_txns) =
11167 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11168 snapshot.related_parties.extend(parties);
11169 snapshot.related_party_transactions.extend(rp_txns);
11170
11171 snapshot.workpapers.extend(workpapers);
11173
11174 {
11176 let scope_id = format!(
11177 "SCOPE-{}-{}",
11178 engagement.engagement_id.simple(),
11179 &engagement.client_entity_id
11180 );
11181 let scope = datasynth_core::models::audit::AuditScope::new(
11182 scope_id.clone(),
11183 engagement.engagement_id.to_string(),
11184 engagement.client_entity_id.clone(),
11185 engagement.materiality,
11186 );
11187 let mut eng = engagement;
11189 eng.scope_id = Some(scope_id);
11190 snapshot.audit_scopes.push(scope);
11191 snapshot.engagements.push(eng);
11192 }
11193 }
11194 }
11195
11196 if self.config.companies.len() > 1 {
11200 let group_materiality = snapshot
11203 .engagements
11204 .first()
11205 .map(|e| e.materiality)
11206 .unwrap_or_else(|| {
11207 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11208 total_revenue * pct
11209 });
11210
11211 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11212 let group_engagement_id = snapshot
11213 .engagements
11214 .first()
11215 .map(|e| e.engagement_id.to_string())
11216 .unwrap_or_else(|| "GROUP-ENG".to_string());
11217
11218 let component_snapshot = component_gen.generate(
11219 &self.config.companies,
11220 group_materiality,
11221 &group_engagement_id,
11222 period_end,
11223 );
11224
11225 snapshot.component_auditors = component_snapshot.component_auditors;
11226 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11227 snapshot.component_instructions = component_snapshot.component_instructions;
11228 snapshot.component_reports = component_snapshot.component_reports;
11229
11230 info!(
11231 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11232 snapshot.component_auditors.len(),
11233 snapshot.component_instructions.len(),
11234 snapshot.component_reports.len(),
11235 );
11236 }
11237
11238 {
11242 let applicable_framework = self
11243 .config
11244 .accounting_standards
11245 .framework
11246 .as_ref()
11247 .map(|f| format!("{f:?}"))
11248 .unwrap_or_else(|| "IFRS".to_string());
11249
11250 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11251 let entity_count = self.config.companies.len();
11252
11253 for engagement in &snapshot.engagements {
11254 let company = self
11255 .config
11256 .companies
11257 .iter()
11258 .find(|c| c.code == engagement.client_entity_id);
11259 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11260 let letter_date = engagement.planning_start;
11261 let letter = letter_gen.generate(
11262 &engagement.engagement_id.to_string(),
11263 &engagement.client_name,
11264 entity_count,
11265 engagement.period_end_date,
11266 currency,
11267 &applicable_framework,
11268 letter_date,
11269 );
11270 snapshot.engagement_letters.push(letter);
11271 }
11272
11273 info!(
11274 "ISA 210 engagement letters: {} generated",
11275 snapshot.engagement_letters.len()
11276 );
11277 }
11278
11279 {
11283 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11284 let entity_codes: Vec<String> = self
11285 .config
11286 .companies
11287 .iter()
11288 .map(|c| c.code.clone())
11289 .collect();
11290 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11291 info!(
11292 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11293 subsequent.len(),
11294 subsequent
11295 .iter()
11296 .filter(|e| matches!(
11297 e.classification,
11298 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11299 ))
11300 .count(),
11301 subsequent
11302 .iter()
11303 .filter(|e| matches!(
11304 e.classification,
11305 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11306 ))
11307 .count(),
11308 );
11309 snapshot.subsequent_events = subsequent;
11310 }
11311
11312 {
11316 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11317 let entity_codes: Vec<String> = self
11318 .config
11319 .companies
11320 .iter()
11321 .map(|c| c.code.clone())
11322 .collect();
11323 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11324 info!(
11325 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11326 soc_snapshot.service_organizations.len(),
11327 soc_snapshot.soc_reports.len(),
11328 soc_snapshot.user_entity_controls.len(),
11329 );
11330 snapshot.service_organizations = soc_snapshot.service_organizations;
11331 snapshot.soc_reports = soc_snapshot.soc_reports;
11332 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11333 }
11334
11335 {
11339 use datasynth_generators::audit::going_concern_generator::{
11340 GoingConcernGenerator, GoingConcernInput,
11341 };
11342 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11343 let entity_codes: Vec<String> = self
11344 .config
11345 .companies
11346 .iter()
11347 .map(|c| c.code.clone())
11348 .collect();
11349 let assessment_date = period_end + chrono::Duration::days(75);
11351 let period_label = format!("FY{}", period_end.year());
11352
11353 let gc_inputs: Vec<GoingConcernInput> = self
11364 .config
11365 .companies
11366 .iter()
11367 .map(|company| {
11368 let code = &company.code;
11369 let mut revenue = rust_decimal::Decimal::ZERO;
11370 let mut expenses = rust_decimal::Decimal::ZERO;
11371 let mut current_assets = rust_decimal::Decimal::ZERO;
11372 let mut current_liabs = rust_decimal::Decimal::ZERO;
11373 let mut total_debt = rust_decimal::Decimal::ZERO;
11374
11375 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11376 for line in &je.lines {
11377 let acct = line.gl_account.as_str();
11378 let net = line.debit_amount - line.credit_amount;
11379 if acct.starts_with('4') {
11380 revenue -= net;
11382 } else if acct.starts_with('6') {
11383 expenses += net;
11385 }
11386 if acct.starts_with('1') {
11388 if let Ok(n) = acct.parse::<u32>() {
11390 if (1000..=1499).contains(&n) {
11391 current_assets += net;
11392 }
11393 }
11394 } else if acct.starts_with('2') {
11395 if let Ok(n) = acct.parse::<u32>() {
11396 if (2000..=2499).contains(&n) {
11397 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11400 total_debt -= net;
11402 }
11403 }
11404 }
11405 }
11406 }
11407
11408 let net_income = revenue - expenses;
11409 let working_capital = current_assets - current_liabs;
11410 let operating_cash_flow = net_income;
11413
11414 GoingConcernInput {
11415 entity_code: code.clone(),
11416 net_income,
11417 working_capital,
11418 operating_cash_flow,
11419 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11420 assessment_date,
11421 }
11422 })
11423 .collect();
11424
11425 let assessments = if gc_inputs.is_empty() {
11426 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11427 } else {
11428 gc_gen.generate_for_entities_with_inputs(
11429 &entity_codes,
11430 &gc_inputs,
11431 assessment_date,
11432 &period_label,
11433 )
11434 };
11435 info!(
11436 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11437 assessments.len(),
11438 assessments.iter().filter(|a| matches!(
11439 a.auditor_conclusion,
11440 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11441 )).count(),
11442 assessments.iter().filter(|a| matches!(
11443 a.auditor_conclusion,
11444 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11445 )).count(),
11446 assessments.iter().filter(|a| matches!(
11447 a.auditor_conclusion,
11448 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11449 )).count(),
11450 );
11451 snapshot.going_concern_assessments = assessments;
11452 }
11453
11454 {
11458 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11459 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11460 let entity_codes: Vec<String> = self
11461 .config
11462 .companies
11463 .iter()
11464 .map(|c| c.code.clone())
11465 .collect();
11466 let estimates = est_gen.generate_for_entities(&entity_codes);
11467 info!(
11468 "ISA 540 accounting estimates: {} estimates across {} entities \
11469 ({} with retrospective reviews, {} with auditor point estimates)",
11470 estimates.len(),
11471 entity_codes.len(),
11472 estimates
11473 .iter()
11474 .filter(|e| e.retrospective_review.is_some())
11475 .count(),
11476 estimates
11477 .iter()
11478 .filter(|e| e.auditor_point_estimate.is_some())
11479 .count(),
11480 );
11481 snapshot.accounting_estimates = estimates;
11482 }
11483
11484 {
11488 use datasynth_generators::audit::audit_opinion_generator::{
11489 AuditOpinionGenerator, AuditOpinionInput,
11490 };
11491
11492 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11493
11494 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11496 .engagements
11497 .iter()
11498 .map(|eng| {
11499 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11501 .findings
11502 .iter()
11503 .filter(|f| f.engagement_id == eng.engagement_id)
11504 .cloned()
11505 .collect();
11506
11507 let gc = snapshot
11509 .going_concern_assessments
11510 .iter()
11511 .find(|g| g.entity_code == eng.client_entity_id)
11512 .cloned();
11513
11514 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11516 snapshot.component_reports.clone();
11517
11518 let auditor = self
11519 .master_data
11520 .employees
11521 .first()
11522 .map(|e| e.display_name.clone())
11523 .unwrap_or_else(|| "Global Audit LLP".into());
11524
11525 let partner = self
11526 .master_data
11527 .employees
11528 .get(1)
11529 .map(|e| e.display_name.clone())
11530 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11531
11532 AuditOpinionInput {
11533 entity_code: eng.client_entity_id.clone(),
11534 entity_name: eng.client_name.clone(),
11535 engagement_id: eng.engagement_id,
11536 period_end: eng.period_end_date,
11537 findings: eng_findings,
11538 going_concern: gc,
11539 component_reports: comp_reports,
11540 is_us_listed: {
11542 let fw = &self.config.audit_standards.isa_compliance.framework;
11543 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11544 },
11545 auditor_name: auditor,
11546 engagement_partner: partner,
11547 }
11548 })
11549 .collect();
11550
11551 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11552
11553 for go in &generated_opinions {
11554 snapshot
11555 .key_audit_matters
11556 .extend(go.key_audit_matters.clone());
11557 }
11558 snapshot.audit_opinions = generated_opinions
11559 .into_iter()
11560 .map(|go| go.opinion)
11561 .collect();
11562
11563 info!(
11564 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11565 snapshot.audit_opinions.len(),
11566 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11567 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11568 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11569 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11570 );
11571 }
11572
11573 {
11577 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11578
11579 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11580
11581 for (i, company) in self.config.companies.iter().enumerate() {
11582 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11584 .engagements
11585 .iter()
11586 .filter(|e| e.client_entity_id == company.code)
11587 .map(|e| e.engagement_id)
11588 .collect();
11589
11590 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11591 .findings
11592 .iter()
11593 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11594 .cloned()
11595 .collect();
11596
11597 let emp_count = self.master_data.employees.len();
11599 let ceo_name = if emp_count > 0 {
11600 self.master_data.employees[i % emp_count]
11601 .display_name
11602 .clone()
11603 } else {
11604 format!("CEO of {}", company.name)
11605 };
11606 let cfo_name = if emp_count > 1 {
11607 self.master_data.employees[(i + 1) % emp_count]
11608 .display_name
11609 .clone()
11610 } else {
11611 format!("CFO of {}", company.name)
11612 };
11613
11614 let materiality = snapshot
11616 .engagements
11617 .iter()
11618 .find(|e| e.client_entity_id == company.code)
11619 .map(|e| e.materiality)
11620 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11621
11622 let input = SoxGeneratorInput {
11623 company_code: company.code.clone(),
11624 company_name: company.name.clone(),
11625 fiscal_year,
11626 period_end,
11627 findings: company_findings,
11628 ceo_name,
11629 cfo_name,
11630 materiality_threshold: materiality,
11631 revenue_percent: rust_decimal::Decimal::from(100),
11632 assets_percent: rust_decimal::Decimal::from(100),
11633 significant_accounts: vec![
11634 "Revenue".into(),
11635 "Accounts Receivable".into(),
11636 "Inventory".into(),
11637 "Fixed Assets".into(),
11638 "Accounts Payable".into(),
11639 ],
11640 };
11641
11642 let (certs, assessment) = sox_gen.generate(&input);
11643 snapshot.sox_302_certifications.extend(certs);
11644 snapshot.sox_404_assessments.push(assessment);
11645 }
11646
11647 info!(
11648 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11649 snapshot.sox_302_certifications.len(),
11650 snapshot.sox_404_assessments.len(),
11651 snapshot
11652 .sox_404_assessments
11653 .iter()
11654 .filter(|a| a.icfr_effective)
11655 .count(),
11656 snapshot
11657 .sox_404_assessments
11658 .iter()
11659 .filter(|a| !a.icfr_effective)
11660 .count(),
11661 );
11662 }
11663
11664 {
11668 use datasynth_generators::audit::materiality_generator::{
11669 MaterialityGenerator, MaterialityInput,
11670 };
11671
11672 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11673
11674 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11678
11679 for company in &self.config.companies {
11680 let company_code = company.code.clone();
11681
11682 let company_revenue: rust_decimal::Decimal = entries
11684 .iter()
11685 .filter(|e| e.company_code() == company_code)
11686 .flat_map(|e| e.lines.iter())
11687 .filter(|l| l.account_code.starts_with('4'))
11688 .map(|l| l.credit_amount)
11689 .sum();
11690
11691 let total_assets: rust_decimal::Decimal = entries
11693 .iter()
11694 .filter(|e| e.company_code() == company_code)
11695 .flat_map(|e| e.lines.iter())
11696 .filter(|l| l.account_code.starts_with('1'))
11697 .map(|l| l.debit_amount)
11698 .sum();
11699
11700 let total_expenses: rust_decimal::Decimal = entries
11702 .iter()
11703 .filter(|e| e.company_code() == company_code)
11704 .flat_map(|e| e.lines.iter())
11705 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11706 .map(|l| l.debit_amount)
11707 .sum();
11708
11709 let equity: rust_decimal::Decimal = entries
11711 .iter()
11712 .filter(|e| e.company_code() == company_code)
11713 .flat_map(|e| e.lines.iter())
11714 .filter(|l| l.account_code.starts_with('3'))
11715 .map(|l| l.credit_amount)
11716 .sum();
11717
11718 let pretax_income = company_revenue - total_expenses;
11719
11720 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11722 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11723 .unwrap_or(rust_decimal::Decimal::ONE);
11724 (
11725 total_revenue * w,
11726 total_revenue * w * rust_decimal::Decimal::from(3),
11727 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11728 total_revenue * w * rust_decimal::Decimal::from(2),
11729 )
11730 } else {
11731 (company_revenue, total_assets, pretax_income, equity)
11732 };
11733
11734 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11737 entity_code: company_code,
11738 period: format!("FY{}", fiscal_year),
11739 revenue: rev,
11740 pretax_income: pti,
11741 total_assets: assets,
11742 equity: eq,
11743 gross_profit,
11744 });
11745 }
11746
11747 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11748
11749 info!(
11750 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11751 {} total assets, {} equity benchmarks)",
11752 snapshot.materiality_calculations.len(),
11753 snapshot
11754 .materiality_calculations
11755 .iter()
11756 .filter(|m| matches!(
11757 m.benchmark,
11758 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11759 ))
11760 .count(),
11761 snapshot
11762 .materiality_calculations
11763 .iter()
11764 .filter(|m| matches!(
11765 m.benchmark,
11766 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11767 ))
11768 .count(),
11769 snapshot
11770 .materiality_calculations
11771 .iter()
11772 .filter(|m| matches!(
11773 m.benchmark,
11774 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11775 ))
11776 .count(),
11777 snapshot
11778 .materiality_calculations
11779 .iter()
11780 .filter(|m| matches!(
11781 m.benchmark,
11782 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11783 ))
11784 .count(),
11785 );
11786 }
11787
11788 {
11792 use datasynth_generators::audit::cra_generator::CraGenerator;
11793
11794 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11795
11796 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11798 .audit_scopes
11799 .iter()
11800 .map(|s| (s.entity_code.clone(), s.id.clone()))
11801 .collect();
11802
11803 for company in &self.config.companies {
11804 let cras = cra_gen.generate_for_entity(&company.code, None);
11805 let scope_id = entity_scope_map.get(&company.code).cloned();
11806 let cras_with_scope: Vec<_> = cras
11807 .into_iter()
11808 .map(|mut cra| {
11809 cra.scope_id = scope_id.clone();
11810 cra
11811 })
11812 .collect();
11813 snapshot.combined_risk_assessments.extend(cras_with_scope);
11814 }
11815
11816 let significant_count = snapshot
11817 .combined_risk_assessments
11818 .iter()
11819 .filter(|c| c.significant_risk)
11820 .count();
11821 let high_cra_count = snapshot
11822 .combined_risk_assessments
11823 .iter()
11824 .filter(|c| {
11825 matches!(
11826 c.combined_risk,
11827 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11828 )
11829 })
11830 .count();
11831
11832 info!(
11833 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11834 snapshot.combined_risk_assessments.len(),
11835 significant_count,
11836 high_cra_count,
11837 );
11838 }
11839
11840 {
11844 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11845
11846 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11847
11848 for company in &self.config.companies {
11850 let entity_code = company.code.clone();
11851
11852 let tolerable_error = snapshot
11854 .materiality_calculations
11855 .iter()
11856 .find(|m| m.entity_code == entity_code)
11857 .map(|m| m.tolerable_error);
11858
11859 let entity_cras: Vec<_> = snapshot
11861 .combined_risk_assessments
11862 .iter()
11863 .filter(|c| c.entity_code == entity_code)
11864 .cloned()
11865 .collect();
11866
11867 if !entity_cras.is_empty() {
11868 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11869 snapshot.sampling_plans.extend(plans);
11870 snapshot.sampled_items.extend(items);
11871 }
11872 }
11873
11874 let misstatement_count = snapshot
11875 .sampled_items
11876 .iter()
11877 .filter(|i| i.misstatement_found)
11878 .count();
11879
11880 info!(
11881 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11882 snapshot.sampling_plans.len(),
11883 snapshot.sampled_items.len(),
11884 misstatement_count,
11885 );
11886 }
11887
11888 {
11892 use datasynth_generators::audit::scots_generator::{
11893 ScotsGenerator, ScotsGeneratorConfig,
11894 };
11895
11896 let ic_enabled = self.config.intercompany.enabled;
11897
11898 let config = ScotsGeneratorConfig {
11899 intercompany_enabled: ic_enabled,
11900 ..ScotsGeneratorConfig::default()
11901 };
11902 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11903
11904 for company in &self.config.companies {
11905 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11906 snapshot
11907 .significant_transaction_classes
11908 .extend(entity_scots);
11909 }
11910
11911 let estimation_count = snapshot
11912 .significant_transaction_classes
11913 .iter()
11914 .filter(|s| {
11915 matches!(
11916 s.transaction_type,
11917 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11918 )
11919 })
11920 .count();
11921
11922 info!(
11923 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11924 snapshot.significant_transaction_classes.len(),
11925 estimation_count,
11926 );
11927 }
11928
11929 {
11933 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11934
11935 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11936 let entity_codes: Vec<String> = self
11937 .config
11938 .companies
11939 .iter()
11940 .map(|c| c.code.clone())
11941 .collect();
11942 let unusual_flags =
11943 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11944 info!(
11945 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11946 unusual_flags.len(),
11947 unusual_flags
11948 .iter()
11949 .filter(|f| matches!(
11950 f.severity,
11951 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11952 ))
11953 .count(),
11954 unusual_flags
11955 .iter()
11956 .filter(|f| matches!(
11957 f.severity,
11958 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11959 ))
11960 .count(),
11961 unusual_flags
11962 .iter()
11963 .filter(|f| matches!(
11964 f.severity,
11965 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11966 ))
11967 .count(),
11968 );
11969 snapshot.unusual_items = unusual_flags;
11970 }
11971
11972 {
11976 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11977
11978 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11979 let entity_codes: Vec<String> = self
11980 .config
11981 .companies
11982 .iter()
11983 .map(|c| c.code.clone())
11984 .collect();
11985 let current_period_label = format!("FY{fiscal_year}");
11986 let prior_period_label = format!("FY{}", fiscal_year - 1);
11987 let analytical_rels = ar_gen.generate_for_entities(
11988 &entity_codes,
11989 entries,
11990 ¤t_period_label,
11991 &prior_period_label,
11992 );
11993 let out_of_range = analytical_rels
11994 .iter()
11995 .filter(|r| !r.within_expected_range)
11996 .count();
11997 info!(
11998 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11999 analytical_rels.len(),
12000 out_of_range,
12001 );
12002 snapshot.analytical_relationships = analytical_rels;
12003 }
12004
12005 if let Some(pb) = pb {
12006 pb.finish_with_message(format!(
12007 "Audit data: {} engagements, {} workpapers, {} evidence, \
12008 {} confirmations, {} procedure steps, {} samples, \
12009 {} analytical, {} IA funcs, {} related parties, \
12010 {} component auditors, {} letters, {} subsequent events, \
12011 {} service orgs, {} going concern, {} accounting estimates, \
12012 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12013 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12014 {} unusual items, {} analytical relationships",
12015 snapshot.engagements.len(),
12016 snapshot.workpapers.len(),
12017 snapshot.evidence.len(),
12018 snapshot.confirmations.len(),
12019 snapshot.procedure_steps.len(),
12020 snapshot.samples.len(),
12021 snapshot.analytical_results.len(),
12022 snapshot.ia_functions.len(),
12023 snapshot.related_parties.len(),
12024 snapshot.component_auditors.len(),
12025 snapshot.engagement_letters.len(),
12026 snapshot.subsequent_events.len(),
12027 snapshot.service_organizations.len(),
12028 snapshot.going_concern_assessments.len(),
12029 snapshot.accounting_estimates.len(),
12030 snapshot.audit_opinions.len(),
12031 snapshot.key_audit_matters.len(),
12032 snapshot.sox_302_certifications.len(),
12033 snapshot.sox_404_assessments.len(),
12034 snapshot.materiality_calculations.len(),
12035 snapshot.combined_risk_assessments.len(),
12036 snapshot.sampling_plans.len(),
12037 snapshot.significant_transaction_classes.len(),
12038 snapshot.unusual_items.len(),
12039 snapshot.analytical_relationships.len(),
12040 ));
12041 }
12042
12043 {
12050 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12051 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12052 debug!(
12053 "PCAOB-ISA mappings generated: {} mappings",
12054 snapshot.isa_pcaob_mappings.len()
12055 );
12056 }
12057
12058 {
12065 use datasynth_standards::audit::isa_reference::IsaStandard;
12066 snapshot.isa_mappings = IsaStandard::standard_entries();
12067 debug!(
12068 "ISA standard entries generated: {} standards",
12069 snapshot.isa_mappings.len()
12070 );
12071 }
12072
12073 {
12076 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12077 .engagements
12078 .iter()
12079 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12080 .collect();
12081
12082 for rpt in &mut snapshot.related_party_transactions {
12083 if rpt.journal_entry_id.is_some() {
12084 continue; }
12086 let entity = engagement_by_id
12087 .get(&rpt.engagement_id.to_string())
12088 .copied()
12089 .unwrap_or("");
12090
12091 let best_je = entries
12093 .iter()
12094 .filter(|je| je.header.company_code == entity)
12095 .min_by_key(|je| {
12096 (je.header.posting_date - rpt.transaction_date)
12097 .num_days()
12098 .abs()
12099 });
12100
12101 if let Some(je) = best_je {
12102 rpt.journal_entry_id = Some(je.header.document_id.to_string());
12103 }
12104 }
12105
12106 let linked = snapshot
12107 .related_party_transactions
12108 .iter()
12109 .filter(|t| t.journal_entry_id.is_some())
12110 .count();
12111 debug!(
12112 "Linked {}/{} related party transactions to journal entries",
12113 linked,
12114 snapshot.related_party_transactions.len()
12115 );
12116 }
12117
12118 if !snapshot.engagements.is_empty() {
12124 use datasynth_generators::audit_opinion_generator::{
12125 AuditOpinionGenerator, AuditOpinionInput,
12126 };
12127
12128 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12129 let inputs: Vec<AuditOpinionInput> = snapshot
12130 .engagements
12131 .iter()
12132 .map(|eng| {
12133 let findings = snapshot
12134 .findings
12135 .iter()
12136 .filter(|f| f.engagement_id == eng.engagement_id)
12137 .cloned()
12138 .collect();
12139 let going_concern = snapshot
12140 .going_concern_assessments
12141 .iter()
12142 .find(|gc| gc.entity_code == eng.client_entity_id)
12143 .cloned();
12144 let component_reports = snapshot
12147 .component_reports
12148 .iter()
12149 .filter(|r| r.entity_code == eng.client_entity_id)
12150 .cloned()
12151 .collect();
12152
12153 AuditOpinionInput {
12154 entity_code: eng.client_entity_id.clone(),
12155 entity_name: eng.client_name.clone(),
12156 engagement_id: eng.engagement_id,
12157 period_end: eng.period_end_date,
12158 findings,
12159 going_concern,
12160 component_reports,
12161 is_us_listed: matches!(
12162 eng.engagement_type,
12163 datasynth_core::audit::EngagementType::IntegratedAudit
12164 | datasynth_core::audit::EngagementType::Sox404
12165 ),
12166 auditor_name: "DataSynth Audit LLP".to_string(),
12167 engagement_partner: "Engagement Partner".to_string(),
12168 }
12169 })
12170 .collect();
12171
12172 let generated = opinion_gen.generate_batch(&inputs);
12173 for g in generated {
12174 snapshot.key_audit_matters.extend(g.key_audit_matters);
12175 snapshot.audit_opinions.push(g.opinion);
12176 }
12177 debug!(
12178 "Generated {} audit opinions with {} key audit matters",
12179 snapshot.audit_opinions.len(),
12180 snapshot.key_audit_matters.len()
12181 );
12182 }
12183
12184 Ok(snapshot)
12185 }
12186
12187 fn generate_audit_data_with_fsm(
12194 &mut self,
12195 entries: &[JournalEntry],
12196 ) -> SynthResult<AuditSnapshot> {
12197 use datasynth_audit_fsm::{
12198 context::EngagementContext,
12199 engine::AuditFsmEngine,
12200 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
12201 };
12202 use rand::SeedableRng;
12203 use rand_chacha::ChaCha8Rng;
12204
12205 info!("Audit FSM: generating audit data via FSM engine");
12206
12207 let fsm_config = self
12208 .config
12209 .audit
12210 .fsm
12211 .as_ref()
12212 .expect("FSM config must be present when FSM is enabled");
12213
12214 let bwp = match fsm_config.blueprint.as_str() {
12216 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
12217 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
12218 _ => {
12219 warn!(
12220 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
12221 fsm_config.blueprint
12222 );
12223 BlueprintWithPreconditions::load_builtin_fsa()
12224 }
12225 }
12226 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
12227
12228 let overlay = match fsm_config.overlay.as_str() {
12230 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
12231 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
12232 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
12233 _ => {
12234 warn!(
12235 "Unknown FSM overlay '{}', falling back to builtin:default",
12236 fsm_config.overlay
12237 );
12238 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
12239 }
12240 }
12241 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
12242
12243 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12245 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12246 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12247
12248 let company = self.config.companies.first();
12250 let company_code = company
12251 .map(|c| c.code.clone())
12252 .unwrap_or_else(|| "UNKNOWN".to_string());
12253 let company_name = company
12254 .map(|c| c.name.clone())
12255 .unwrap_or_else(|| "Unknown Company".to_string());
12256 let currency = company
12257 .map(|c| c.currency.clone())
12258 .unwrap_or_else(|| "USD".to_string());
12259
12260 let entity_entries: Vec<_> = entries
12262 .iter()
12263 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
12264 .cloned()
12265 .collect();
12266 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
12270 .iter()
12271 .flat_map(|e| e.lines.iter())
12272 .filter(|l| l.account_code.starts_with('4'))
12273 .map(|l| l.credit_amount - l.debit_amount)
12274 .sum();
12275
12276 let total_assets: rust_decimal::Decimal = entries
12277 .iter()
12278 .flat_map(|e| e.lines.iter())
12279 .filter(|l| l.account_code.starts_with('1'))
12280 .map(|l| l.debit_amount - l.credit_amount)
12281 .sum();
12282
12283 let total_expenses: rust_decimal::Decimal = entries
12284 .iter()
12285 .flat_map(|e| e.lines.iter())
12286 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12287 .map(|l| l.debit_amount)
12288 .sum();
12289
12290 let equity: rust_decimal::Decimal = entries
12291 .iter()
12292 .flat_map(|e| e.lines.iter())
12293 .filter(|l| l.account_code.starts_with('3'))
12294 .map(|l| l.credit_amount - l.debit_amount)
12295 .sum();
12296
12297 let total_debt: rust_decimal::Decimal = entries
12298 .iter()
12299 .flat_map(|e| e.lines.iter())
12300 .filter(|l| l.account_code.starts_with('2'))
12301 .map(|l| l.credit_amount - l.debit_amount)
12302 .sum();
12303
12304 let pretax_income = total_revenue - total_expenses;
12305
12306 let cogs: rust_decimal::Decimal = entries
12307 .iter()
12308 .flat_map(|e| e.lines.iter())
12309 .filter(|l| l.account_code.starts_with('5'))
12310 .map(|l| l.debit_amount)
12311 .sum();
12312 let gross_profit = total_revenue - cogs;
12313
12314 let current_assets: rust_decimal::Decimal = entries
12315 .iter()
12316 .flat_map(|e| e.lines.iter())
12317 .filter(|l| {
12318 l.account_code.starts_with("10")
12319 || l.account_code.starts_with("11")
12320 || l.account_code.starts_with("12")
12321 || l.account_code.starts_with("13")
12322 })
12323 .map(|l| l.debit_amount - l.credit_amount)
12324 .sum();
12325 let current_liabilities: rust_decimal::Decimal = entries
12326 .iter()
12327 .flat_map(|e| e.lines.iter())
12328 .filter(|l| {
12329 l.account_code.starts_with("20")
12330 || l.account_code.starts_with("21")
12331 || l.account_code.starts_with("22")
12332 })
12333 .map(|l| l.credit_amount - l.debit_amount)
12334 .sum();
12335 let working_capital = current_assets - current_liabilities;
12336
12337 let depreciation: rust_decimal::Decimal = entries
12338 .iter()
12339 .flat_map(|e| e.lines.iter())
12340 .filter(|l| l.account_code.starts_with("60"))
12341 .map(|l| l.debit_amount)
12342 .sum();
12343 let operating_cash_flow = pretax_income + depreciation;
12344
12345 let accounts: Vec<String> = self
12347 .coa
12348 .as_ref()
12349 .map(|coa| {
12350 coa.get_postable_accounts()
12351 .iter()
12352 .map(|acc| acc.account_code().to_string())
12353 .collect()
12354 })
12355 .unwrap_or_default();
12356
12357 let team_member_ids: Vec<String> = self
12359 .master_data
12360 .employees
12361 .iter()
12362 .take(8) .map(|e| e.employee_id.clone())
12364 .collect();
12365 let team_member_pairs: Vec<(String, String)> = self
12366 .master_data
12367 .employees
12368 .iter()
12369 .take(8)
12370 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12371 .collect();
12372
12373 let vendor_names: Vec<String> = self
12374 .master_data
12375 .vendors
12376 .iter()
12377 .map(|v| v.name.clone())
12378 .collect();
12379 let customer_names: Vec<String> = self
12380 .master_data
12381 .customers
12382 .iter()
12383 .map(|c| c.name.clone())
12384 .collect();
12385
12386 let entity_codes: Vec<String> = self
12387 .config
12388 .companies
12389 .iter()
12390 .map(|c| c.code.clone())
12391 .collect();
12392
12393 let journal_entry_ids: Vec<String> = entries
12395 .iter()
12396 .take(50)
12397 .map(|e| e.header.document_id.to_string())
12398 .collect();
12399
12400 let mut account_balances = std::collections::HashMap::<String, f64>::new();
12402 for entry in entries {
12403 for line in &entry.lines {
12404 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12405 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12406 *account_balances
12407 .entry(line.account_code.clone())
12408 .or_insert(0.0) += debit_f64 - credit_f64;
12409 }
12410 }
12411
12412 let control_ids: Vec<String> = Vec::new();
12417 let anomaly_refs: Vec<String> = Vec::new();
12418
12419 let mut context = EngagementContext {
12420 company_code,
12421 company_name,
12422 fiscal_year: start_date.year(),
12423 currency,
12424 total_revenue,
12425 total_assets,
12426 engagement_start: start_date,
12427 report_date: period_end,
12428 pretax_income,
12429 equity,
12430 gross_profit,
12431 working_capital,
12432 operating_cash_flow,
12433 total_debt,
12434 team_member_ids,
12435 team_member_pairs,
12436 accounts,
12437 vendor_names,
12438 customer_names,
12439 journal_entry_ids,
12440 account_balances,
12441 control_ids,
12442 anomaly_refs,
12443 journal_entries: entries.to_vec(),
12444 is_us_listed: false,
12445 entity_codes,
12446 auditor_firm_name: "DataSynth Audit LLP".into(),
12447 accounting_framework: self
12448 .config
12449 .accounting_standards
12450 .framework
12451 .map(|f| match f {
12452 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12453 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12454 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12455 "French GAAP"
12456 }
12457 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12458 "German GAAP"
12459 }
12460 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12461 "Dual Reporting"
12462 }
12463 })
12464 .unwrap_or("IFRS")
12465 .into(),
12466 };
12467
12468 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12470 let rng = ChaCha8Rng::seed_from_u64(seed);
12471 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12472
12473 let mut result = engine
12474 .run_engagement(&context)
12475 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12476
12477 info!(
12478 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12479 {} phases completed, duration {:.1}h",
12480 result.event_log.len(),
12481 result.artifacts.total_artifacts(),
12482 result.anomalies.len(),
12483 result.phases_completed.len(),
12484 result.total_duration_hours,
12485 );
12486
12487 let tb_entity = context.company_code.clone();
12489 let tb_fy = context.fiscal_year;
12490 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12491 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12492 entries,
12493 &tb_entity,
12494 tb_fy,
12495 self.coa.as_ref().map(|c| c.as_ref()),
12496 );
12497
12498 let bag = result.artifacts;
12500 let mut snapshot = AuditSnapshot {
12501 engagements: bag.engagements,
12502 engagement_letters: bag.engagement_letters,
12503 materiality_calculations: bag.materiality_calculations,
12504 risk_assessments: bag.risk_assessments,
12505 combined_risk_assessments: bag.combined_risk_assessments,
12506 workpapers: bag.workpapers,
12507 evidence: bag.evidence,
12508 findings: bag.findings,
12509 judgments: bag.judgments,
12510 sampling_plans: bag.sampling_plans,
12511 sampled_items: bag.sampled_items,
12512 analytical_results: bag.analytical_results,
12513 going_concern_assessments: bag.going_concern_assessments,
12514 subsequent_events: bag.subsequent_events,
12515 audit_opinions: bag.audit_opinions,
12516 key_audit_matters: bag.key_audit_matters,
12517 procedure_steps: bag.procedure_steps,
12518 samples: bag.samples,
12519 confirmations: bag.confirmations,
12520 confirmation_responses: bag.confirmation_responses,
12521 fsm_event_trail: Some(result.event_log),
12523 ..Default::default()
12525 };
12526
12527 {
12529 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12530 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12531 }
12532 {
12533 use datasynth_standards::audit::isa_reference::IsaStandard;
12534 snapshot.isa_mappings = IsaStandard::standard_entries();
12535 }
12536
12537 info!(
12538 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12539 {} risk assessments, {} findings, {} materiality calcs",
12540 snapshot.engagements.len(),
12541 snapshot.workpapers.len(),
12542 snapshot.evidence.len(),
12543 snapshot.risk_assessments.len(),
12544 snapshot.findings.len(),
12545 snapshot.materiality_calculations.len(),
12546 );
12547
12548 Ok(snapshot)
12549 }
12550
12551 fn export_graphs(
12558 &mut self,
12559 entries: &[JournalEntry],
12560 _coa: &Arc<ChartOfAccounts>,
12561 stats: &mut EnhancedGenerationStatistics,
12562 ) -> SynthResult<GraphExportSnapshot> {
12563 let pb = self.create_progress_bar(100, "Exporting Graphs");
12564
12565 let mut snapshot = GraphExportSnapshot::default();
12566
12567 let output_dir = self
12569 .output_path
12570 .clone()
12571 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12572 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12573
12574 for graph_type in &self.config.graph_export.graph_types {
12576 if let Some(pb) = &pb {
12577 pb.inc(10);
12578 }
12579
12580 let graph_config = TransactionGraphConfig {
12582 include_vendors: false,
12583 include_customers: false,
12584 create_debit_credit_edges: true,
12585 include_document_nodes: graph_type.include_document_nodes,
12586 min_edge_weight: graph_type.min_edge_weight,
12587 aggregate_parallel_edges: graph_type.aggregate_edges,
12588 framework: None,
12589 };
12590
12591 let mut builder = TransactionGraphBuilder::new(graph_config);
12592 builder.add_journal_entries(entries);
12593 let graph = builder.build();
12594
12595 stats.graph_node_count += graph.node_count();
12597 stats.graph_edge_count += graph.edge_count();
12598
12599 if let Some(pb) = &pb {
12600 pb.inc(40);
12601 }
12602
12603 for format in &self.config.graph_export.formats {
12605 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12606
12607 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12609 warn!("Failed to create graph output directory: {}", e);
12610 continue;
12611 }
12612
12613 match format {
12614 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12615 let pyg_config = PyGExportConfig {
12616 common: datasynth_graph::CommonExportConfig {
12617 export_node_features: true,
12618 export_edge_features: true,
12619 export_node_labels: true,
12620 export_edge_labels: true,
12621 export_masks: true,
12622 train_ratio: self.config.graph_export.train_ratio,
12623 val_ratio: self.config.graph_export.validation_ratio,
12624 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12625 },
12626 one_hot_categoricals: false,
12627 };
12628
12629 let exporter = PyGExporter::new(pyg_config);
12630 match exporter.export(&graph, &format_dir) {
12631 Ok(metadata) => {
12632 snapshot.exports.insert(
12633 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12634 GraphExportInfo {
12635 name: graph_type.name.clone(),
12636 format: "pytorch_geometric".to_string(),
12637 output_path: format_dir.clone(),
12638 node_count: metadata.num_nodes,
12639 edge_count: metadata.num_edges,
12640 },
12641 );
12642 snapshot.graph_count += 1;
12643 }
12644 Err(e) => {
12645 warn!("Failed to export PyTorch Geometric graph: {}", e);
12646 }
12647 }
12648 }
12649 datasynth_config::schema::GraphExportFormat::Neo4j => {
12650 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12651
12652 let neo4j_config = Neo4jExportConfig {
12653 export_node_properties: true,
12654 export_edge_properties: true,
12655 export_features: true,
12656 generate_cypher: true,
12657 generate_admin_import: true,
12658 database_name: "synth".to_string(),
12659 cypher_batch_size: 1000,
12660 };
12661
12662 let exporter = Neo4jExporter::new(neo4j_config);
12663 match exporter.export(&graph, &format_dir) {
12664 Ok(metadata) => {
12665 snapshot.exports.insert(
12666 format!("{}_{}", graph_type.name, "neo4j"),
12667 GraphExportInfo {
12668 name: graph_type.name.clone(),
12669 format: "neo4j".to_string(),
12670 output_path: format_dir.clone(),
12671 node_count: metadata.num_nodes,
12672 edge_count: metadata.num_edges,
12673 },
12674 );
12675 snapshot.graph_count += 1;
12676 }
12677 Err(e) => {
12678 warn!("Failed to export Neo4j graph: {}", e);
12679 }
12680 }
12681 }
12682 datasynth_config::schema::GraphExportFormat::Dgl => {
12683 use datasynth_graph::{DGLExportConfig, DGLExporter};
12684
12685 let dgl_config = DGLExportConfig {
12686 common: datasynth_graph::CommonExportConfig {
12687 export_node_features: true,
12688 export_edge_features: true,
12689 export_node_labels: true,
12690 export_edge_labels: true,
12691 export_masks: true,
12692 train_ratio: self.config.graph_export.train_ratio,
12693 val_ratio: self.config.graph_export.validation_ratio,
12694 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12695 },
12696 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12697 include_pickle_script: true, };
12699
12700 let exporter = DGLExporter::new(dgl_config);
12701 match exporter.export(&graph, &format_dir) {
12702 Ok(metadata) => {
12703 snapshot.exports.insert(
12704 format!("{}_{}", graph_type.name, "dgl"),
12705 GraphExportInfo {
12706 name: graph_type.name.clone(),
12707 format: "dgl".to_string(),
12708 output_path: format_dir.clone(),
12709 node_count: metadata.common.num_nodes,
12710 edge_count: metadata.common.num_edges,
12711 },
12712 );
12713 snapshot.graph_count += 1;
12714 }
12715 Err(e) => {
12716 warn!("Failed to export DGL graph: {}", e);
12717 }
12718 }
12719 }
12720 datasynth_config::schema::GraphExportFormat::RustGraph => {
12721 use datasynth_graph::{
12722 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12723 };
12724
12725 let rustgraph_config = RustGraphExportConfig {
12726 include_features: true,
12727 include_temporal: true,
12728 include_labels: true,
12729 source_name: "datasynth".to_string(),
12730 batch_id: None,
12731 output_format: RustGraphOutputFormat::JsonLines,
12732 export_node_properties: true,
12733 export_edge_properties: true,
12734 pretty_print: false,
12735 };
12736
12737 let exporter = RustGraphExporter::new(rustgraph_config);
12738 match exporter.export(&graph, &format_dir) {
12739 Ok(metadata) => {
12740 snapshot.exports.insert(
12741 format!("{}_{}", graph_type.name, "rustgraph"),
12742 GraphExportInfo {
12743 name: graph_type.name.clone(),
12744 format: "rustgraph".to_string(),
12745 output_path: format_dir.clone(),
12746 node_count: metadata.num_nodes,
12747 edge_count: metadata.num_edges,
12748 },
12749 );
12750 snapshot.graph_count += 1;
12751 }
12752 Err(e) => {
12753 warn!("Failed to export RustGraph: {}", e);
12754 }
12755 }
12756 }
12757 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12758 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12760 }
12761 }
12762 }
12763
12764 if let Some(pb) = &pb {
12765 pb.inc(40);
12766 }
12767 }
12768
12769 stats.graph_export_count = snapshot.graph_count;
12770 snapshot.exported = snapshot.graph_count > 0;
12771
12772 if let Some(pb) = pb {
12773 pb.finish_with_message(format!(
12774 "Graphs exported: {} graphs ({} nodes, {} edges)",
12775 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12776 ));
12777 }
12778
12779 Ok(snapshot)
12780 }
12781
12782 fn build_additional_graphs(
12787 &self,
12788 banking: &BankingSnapshot,
12789 intercompany: &IntercompanySnapshot,
12790 entries: &[JournalEntry],
12791 stats: &mut EnhancedGenerationStatistics,
12792 ) {
12793 let output_dir = self
12794 .output_path
12795 .clone()
12796 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12797 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12798
12799 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12801 info!("Phase 10c: Building banking network graph");
12802 let config = BankingGraphConfig::default();
12803 let mut builder = BankingGraphBuilder::new(config);
12804 builder.add_customers(&banking.customers);
12805 builder.add_accounts(&banking.accounts, &banking.customers);
12806 builder.add_transactions(&banking.transactions);
12807 let graph = builder.build();
12808
12809 let node_count = graph.node_count();
12810 let edge_count = graph.edge_count();
12811 stats.graph_node_count += node_count;
12812 stats.graph_edge_count += edge_count;
12813
12814 for format in &self.config.graph_export.formats {
12816 if matches!(
12817 format,
12818 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12819 ) {
12820 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12821 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12822 warn!("Failed to create banking graph output dir: {}", e);
12823 continue;
12824 }
12825 let pyg_config = PyGExportConfig::default();
12826 let exporter = PyGExporter::new(pyg_config);
12827 if let Err(e) = exporter.export(&graph, &format_dir) {
12828 warn!("Failed to export banking graph as PyG: {}", e);
12829 } else {
12830 info!(
12831 "Banking network graph exported: {} nodes, {} edges",
12832 node_count, edge_count
12833 );
12834 }
12835 }
12836 }
12837 }
12838
12839 let approval_entries: Vec<_> = entries
12841 .iter()
12842 .filter(|je| je.header.approval_workflow.is_some())
12843 .collect();
12844
12845 if !approval_entries.is_empty() {
12846 info!(
12847 "Phase 10c: Building approval network graph ({} entries with approvals)",
12848 approval_entries.len()
12849 );
12850 let config = ApprovalGraphConfig::default();
12851 let mut builder = ApprovalGraphBuilder::new(config);
12852
12853 for je in &approval_entries {
12854 if let Some(ref wf) = je.header.approval_workflow {
12855 for action in &wf.actions {
12856 let record = datasynth_core::models::ApprovalRecord {
12857 approval_id: format!(
12858 "APR-{}-{}",
12859 je.header.document_id, action.approval_level
12860 ),
12861 document_number: je.header.document_id.to_string(),
12862 document_type: "JE".to_string(),
12863 company_code: je.company_code().to_string(),
12864 requester_id: wf.preparer_id.clone(),
12865 requester_name: Some(wf.preparer_name.clone()),
12866 approver_id: action.actor_id.clone(),
12867 approver_name: action.actor_name.clone(),
12868 approval_date: je.posting_date(),
12869 action: format!("{:?}", action.action),
12870 amount: wf.amount,
12871 approval_limit: None,
12872 comments: action.comments.clone(),
12873 delegation_from: None,
12874 is_auto_approved: false,
12875 };
12876 builder.add_approval(&record);
12877 }
12878 }
12879 }
12880
12881 let graph = builder.build();
12882 let node_count = graph.node_count();
12883 let edge_count = graph.edge_count();
12884 stats.graph_node_count += node_count;
12885 stats.graph_edge_count += edge_count;
12886
12887 for format in &self.config.graph_export.formats {
12889 if matches!(
12890 format,
12891 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12892 ) {
12893 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12894 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12895 warn!("Failed to create approval graph output dir: {}", e);
12896 continue;
12897 }
12898 let pyg_config = PyGExportConfig::default();
12899 let exporter = PyGExporter::new(pyg_config);
12900 if let Err(e) = exporter.export(&graph, &format_dir) {
12901 warn!("Failed to export approval graph as PyG: {}", e);
12902 } else {
12903 info!(
12904 "Approval network graph exported: {} nodes, {} edges",
12905 node_count, edge_count
12906 );
12907 }
12908 }
12909 }
12910 }
12911
12912 if self.config.companies.len() >= 2 {
12914 info!(
12915 "Phase 10c: Building entity relationship graph ({} companies)",
12916 self.config.companies.len()
12917 );
12918
12919 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12920 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12921
12922 let parent_code = &self.config.companies[0].code;
12924 let mut companies: Vec<datasynth_core::models::Company> =
12925 Vec::with_capacity(self.config.companies.len());
12926
12927 let first = &self.config.companies[0];
12929 companies.push(datasynth_core::models::Company::parent(
12930 &first.code,
12931 &first.name,
12932 &first.country,
12933 &first.currency,
12934 ));
12935
12936 for cc in self.config.companies.iter().skip(1) {
12938 companies.push(datasynth_core::models::Company::subsidiary(
12939 &cc.code,
12940 &cc.name,
12941 &cc.country,
12942 &cc.currency,
12943 parent_code,
12944 rust_decimal::Decimal::from(100),
12945 ));
12946 }
12947
12948 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12950 self.config
12951 .companies
12952 .iter()
12953 .skip(1)
12954 .enumerate()
12955 .map(|(i, cc)| {
12956 let mut rel =
12957 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12958 format!("REL{:03}", i + 1),
12959 parent_code.clone(),
12960 cc.code.clone(),
12961 rust_decimal::Decimal::from(100),
12962 start_date,
12963 );
12964 rel.functional_currency = cc.currency.clone();
12965 rel
12966 })
12967 .collect();
12968
12969 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12970 builder.add_companies(&companies);
12971 builder.add_ownership_relationships(&relationships);
12972
12973 for pair in &intercompany.matched_pairs {
12975 builder.add_intercompany_edge(
12976 &pair.seller_company,
12977 &pair.buyer_company,
12978 pair.amount,
12979 &format!("{:?}", pair.transaction_type),
12980 );
12981 }
12982
12983 let graph = builder.build();
12984 let node_count = graph.node_count();
12985 let edge_count = graph.edge_count();
12986 stats.graph_node_count += node_count;
12987 stats.graph_edge_count += edge_count;
12988
12989 for format in &self.config.graph_export.formats {
12991 if matches!(
12992 format,
12993 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12994 ) {
12995 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12996 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12997 warn!("Failed to create entity graph output dir: {}", e);
12998 continue;
12999 }
13000 let pyg_config = PyGExportConfig::default();
13001 let exporter = PyGExporter::new(pyg_config);
13002 if let Err(e) = exporter.export(&graph, &format_dir) {
13003 warn!("Failed to export entity graph as PyG: {}", e);
13004 } else {
13005 info!(
13006 "Entity relationship graph exported: {} nodes, {} edges",
13007 node_count, edge_count
13008 );
13009 }
13010 }
13011 }
13012 } else {
13013 debug!(
13014 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13015 self.config.companies.len()
13016 );
13017 }
13018 }
13019
13020 #[allow(clippy::too_many_arguments)]
13027 fn export_hypergraph(
13028 &self,
13029 coa: &Arc<ChartOfAccounts>,
13030 entries: &[JournalEntry],
13031 document_flows: &DocumentFlowSnapshot,
13032 sourcing: &SourcingSnapshot,
13033 hr: &HrSnapshot,
13034 manufacturing: &ManufacturingSnapshot,
13035 banking: &BankingSnapshot,
13036 audit: &AuditSnapshot,
13037 financial_reporting: &FinancialReportingSnapshot,
13038 ocpm: &OcpmSnapshot,
13039 compliance: &ComplianceRegulationsSnapshot,
13040 stats: &mut EnhancedGenerationStatistics,
13041 ) -> SynthResult<HypergraphExportInfo> {
13042 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13043 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13044 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13045 use datasynth_graph::models::hypergraph::AggregationStrategy;
13046
13047 let hg_settings = &self.config.graph_export.hypergraph;
13048
13049 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13051 "truncate" => AggregationStrategy::Truncate,
13052 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13053 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13054 "importance_sample" => AggregationStrategy::ImportanceSample,
13055 _ => AggregationStrategy::PoolByCounterparty,
13056 };
13057
13058 let builder_config = HypergraphConfig {
13059 max_nodes: hg_settings.max_nodes,
13060 aggregation_strategy,
13061 include_coso: hg_settings.governance_layer.include_coso,
13062 include_controls: hg_settings.governance_layer.include_controls,
13063 include_sox: hg_settings.governance_layer.include_sox,
13064 include_vendors: hg_settings.governance_layer.include_vendors,
13065 include_customers: hg_settings.governance_layer.include_customers,
13066 include_employees: hg_settings.governance_layer.include_employees,
13067 include_p2p: hg_settings.process_layer.include_p2p,
13068 include_o2c: hg_settings.process_layer.include_o2c,
13069 include_s2c: hg_settings.process_layer.include_s2c,
13070 include_h2r: hg_settings.process_layer.include_h2r,
13071 include_mfg: hg_settings.process_layer.include_mfg,
13072 include_bank: hg_settings.process_layer.include_bank,
13073 include_audit: hg_settings.process_layer.include_audit,
13074 include_r2r: hg_settings.process_layer.include_r2r,
13075 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13076 docs_per_counterparty_threshold: hg_settings
13077 .process_layer
13078 .docs_per_counterparty_threshold,
13079 include_accounts: hg_settings.accounting_layer.include_accounts,
13080 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13081 include_cross_layer_edges: hg_settings.cross_layer.enabled,
13082 include_compliance: self.config.compliance_regulations.enabled,
13083 include_tax: true,
13084 include_treasury: true,
13085 include_esg: true,
13086 include_project: true,
13087 include_intercompany: true,
13088 include_temporal_events: true,
13089 };
13090
13091 let mut builder = HypergraphBuilder::new(builder_config);
13092
13093 builder.add_coso_framework();
13095
13096 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13099 let controls = InternalControl::standard_controls();
13100 builder.add_controls(&controls);
13101 }
13102
13103 builder.add_vendors(&self.master_data.vendors);
13105 builder.add_customers(&self.master_data.customers);
13106 builder.add_employees(&self.master_data.employees);
13107
13108 builder.add_p2p_documents(
13110 &document_flows.purchase_orders,
13111 &document_flows.goods_receipts,
13112 &document_flows.vendor_invoices,
13113 &document_flows.payments,
13114 );
13115 builder.add_o2c_documents(
13116 &document_flows.sales_orders,
13117 &document_flows.deliveries,
13118 &document_flows.customer_invoices,
13119 );
13120 builder.add_s2c_documents(
13121 &sourcing.sourcing_projects,
13122 &sourcing.qualifications,
13123 &sourcing.rfx_events,
13124 &sourcing.bids,
13125 &sourcing.bid_evaluations,
13126 &sourcing.contracts,
13127 );
13128 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13129 builder.add_mfg_documents(
13130 &manufacturing.production_orders,
13131 &manufacturing.quality_inspections,
13132 &manufacturing.cycle_counts,
13133 );
13134 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13135 builder.add_audit_documents(
13136 &audit.engagements,
13137 &audit.workpapers,
13138 &audit.findings,
13139 &audit.evidence,
13140 &audit.risk_assessments,
13141 &audit.judgments,
13142 &audit.materiality_calculations,
13143 &audit.audit_opinions,
13144 &audit.going_concern_assessments,
13145 );
13146 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13147
13148 if let Some(ref event_log) = ocpm.event_log {
13150 builder.add_ocpm_events(event_log);
13151 }
13152
13153 if self.config.compliance_regulations.enabled
13155 && hg_settings.governance_layer.include_controls
13156 {
13157 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13159 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13160 .standard_records
13161 .iter()
13162 .filter_map(|r| {
13163 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
13164 registry.get(&sid).cloned()
13165 })
13166 .collect();
13167
13168 builder.add_compliance_regulations(
13169 &standards,
13170 &compliance.findings,
13171 &compliance.filings,
13172 );
13173 }
13174
13175 builder.add_accounts(coa);
13177 builder.add_journal_entries_as_hyperedges(entries);
13178
13179 let hypergraph = builder.build();
13181
13182 let output_dir = self
13184 .output_path
13185 .clone()
13186 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13187 let hg_dir = output_dir
13188 .join(&self.config.graph_export.output_subdirectory)
13189 .join(&hg_settings.output_subdirectory);
13190
13191 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
13193 "unified" => {
13194 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13195 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13196 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
13197 })?;
13198 (
13199 metadata.num_nodes,
13200 metadata.num_edges,
13201 metadata.num_hyperedges,
13202 )
13203 }
13204 _ => {
13205 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
13207 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13208 SynthError::generation(format!("Hypergraph export failed: {e}"))
13209 })?;
13210 (
13211 metadata.num_nodes,
13212 metadata.num_edges,
13213 metadata.num_hyperedges,
13214 )
13215 }
13216 };
13217
13218 #[cfg(feature = "streaming")]
13220 if let Some(ref target_url) = hg_settings.stream_target {
13221 use crate::stream_client::{StreamClient, StreamConfig};
13222 use std::io::Write as _;
13223
13224 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
13225 let stream_config = StreamConfig {
13226 target_url: target_url.clone(),
13227 batch_size: hg_settings.stream_batch_size,
13228 api_key,
13229 ..StreamConfig::default()
13230 };
13231
13232 match StreamClient::new(stream_config) {
13233 Ok(mut client) => {
13234 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13235 match exporter.export_to_writer(&hypergraph, &mut client) {
13236 Ok(_) => {
13237 if let Err(e) = client.flush() {
13238 warn!("Failed to flush stream client: {}", e);
13239 } else {
13240 info!("Streamed {} records to {}", client.total_sent(), target_url);
13241 }
13242 }
13243 Err(e) => {
13244 warn!("Streaming export failed: {}", e);
13245 }
13246 }
13247 }
13248 Err(e) => {
13249 warn!("Failed to create stream client: {}", e);
13250 }
13251 }
13252 }
13253
13254 stats.graph_node_count += num_nodes;
13256 stats.graph_edge_count += num_edges;
13257 stats.graph_export_count += 1;
13258
13259 Ok(HypergraphExportInfo {
13260 node_count: num_nodes,
13261 edge_count: num_edges,
13262 hyperedge_count: num_hyperedges,
13263 output_path: hg_dir,
13264 })
13265 }
13266
13267 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13272 let pb = self.create_progress_bar(100, "Generating Banking Data");
13273
13274 let orchestrator = BankingOrchestratorBuilder::new()
13276 .config(self.config.banking.clone())
13277 .seed(self.seed + 9000)
13278 .country_pack(self.primary_pack().clone())
13279 .build();
13280
13281 if let Some(pb) = &pb {
13282 pb.inc(10);
13283 }
13284
13285 let result = orchestrator.generate();
13287
13288 if let Some(pb) = &pb {
13289 pb.inc(90);
13290 pb.finish_with_message(format!(
13291 "Banking: {} customers, {} transactions",
13292 result.customers.len(),
13293 result.transactions.len()
13294 ));
13295 }
13296
13297 let mut banking_customers = result.customers;
13302 let core_customers = &self.master_data.customers;
13303 if !core_customers.is_empty() {
13304 for (i, bc) in banking_customers.iter_mut().enumerate() {
13305 let core = &core_customers[i % core_customers.len()];
13306 bc.name = CustomerName::business(&core.name);
13307 bc.residence_country = core.country.clone();
13308 bc.enterprise_customer_id = Some(core.customer_id.clone());
13309 }
13310 debug!(
13311 "Cross-referenced {} banking customers with {} core customers",
13312 banking_customers.len(),
13313 core_customers.len()
13314 );
13315 }
13316
13317 Ok(BankingSnapshot {
13318 customers: banking_customers,
13319 accounts: result.accounts,
13320 transactions: result.transactions,
13321 transaction_labels: result.transaction_labels,
13322 customer_labels: result.customer_labels,
13323 account_labels: result.account_labels,
13324 relationship_labels: result.relationship_labels,
13325 narratives: result.narratives,
13326 suspicious_count: result.stats.suspicious_count,
13327 scenario_count: result.scenarios.len(),
13328 })
13329 }
13330
13331 fn calculate_total_transactions(&self) -> u64 {
13333 let months = self.config.global.period_months as f64;
13334 self.config
13335 .companies
13336 .iter()
13337 .map(|c| {
13338 let annual = c.annual_transaction_volume.count() as f64;
13339 let weighted = annual * c.volume_weight;
13340 (weighted * months / 12.0) as u64
13341 })
13342 .sum()
13343 }
13344
13345 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13347 if !self.phase_config.show_progress {
13348 return None;
13349 }
13350
13351 let pb = if let Some(mp) = &self.multi_progress {
13352 mp.add(ProgressBar::new(total))
13353 } else {
13354 ProgressBar::new(total)
13355 };
13356
13357 pb.set_style(
13358 ProgressStyle::default_bar()
13359 .template(&format!(
13360 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13361 ))
13362 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13363 .progress_chars("#>-"),
13364 );
13365
13366 Some(pb)
13367 }
13368
13369 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13371 self.coa.clone()
13372 }
13373
13374 pub fn get_master_data(&self) -> &MasterDataSnapshot {
13376 &self.master_data
13377 }
13378
13379 fn phase_compliance_regulations(
13381 &mut self,
13382 _stats: &mut EnhancedGenerationStatistics,
13383 ) -> SynthResult<ComplianceRegulationsSnapshot> {
13384 if !self.phase_config.generate_compliance_regulations {
13385 return Ok(ComplianceRegulationsSnapshot::default());
13386 }
13387
13388 info!("Phase: Generating Compliance Regulations Data");
13389
13390 let cr_config = &self.config.compliance_regulations;
13391
13392 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13394 self.config
13395 .companies
13396 .iter()
13397 .map(|c| c.country.clone())
13398 .collect::<std::collections::HashSet<_>>()
13399 .into_iter()
13400 .collect()
13401 } else {
13402 cr_config.jurisdictions.clone()
13403 };
13404
13405 let fallback_date =
13407 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13408 let reference_date = cr_config
13409 .reference_date
13410 .as_ref()
13411 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13412 .unwrap_or_else(|| {
13413 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13414 .unwrap_or(fallback_date)
13415 });
13416
13417 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13419 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13420 let cross_reference_records = reg_gen.generate_cross_reference_records();
13421 let jurisdiction_records =
13422 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13423
13424 info!(
13425 " Standards: {} records, {} cross-references, {} jurisdictions",
13426 standard_records.len(),
13427 cross_reference_records.len(),
13428 jurisdiction_records.len()
13429 );
13430
13431 let audit_procedures = if cr_config.audit_procedures.enabled {
13433 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13434 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13435 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13436 confidence_level: cr_config.audit_procedures.confidence_level,
13437 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13438 };
13439 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13440 self.seed + 9000,
13441 proc_config,
13442 );
13443 let registry = reg_gen.registry();
13444 let mut all_procs = Vec::new();
13445 for jurisdiction in &jurisdictions {
13446 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13447 all_procs.extend(procs);
13448 }
13449 info!(" Audit procedures: {}", all_procs.len());
13450 all_procs
13451 } else {
13452 Vec::new()
13453 };
13454
13455 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13457 let finding_config =
13458 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13459 finding_rate: cr_config.findings.finding_rate,
13460 material_weakness_rate: cr_config.findings.material_weakness_rate,
13461 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13462 generate_remediation: cr_config.findings.generate_remediation,
13463 };
13464 let mut finding_gen =
13465 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13466 self.seed + 9100,
13467 finding_config,
13468 );
13469 let mut all_findings = Vec::new();
13470 for company in &self.config.companies {
13471 let company_findings =
13472 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13473 all_findings.extend(company_findings);
13474 }
13475 info!(" Compliance findings: {}", all_findings.len());
13476 all_findings
13477 } else {
13478 Vec::new()
13479 };
13480
13481 let filings = if cr_config.filings.enabled {
13483 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13484 filing_types: cr_config.filings.filing_types.clone(),
13485 generate_status_progression: cr_config.filings.generate_status_progression,
13486 };
13487 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13488 self.seed + 9200,
13489 filing_config,
13490 );
13491 let company_codes: Vec<String> = self
13492 .config
13493 .companies
13494 .iter()
13495 .map(|c| c.code.clone())
13496 .collect();
13497 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13498 .unwrap_or(fallback_date);
13499 let filings = filing_gen.generate_filings(
13500 &company_codes,
13501 &jurisdictions,
13502 start_date,
13503 self.config.global.period_months,
13504 );
13505 info!(" Regulatory filings: {}", filings.len());
13506 filings
13507 } else {
13508 Vec::new()
13509 };
13510
13511 let compliance_graph = if cr_config.graph.enabled {
13513 let graph_config = datasynth_graph::ComplianceGraphConfig {
13514 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13515 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13516 include_cross_references: cr_config.graph.include_cross_references,
13517 include_supersession_edges: cr_config.graph.include_supersession_edges,
13518 include_account_links: cr_config.graph.include_account_links,
13519 include_control_links: cr_config.graph.include_control_links,
13520 include_company_links: cr_config.graph.include_company_links,
13521 };
13522 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13523
13524 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13526 .iter()
13527 .map(|r| datasynth_graph::StandardNodeInput {
13528 standard_id: r.standard_id.clone(),
13529 title: r.title.clone(),
13530 category: r.category.clone(),
13531 domain: r.domain.clone(),
13532 is_active: r.is_active,
13533 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13534 applicable_account_types: r.applicable_account_types.clone(),
13535 applicable_processes: r.applicable_processes.clone(),
13536 })
13537 .collect();
13538 builder.add_standards(&standard_inputs);
13539
13540 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13542 jurisdiction_records
13543 .iter()
13544 .map(|r| datasynth_graph::JurisdictionNodeInput {
13545 country_code: r.country_code.clone(),
13546 country_name: r.country_name.clone(),
13547 framework: r.accounting_framework.clone(),
13548 standard_count: r.standard_count,
13549 tax_rate: r.statutory_tax_rate,
13550 })
13551 .collect();
13552 builder.add_jurisdictions(&jurisdiction_inputs);
13553
13554 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13556 cross_reference_records
13557 .iter()
13558 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13559 from_standard: r.from_standard.clone(),
13560 to_standard: r.to_standard.clone(),
13561 relationship: r.relationship.clone(),
13562 convergence_level: r.convergence_level,
13563 })
13564 .collect();
13565 builder.add_cross_references(&xref_inputs);
13566
13567 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13569 .iter()
13570 .map(|r| datasynth_graph::JurisdictionMappingInput {
13571 country_code: r.jurisdiction.clone(),
13572 standard_id: r.standard_id.clone(),
13573 })
13574 .collect();
13575 builder.add_jurisdiction_mappings(&mapping_inputs);
13576
13577 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13579 .iter()
13580 .map(|p| datasynth_graph::ProcedureNodeInput {
13581 procedure_id: p.procedure_id.clone(),
13582 standard_id: p.standard_id.clone(),
13583 procedure_type: p.procedure_type.clone(),
13584 sample_size: p.sample_size,
13585 confidence_level: p.confidence_level,
13586 })
13587 .collect();
13588 builder.add_procedures(&proc_inputs);
13589
13590 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13592 .iter()
13593 .map(|f| datasynth_graph::FindingNodeInput {
13594 finding_id: f.finding_id.to_string(),
13595 standard_id: f
13596 .related_standards
13597 .first()
13598 .map(|s| s.as_str().to_string())
13599 .unwrap_or_default(),
13600 severity: f.severity.to_string(),
13601 deficiency_level: f.deficiency_level.to_string(),
13602 severity_score: f.deficiency_level.severity_score(),
13603 control_id: f.control_id.clone(),
13604 affected_accounts: f.affected_accounts.clone(),
13605 })
13606 .collect();
13607 builder.add_findings(&finding_inputs);
13608
13609 if cr_config.graph.include_account_links {
13611 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13612 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13613 for std_record in &standard_records {
13614 if let Some(std_obj) =
13615 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13616 &std_record.standard_id,
13617 ))
13618 {
13619 for acct_type in &std_obj.applicable_account_types {
13620 account_links.push(datasynth_graph::AccountLinkInput {
13621 standard_id: std_record.standard_id.clone(),
13622 account_code: acct_type.clone(),
13623 account_name: acct_type.clone(),
13624 });
13625 }
13626 }
13627 }
13628 builder.add_account_links(&account_links);
13629 }
13630
13631 if cr_config.graph.include_control_links {
13633 let mut control_links = Vec::new();
13634 let sox_like_ids: Vec<String> = standard_records
13636 .iter()
13637 .filter(|r| {
13638 r.standard_id.starts_with("SOX")
13639 || r.standard_id.starts_with("PCAOB-AS-2201")
13640 })
13641 .map(|r| r.standard_id.clone())
13642 .collect();
13643 let control_ids = [
13645 ("C001", "Cash Controls"),
13646 ("C002", "Large Transaction Approval"),
13647 ("C010", "PO Approval"),
13648 ("C011", "Three-Way Match"),
13649 ("C020", "Revenue Recognition"),
13650 ("C021", "Credit Check"),
13651 ("C030", "Manual JE Approval"),
13652 ("C031", "Period Close Review"),
13653 ("C032", "Account Reconciliation"),
13654 ("C040", "Payroll Processing"),
13655 ("C050", "Fixed Asset Capitalization"),
13656 ("C060", "Intercompany Elimination"),
13657 ];
13658 for sox_id in &sox_like_ids {
13659 for (ctrl_id, ctrl_name) in &control_ids {
13660 control_links.push(datasynth_graph::ControlLinkInput {
13661 standard_id: sox_id.clone(),
13662 control_id: ctrl_id.to_string(),
13663 control_name: ctrl_name.to_string(),
13664 });
13665 }
13666 }
13667 builder.add_control_links(&control_links);
13668 }
13669
13670 if cr_config.graph.include_company_links {
13672 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13673 .iter()
13674 .enumerate()
13675 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13676 filing_id: format!("F{:04}", i + 1),
13677 filing_type: f.filing_type.to_string(),
13678 company_code: f.company_code.clone(),
13679 jurisdiction: f.jurisdiction.clone(),
13680 status: format!("{:?}", f.status),
13681 })
13682 .collect();
13683 builder.add_filings(&filing_inputs);
13684 }
13685
13686 let graph = builder.build();
13687 info!(
13688 " Compliance graph: {} nodes, {} edges",
13689 graph.nodes.len(),
13690 graph.edges.len()
13691 );
13692 Some(graph)
13693 } else {
13694 None
13695 };
13696
13697 self.check_resources_with_log("post-compliance-regulations")?;
13698
13699 Ok(ComplianceRegulationsSnapshot {
13700 standard_records,
13701 cross_reference_records,
13702 jurisdiction_records,
13703 audit_procedures,
13704 findings,
13705 filings,
13706 compliance_graph,
13707 })
13708 }
13709
13710 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13712 use super::lineage::LineageGraphBuilder;
13713
13714 let mut builder = LineageGraphBuilder::new();
13715
13716 builder.add_config_section("config:global", "Global Config");
13718 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13719 builder.add_config_section("config:transactions", "Transaction Config");
13720
13721 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13723 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13724
13725 builder.configured_by("phase:coa", "config:chart_of_accounts");
13727 builder.configured_by("phase:je", "config:transactions");
13728
13729 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13731 builder.produced_by("output:je", "phase:je");
13732
13733 if self.phase_config.generate_master_data {
13735 builder.add_config_section("config:master_data", "Master Data Config");
13736 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13737 builder.configured_by("phase:master_data", "config:master_data");
13738 builder.input_to("phase:master_data", "phase:je");
13739 }
13740
13741 if self.phase_config.generate_document_flows {
13742 builder.add_config_section("config:document_flows", "Document Flow Config");
13743 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13744 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13745 builder.configured_by("phase:p2p", "config:document_flows");
13746 builder.configured_by("phase:o2c", "config:document_flows");
13747
13748 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13749 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13750 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13751 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13752 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13753
13754 builder.produced_by("output:po", "phase:p2p");
13755 builder.produced_by("output:gr", "phase:p2p");
13756 builder.produced_by("output:vi", "phase:p2p");
13757 builder.produced_by("output:so", "phase:o2c");
13758 builder.produced_by("output:ci", "phase:o2c");
13759 }
13760
13761 if self.phase_config.inject_anomalies {
13762 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13763 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13764 builder.configured_by("phase:anomaly", "config:fraud");
13765 builder.add_output_file(
13766 "output:labels",
13767 "Anomaly Labels",
13768 "labels/anomaly_labels.csv",
13769 );
13770 builder.produced_by("output:labels", "phase:anomaly");
13771 }
13772
13773 if self.phase_config.generate_audit {
13774 builder.add_config_section("config:audit", "Audit Config");
13775 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13776 builder.configured_by("phase:audit", "config:audit");
13777 }
13778
13779 if self.phase_config.generate_banking {
13780 builder.add_config_section("config:banking", "Banking Config");
13781 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13782 builder.configured_by("phase:banking", "config:banking");
13783 }
13784
13785 if self.config.llm.enabled {
13786 builder.add_config_section("config:llm", "LLM Enrichment Config");
13787 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13788 builder.configured_by("phase:llm_enrichment", "config:llm");
13789 }
13790
13791 if self.config.diffusion.enabled {
13792 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13793 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13794 builder.configured_by("phase:diffusion", "config:diffusion");
13795 }
13796
13797 if self.config.causal.enabled {
13798 builder.add_config_section("config:causal", "Causal Generation Config");
13799 builder.add_generator_phase("phase:causal", "Causal Overlay");
13800 builder.configured_by("phase:causal", "config:causal");
13801 }
13802
13803 builder.build()
13804 }
13805
13806 fn compute_company_revenue(
13815 entries: &[JournalEntry],
13816 company_code: &str,
13817 ) -> rust_decimal::Decimal {
13818 use rust_decimal::Decimal;
13819 let mut revenue = Decimal::ZERO;
13820 for je in entries {
13821 if je.header.company_code != company_code {
13822 continue;
13823 }
13824 for line in &je.lines {
13825 if line.gl_account.starts_with('4') {
13826 revenue += line.credit_amount - line.debit_amount;
13828 }
13829 }
13830 }
13831 revenue.max(Decimal::ZERO)
13832 }
13833
13834 fn compute_entity_net_assets(
13838 entries: &[JournalEntry],
13839 entity_code: &str,
13840 ) -> rust_decimal::Decimal {
13841 use rust_decimal::Decimal;
13842 let mut asset_net = Decimal::ZERO;
13843 let mut liability_net = Decimal::ZERO;
13844 for je in entries {
13845 if je.header.company_code != entity_code {
13846 continue;
13847 }
13848 for line in &je.lines {
13849 if line.gl_account.starts_with('1') {
13850 asset_net += line.debit_amount - line.credit_amount;
13851 } else if line.gl_account.starts_with('2') {
13852 liability_net += line.credit_amount - line.debit_amount;
13853 }
13854 }
13855 }
13856 asset_net - liability_net
13857 }
13858}
13859
13860fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13862 match format {
13863 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13864 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13865 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13866 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13867 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13868 }
13869}
13870
13871fn compute_trial_balance_entries(
13876 entries: &[JournalEntry],
13877 entity_code: &str,
13878 fiscal_year: i32,
13879 coa: Option<&ChartOfAccounts>,
13880) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13881 use std::collections::BTreeMap;
13882
13883 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13884 BTreeMap::new();
13885
13886 for je in entries {
13887 for line in &je.lines {
13888 let entry = balances.entry(line.account_code.clone()).or_default();
13889 entry.0 += line.debit_amount;
13890 entry.1 += line.credit_amount;
13891 }
13892 }
13893
13894 balances
13895 .into_iter()
13896 .map(
13897 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13898 account_description: coa
13899 .and_then(|c| c.get_account(&account_code))
13900 .map(|a| a.description().to_string())
13901 .unwrap_or_else(|| account_code.clone()),
13902 account_code,
13903 debit_balance: debit,
13904 credit_balance: credit,
13905 net_balance: debit - credit,
13906 entity_code: entity_code.to_string(),
13907 period: format!("FY{}", fiscal_year),
13908 },
13909 )
13910 .collect()
13911}
13912
13913#[cfg(test)]
13914#[allow(clippy::unwrap_used)]
13915mod tests {
13916 use super::*;
13917 use datasynth_config::schema::*;
13918
13919 fn create_test_config() -> GeneratorConfig {
13920 GeneratorConfig {
13921 global: GlobalConfig {
13922 industry: IndustrySector::Manufacturing,
13923 start_date: "2024-01-01".to_string(),
13924 period_months: 1,
13925 seed: Some(42),
13926 parallel: false,
13927 group_currency: "USD".to_string(),
13928 presentation_currency: None,
13929 worker_threads: 0,
13930 memory_limit_mb: 0,
13931 fiscal_year_months: None,
13932 },
13933 companies: vec![CompanyConfig {
13934 code: "1000".to_string(),
13935 name: "Test Company".to_string(),
13936 currency: "USD".to_string(),
13937 functional_currency: None,
13938 country: "US".to_string(),
13939 annual_transaction_volume: TransactionVolume::TenK,
13940 volume_weight: 1.0,
13941 fiscal_year_variant: "K4".to_string(),
13942 }],
13943 chart_of_accounts: ChartOfAccountsConfig {
13944 complexity: CoAComplexity::Small,
13945 industry_specific: true,
13946 custom_accounts: None,
13947 min_hierarchy_depth: 2,
13948 max_hierarchy_depth: 4,
13949 },
13950 transactions: TransactionConfig::default(),
13951 output: OutputConfig::default(),
13952 fraud: FraudConfig::default(),
13953 internal_controls: InternalControlsConfig::default(),
13954 business_processes: BusinessProcessConfig::default(),
13955 user_personas: UserPersonaConfig::default(),
13956 templates: TemplateConfig::default(),
13957 approval: ApprovalConfig::default(),
13958 departments: DepartmentConfig::default(),
13959 master_data: MasterDataConfig::default(),
13960 document_flows: DocumentFlowConfig::default(),
13961 intercompany: IntercompanyConfig::default(),
13962 balance: BalanceConfig::default(),
13963 ocpm: OcpmConfig::default(),
13964 audit: AuditGenerationConfig::default(),
13965 banking: datasynth_banking::BankingConfig::default(),
13966 data_quality: DataQualitySchemaConfig::default(),
13967 scenario: ScenarioConfig::default(),
13968 temporal: TemporalDriftConfig::default(),
13969 graph_export: GraphExportConfig::default(),
13970 streaming: StreamingSchemaConfig::default(),
13971 rate_limit: RateLimitSchemaConfig::default(),
13972 temporal_attributes: TemporalAttributeSchemaConfig::default(),
13973 relationships: RelationshipSchemaConfig::default(),
13974 accounting_standards: AccountingStandardsConfig::default(),
13975 audit_standards: AuditStandardsConfig::default(),
13976 distributions: Default::default(),
13977 temporal_patterns: Default::default(),
13978 vendor_network: VendorNetworkSchemaConfig::default(),
13979 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13980 relationship_strength: RelationshipStrengthSchemaConfig::default(),
13981 cross_process_links: CrossProcessLinksSchemaConfig::default(),
13982 organizational_events: OrganizationalEventsSchemaConfig::default(),
13983 behavioral_drift: BehavioralDriftSchemaConfig::default(),
13984 market_drift: MarketDriftSchemaConfig::default(),
13985 drift_labeling: DriftLabelingSchemaConfig::default(),
13986 anomaly_injection: Default::default(),
13987 industry_specific: Default::default(),
13988 fingerprint_privacy: Default::default(),
13989 quality_gates: Default::default(),
13990 compliance: Default::default(),
13991 webhooks: Default::default(),
13992 llm: Default::default(),
13993 diffusion: Default::default(),
13994 causal: Default::default(),
13995 source_to_pay: Default::default(),
13996 financial_reporting: Default::default(),
13997 hr: Default::default(),
13998 manufacturing: Default::default(),
13999 sales_quotes: Default::default(),
14000 tax: Default::default(),
14001 treasury: Default::default(),
14002 project_accounting: Default::default(),
14003 esg: Default::default(),
14004 country_packs: None,
14005 scenarios: Default::default(),
14006 session: Default::default(),
14007 compliance_regulations: Default::default(),
14008 }
14009 }
14010
/// Constructing an orchestrator with default phases succeeds for the baseline config.
#[test]
fn test_enhanced_orchestrator_creation() {
    let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(orchestrator.is_ok());
}
14017
/// With only journal entry generation switched on, `generate` succeeds and
/// yields at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate();
    assert!(result.is_ok());

    let result = result.unwrap();
    assert!(!result.journal_entries.is_empty());
}
14037
/// With only master data generation switched on, vendors, customers and
/// materials are all populated.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Small per-company volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    assert!(!result.master_data.vendors.is_empty());
    assert!(!result.master_data.customers.is_empty());
    assert!(!result.master_data.materials.is_empty());
}
14062
/// With master data and document flows enabled, both P2P and O2C chains are
/// produced, along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        // Everything not needed for document flows is turned off.
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    // Document chains exist for both processes.
    assert!(!result.document_flows.p2p_chains.is_empty());
    assert!(!result.document_flows.o2c_chains.is_empty());

    // The flattened document collections are populated as well.
    assert!(!result.document_flows.purchase_orders.is_empty());
    assert!(!result.document_flows.sales_orders.is_empty());
}
14096
/// With anomaly injection enabled on top of journal entries, entries are
/// generated and an anomaly label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    assert!(!result.journal_entries.is_empty());

    // Only the presence of the summary is checked, not its contents.
    assert!(result.anomaly_labels.summary.is_some());
}
14119
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    // Every enabled stage produced data.
    assert!(!result.master_data.vendors.is_empty());
    assert!(!result.master_data.customers.is_empty());
    assert!(!result.document_flows.p2p_chains.is_empty());
    assert!(!result.document_flows.o2c_chains.is_empty());
    assert!(!result.journal_entries.is_empty());
    assert!(result.statistics.accounts_count > 0);

    // AP/AR subledger invoices are present.
    assert!(!result.subledger.ap_invoices.is_empty());
    assert!(!result.subledger.ar_invoices.is_empty());

    // Balance validation ran and processed entries.
    assert!(result.balance_validation.validated);
    assert!(result.balance_validation.entries_processed > 0);
}
14161
/// Checks that subledger invoices line up one-to-one (by count) with the
/// invoices from the document flows, and that statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    // Document flows produced invoices on both sides.
    assert!(!result.document_flows.vendor_invoices.is_empty());
    assert!(!result.document_flows.customer_invoices.is_empty());

    // The subledger holds invoices on both sides as well.
    assert!(!result.subledger.ap_invoices.is_empty());
    assert!(!result.subledger.ar_invoices.is_empty());

    // AP invoice count matches the vendor invoice count.
    assert_eq!(
        result.subledger.ap_invoices.len(),
        result.document_flows.vendor_invoices.len()
    );

    // AR invoice count matches the customer invoice count.
    assert_eq!(
        result.subledger.ar_invoices.len(),
        result.document_flows.customer_invoices.len()
    );

    // Statistics agree with the subledger contents.
    assert_eq!(
        result.statistics.ap_invoice_count,
        result.subledger.ap_invoices.len()
    );
    assert_eq!(
        result.statistics.ar_invoice_count,
        result.subledger.ar_invoices.len()
    );
}
14217
/// With journal entries and balance validation enabled, validation runs,
/// reports no unbalanced entries, and total debits equal total credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    // Validation actually ran over some entries.
    assert!(result.balance_validation.validated);
    assert!(result.balance_validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!result.balance_validation.has_unbalanced_entries);

    // Overall debits and credits agree.
    assert_eq!(
        result.balance_validation.total_debits,
        result.balance_validation.total_credits
    );
}
14247
/// The counts reported in `statistics` match the sizes of the generated
/// master data collections and the journal entry list.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    // Master data counts.
    assert_eq!(
        result.statistics.vendor_count,
        result.master_data.vendors.len()
    );
    assert_eq!(
        result.statistics.customer_count,
        result.master_data.customers.len()
    );
    assert_eq!(
        result.statistics.material_count,
        result.master_data.materials.len()
    );

    // Journal entry count (statistics value is cast to usize for comparison).
    assert_eq!(
        result.statistics.total_entries as usize,
        result.journal_entries.len()
    );
}
14286
/// Pins the defaults of `PhaseConfig`: the main generation phases, balance
/// validation and progress display are on, anomaly injection is off, and the
/// per-company vendor/customer counts are non-zero.
#[test]
fn test_phase_config_defaults() {
    let config = PhaseConfig::default();

    // Switched on by default.
    assert!(config.generate_master_data);
    assert!(config.generate_document_flows);
    assert!(config.generate_journal_entries);

    // Switched off by default.
    assert!(!config.inject_anomalies);

    assert!(config.validate_balances);
    assert!(config.show_progress);

    // Default volumes are non-zero.
    assert!(config.vendors_per_company > 0);
    assert!(config.customers_per_company > 0);
}
14299
/// Before `generate` has been called, no chart of accounts is available.
#[test]
fn test_get_coa_before_generation() {
    let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(orchestrator.get_coa().is_none());
}
14308
/// After a successful `generate` run, the chart of accounts is available.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // The generation output itself is not inspected here.
    let _ = orchestrator.generate().unwrap();

    assert!(orchestrator.get_coa().is_some());
}
14327
/// With master data generation enabled, the result's master data contains vendors.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    assert!(!result.master_data.vendors.is_empty());
}
14351
/// `with_progress(false)` turns off `show_progress` in the orchestrator's phase config.
#[test]
fn test_with_progress_builder() {
    let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config())
        .unwrap()
        .with_progress(false);

    assert!(!orchestrator.phase_config.show_progress);
}
14362
/// Adds a second company to the baseline config and checks that master data
/// scales with the number of companies and that both companies are counted.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across 2 companies gives a lower bound of 10.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);

    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
14399
/// When document flows are requested but master data generation is off,
/// no P2P or O2C chains are produced and generation still succeeds.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // Document flows are requested without any master data to build on.
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    assert!(result.document_flows.p2p_chains.is_empty());
    assert!(result.document_flows.o2c_chains.is_empty());
}
14419
/// `statistics.total_line_items` equals the sum of `line_count()` over all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let result = orch.generate().unwrap();

    // Recount the line items independently of the statistics.
    let calculated_line_items = result
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();

    assert_eq!(result.statistics.total_line_items, calculated_line_items);
}
14443
/// Exercises generation with the audit phase enabled and two engagements
/// requested.
///
/// The test checks three things on the generated output:
/// 1. exactly two engagements are produced;
/// 2. the audit collections asserted below (workpapers, evidence, risk
///    assessments, findings, judgments, confirmations and their responses,
///    procedure steps, analytical results, IA functions, related parties)
///    are non-empty;
/// 3. every audit counter in `statistics` equals the length of the
///    collection it describes.
#[test]
fn test_audit_generation() {
    let config = create_test_config();
    let audit_phases = PhaseConfig {
        // Phase toggles.
        generate_journal_entries: true,
        generate_audit: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Audit volume settings.
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, audit_phases).unwrap();
    let output = generator.generate().unwrap();

    // Short handles for the two parts of the result inspected below.
    let audit = &output.audit;
    let stats = &output.statistics;

    // The number of engagements must match the requested count exactly.
    assert_eq!(audit.engagements.len(), 2);

    // Core audit artefacts must be present.
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Additional audit artefacts, each with an explanatory failure message.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Each statistics counter must mirror the size of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
14563
/// Confirms that the LLM, diffusion and causal phases are off in the test
/// configuration and leave no trace in the output.
///
/// The `enabled` flags are asserted to be false first. After a run, all
/// related statistics must be zero (or `None` for the causal validation
/// outcome) and no counterfactual pairs may be present.
#[test]
fn test_new_phases_disabled_by_default() {
    let config = create_test_config();

    // Precondition: none of the optional phases is enabled by the test config.
    assert!(!config.llm.enabled);
    assert!(!config.diffusion.enabled);
    assert!(!config.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment left no footprint.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement left no footprint.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal generation left no footprint.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // No counterfactual pairs were produced.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
14595
/// Verifies counterfactual pair generation when `generate_counterfactuals`
/// is switched on (period close is explicitly switched off).
///
/// When at least one journal entry was generated, the test expects one
/// counterfactual pair per journal entry, a matching statistics counter, and
/// pairwise distinct `pair_id` values. With no journal entries nothing is
/// asserted.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_counterfactuals: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        generate_period_close: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = generator.generate().unwrap();

    // Nothing to verify when the run produced no journal entries.
    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    assert_eq!(output.counterfactual_pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Collecting the ids into a set drops duplicates, so equal sizes mean
    // every pair id is unique.
    let distinct_ids: std::collections::HashSet<_> = output
        .counterfactual_pairs
        .iter()
        .map(|pair| &pair.pair_id)
        .collect();
    assert_eq!(distinct_ids.len(), output.counterfactual_pairs.len());
}
14632
/// Runs master data generation with LLM enrichment enabled and a cap of
/// three vendor enrichments.
///
/// Expects at least one vendor to be enriched and the enriched count not to
/// exceed the configured cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut config = create_test_config();
    config.llm.max_vendor_enrichments = 3;
    config.llm.enabled = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Small master data volumes; five vendors exceed the cap of three.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = generator.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
14660
/// Enables the diffusion phase with a sample size of 20 (50 steps) and
/// checks that the statistics report exactly 20 generated samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut config = create_test_config();
    config.diffusion.sample_size = 20;
    config.diffusion.n_steps = 50;
    config.diffusion.enabled = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = generator.generate().unwrap();

    // The reported sample count must equal the configured sample size.
    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
14683
/// Enables the causal phase with the `"fraud_detection"` template, 100
/// samples and validation turned on.
///
/// Expects 100 generated samples and a recorded validation outcome; whether
/// validation passed or failed is not asserted.
#[test]
fn test_causal_overlay_enabled() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = String::from("fraud_detection");
    config.causal.sample_size = 100;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so some outcome must have been recorded.
    assert!(stats.causal_validation_passed.is_some());
}
14709
/// Enables the causal phase with the `"revenue_cycle"` template, 50 samples
/// and validation turned off.
///
/// Expects 50 generated samples and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = String::from("revenue_cycle");
    config.causal.sample_size = 50;
    config.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
14735
/// Runs master data and journal entry generation with the LLM, diffusion and
/// causal phases all enabled in the same run.
///
/// Expectations:
/// - at least one vendor is enriched, and no more than the configured cap of
///   two (`max_vendor_enrichments`);
/// - the diffusion sample count equals the configured sample size (10);
/// - the causal sample count equals the configured sample size (50);
/// - a causal validation outcome is recorded, since validation is requested.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Five vendors per company exceed the enrichment cap of two.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    // The cap must still be honoured when the other phases run alongside,
    // mirroring the upper-bound check in `test_llm_enrichment_enabled`.
    assert!(
        stats.llm_vendors_enriched <= 2,
        "LLM vendor enrichments should not exceed max_vendor_enrichments"
    );
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14771
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and
/// checks that the LLM, diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        // LLM enrichment.
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        // Diffusion enhancement.
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        // Causal generation.
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
14798
/// Deserializes a statistics JSON document that contains none of the LLM,
/// diffusion or causal fields.
///
/// Parsing must succeed, and the absent fields must come back as zero
/// (or `None` for `causal_validation_passed`).
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any llm_*, diffusion_* or causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Fields missing from the payload must fall back to zero / None.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
14848}