1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Switches and volume settings for the generation phases.
///
/// Each `generate_*`, `inject_*` and `validate_*` field is a boolean switch named after one
/// phase; the `usize` fields are per-company or per-engagement volumes. How each value is
/// consumed is decided by code outside this definition.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Switch for master data.
    pub generate_master_data: bool,
    /// Switch for document flows.
    pub generate_document_flows: bool,
    /// Switch for OCPM events.
    pub generate_ocpm_events: bool,
    /// Switch for journal entries.
    pub generate_journal_entries: bool,
    /// Switch for anomaly injection.
    pub inject_anomalies: bool,
    /// Switch for data-quality injection.
    pub inject_data_quality: bool,
    /// Switch for balance validation.
    pub validate_balances: bool,
    /// Switch for progress display.
    pub show_progress: bool,
    /// Vendor volume per company.
    pub vendors_per_company: usize,
    /// Customer volume per company.
    pub customers_per_company: usize,
    /// Material volume per company.
    pub materials_per_company: usize,
    /// Asset volume per company.
    pub assets_per_company: usize,
    /// Employee volume per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Switch for audit data.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpaper volume per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence volume per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk volume per engagement.
    pub risks_per_engagement: usize,
    /// Finding volume per engagement.
    pub findings_per_engagement: usize,
    /// Judgment volume per engagement.
    pub judgments_per_engagement: usize,
    /// Switch for banking data.
    pub generate_banking: bool,
    /// Switch for graph export.
    pub generate_graph_export: bool,
    /// Switch for sourcing data.
    pub generate_sourcing: bool,
    /// Switch for bank reconciliation.
    pub generate_bank_reconciliation: bool,
    /// Switch for financial statements.
    pub generate_financial_statements: bool,
    /// Switch for accounting-standards data.
    pub generate_accounting_standards: bool,
    /// Switch for manufacturing data.
    pub generate_manufacturing: bool,
    /// Switch for sales quotes, KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Switch for tax data.
    pub generate_tax: bool,
    /// Switch for ESG data.
    pub generate_esg: bool,
    /// Switch for intercompany data.
    pub generate_intercompany: bool,
    /// Switch for evolution events.
    pub generate_evolution_events: bool,
    /// Switch for counterfactuals.
    pub generate_counterfactuals: bool,
    /// Switch for compliance-regulation data.
    pub generate_compliance_regulations: bool,
    /// Switch for period close.
    pub generate_period_close: bool,
    /// Switch for HR data.
    pub generate_hr: bool,
    /// Switch for treasury data.
    pub generate_treasury: bool,
    /// Switch for project-accounting data.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: master data, document flows, journal entries,
    /// balance validation, progress display, evolution events and period close are on; every
    /// other switch is off; volumes carry the constants listed below.
    fn default() -> Self {
        Self {
            // Switches that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // Switches that are off by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // Master-data and document-flow volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
/// Container for the master-data collections.
///
/// Derives `Default`, so a default snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Summary of a hypergraph export: three element counts and a path.
///
/// Does not derive `Default`; every field must be supplied when constructing it.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Output location (presumably where the export was written; confirm at the producer).
    pub output_path: PathBuf,
}
474
/// Container for document-flow data: the P2P and O2C chains plus per-document-type vectors.
///
/// NOTE(review): whether the flat document vectors duplicate the documents inside the chains
/// is not visible from this definition; check the code that fills the snapshot.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Container for subledger data: AP, AR, fixed-asset and inventory records plus the reports
/// and runs built on them.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// Container for the OCPM event log and its summary counts.
///
/// The default value has `event_log` set to `None` and all counts at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
540
/// Container for audit-related data.
///
/// Every field is a vector except `group_audit_plan` and `fsm_event_trail`, which are
/// optional and default to `None`.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mappings.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, if one is present.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
/// Container for banking data: customers, accounts, transactions, their label sets,
/// narratives, and two summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious count (presumably suspicious transactions; confirm at the producer).
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
678
/// Serializable summary of graph exports.
///
/// The default value is `exported == false`, `graph_count == 0` and an empty map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string (presumably the export name; confirm at the
    /// producer).
    pub exports: HashMap<String, GraphExportInfo>,
}
689
/// Serializable description of one graph export.
///
/// Does not derive `Default`; every field must be supplied when constructing it.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Output location.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
704
/// Container for sourcing data, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
727
/// Trial balance for one fiscal period; serializable and deserializable.
///
/// Does not derive `Default`; every field must be supplied when constructing it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period within the year.
    pub fiscal_period: u8,
    /// Start date of the period.
    pub period_start: NaiveDate,
    /// End date of the period.
    pub period_end: NaiveDate,
    /// Trial-balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
742
/// Container for financial-reporting data: statements, consolidation schedules, bank
/// reconciliations, trial balances, segment data and notes.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key (presumably the entity or company
    /// code; confirm at the producer).
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
767
/// Container for HR data: payroll, time, expenses, benefits, pensions and stock
/// compensation, followed by summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors; nothing in this
/// definition keeps them equal to the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries for pensions.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock-compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries for stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
812
/// Container for accounting-standards data: revenue contracts, impairment tests, business
/// combinations, expected credit loss, provisions and currency translation, followed by
/// summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors; nothing in this
/// definition keeps them equal to the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// ECL models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries for ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries for provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
}
856
/// Container for compliance-regulation data, with an optional graph.
///
/// The default value has every vector empty and `compliance_graph` set to `None`.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
875
/// Container for manufacturing data, followed by summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors; nothing in this
/// definition keeps them equal to the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
900
/// Container for sales quotes, management KPIs and budgets, followed by summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count. NOTE(review): the name says lines, not budgets, so this need not
    /// equal `budgets.len()`; confirm at the producer.
    pub budget_line_count: usize,
}
917
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// The labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string (presumably the anomaly type name; confirm at the producer).
    pub by_type: HashMap<String, usize>,
}
928
/// Outcome of balance validation.
///
/// The default value has `validated == false` and `is_balanced == false`. Read `is_balanced`
/// together with `validated`, since the default cannot be told apart from "not yet run".
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
951
/// Container for tax data: jurisdictions, codes, lines, returns, provisions, withholding
/// records, anomaly labels, deferred tax and posting journal entries.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred-tax snapshot.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
978
/// Container for intercompany data; serializable and deserializable.
///
/// `ic_document_chains` is marked `#[serde(skip)]`, so it is left out when serializing and
/// takes its default (`None`) when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains, if present; excluded from serde.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate (scale not visible here: fraction or percent; confirm at the producer).
    pub match_rate: f64,
}
1004
/// Container for ESG data, one vector per record type, followed by two summary counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1041
/// Container for treasury data, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries held with the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1069
/// Container for project-accounting data, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1086
/// Top-level result aggregate: one field per snapshot or collection.
///
/// Derives `Debug` and `Default` only. It is not `Clone`, so the value has to be moved or
/// borrowed rather than copied.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph-export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial-reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project-accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Summary statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, if one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// SoD violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains for vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, if one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1178
/// Counters and timings collected while the enhanced generation workflow runs.
///
/// The struct derives `Default`, so `generate` can start from an all-zero
/// value and fill in only the counters that apply. Phase methods receive
/// `&mut` access to it.
///
/// Fields marked `#[serde(default)]` may be absent from the input when
/// deserializing and then take their default value; fields without the
/// attribute are required. Fields that additionally carry
/// `skip_serializing_if = "Option::is_none"` are omitted from the serialized
/// output while they are `None`.
///
/// NOTE(review): only `total_entries`, `total_line_items`, `companies_count`
/// and `period_months` are assigned in the part of `generate` reviewed here;
/// the remaining counters are presumably filled by the individual phase
/// methods — confirm against those methods.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Number of journal entries in the combined entry list; `generate` sets
    /// it from the list length once the JE-generating phases have run (only
    /// when the list is non-empty).
    pub total_entries: u64,
    /// Sum of `line_count()` over the same combined entry list.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Number of companies in the configuration (`config.companies.len()`).
    pub companies_count: usize,
    /// Copied from `config.global.period_months` at the start of `generate`.
    pub period_months: u32,

    // Master data and document-flow counters.
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // Counters prefixed `ocpm_`.
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // Counters prefixed `audit_`. The later ones are optional on input.
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // Anomaly and data-quality counters.
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // Counters prefixed `banking_`.
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // Counters prefixed `graph_`.
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // LLM / diffusion / neural / causal metrics. The `_ms` suffix suggests
    // durations in milliseconds — TODO confirm at the assignment sites.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    // Unlike the `neural_*` options above, this one is serialized even when `None`.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // Sourcing counters.
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // Financial reporting counters.
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // HR counters.
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // Accounting-standards counters.
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // Manufacturing counters.
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // Sales, KPI and budget counters.
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // Tax master-data counters.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // Counters prefixed `esg_`.
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // Counters prefixed `ic_` (intercompany).
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // Subledger counters.
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // Treasury, project and tax-provision counters.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // Event, fraud-analytics and relationship counters.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1439
/// Drives the enhanced generation workflow; the entry point is `generate`.
///
/// Instances are created with `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data` and then customised through the `with_*` builder
/// methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; `new` validates it before building the orchestrator.
    config: GeneratorConfig,
    /// Phase settings; `with_progress` writes its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; initialised to `None` by `new`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; `generate` streams its vendors, customers and
    /// materials to the phase sink.
    master_data: MasterDataSnapshot,
    /// Base seed: `config.global.seed` when set, otherwise a random value.
    /// `generate` seeds sub-generators with fixed offsets added to it.
    seed: u64,
    /// Progress-bar container created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`; rebuilt when an output
    /// path is supplied.
    resource_guard: ResourceGuard,
    /// Output location supplied through `with_output_path`, if any.
    output_path: Option<PathBuf>,
    /// Copula generators taken from a fingerprint synthesis result; empty
    /// unless the orchestrator was built from a fingerprint.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country packs: built-in only, or extended from `config.country_packs`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives generated items phase by phase.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1459
1460impl EnhancedOrchestrator {
1461 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1463 datasynth_config::validate_config(&config)?;
1464
1465 let seed = config.global.seed.unwrap_or_else(rand::random);
1466
1467 let resource_guard = Self::build_resource_guard(&config, None);
1469
1470 let country_pack_registry = match &config.country_packs {
1472 Some(cp) => {
1473 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1474 .map_err(|e| SynthError::config(e.to_string()))?
1475 }
1476 None => datasynth_core::CountryPackRegistry::builtin_only()
1477 .map_err(|e| SynthError::config(e.to_string()))?,
1478 };
1479
1480 Ok(Self {
1481 config,
1482 phase_config,
1483 coa: None,
1484 master_data: MasterDataSnapshot::default(),
1485 seed,
1486 multi_progress: None,
1487 resource_guard,
1488 output_path: None,
1489 copula_generators: Vec::new(),
1490 country_pack_registry,
1491 phase_sink: None,
1492 })
1493 }
1494
1495 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1497 Self::new(config, PhaseConfig::default())
1498 }
1499
1500 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1502 self.phase_sink = Some(sink);
1503 self
1504 }
1505
1506 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1508 self.phase_sink = Some(sink);
1509 }
1510
1511 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1513 if let Some(ref sink) = self.phase_sink {
1514 for item in items {
1515 if let Ok(value) = serde_json::to_value(item) {
1516 if let Err(e) = sink.emit(phase, type_name, &value) {
1517 warn!(
1518 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1519 );
1520 }
1521 }
1522 }
1523 if let Err(e) = sink.phase_complete(phase) {
1524 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1525 }
1526 }
1527 }
1528
1529 pub fn with_progress(mut self, show: bool) -> Self {
1531 self.phase_config.show_progress = show;
1532 if show {
1533 self.multi_progress = Some(MultiProgress::new());
1534 }
1535 self
1536 }
1537
1538 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1540 let path = path.into();
1541 self.output_path = Some(path.clone());
1542 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1544 self
1545 }
1546
1547 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1549 &self.country_pack_registry
1550 }
1551
1552 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1554 self.country_pack_registry.get_by_str(country)
1555 }
1556
1557 fn primary_country_code(&self) -> &str {
1560 self.config
1561 .companies
1562 .first()
1563 .map(|c| c.country.as_str())
1564 .unwrap_or("US")
1565 }
1566
1567 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1569 self.country_pack_for(self.primary_country_code())
1570 }
1571
1572 fn resolve_coa_framework(&self) -> CoAFramework {
1574 if self.config.accounting_standards.enabled {
1575 match self.config.accounting_standards.framework {
1576 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1577 return CoAFramework::FrenchPcg;
1578 }
1579 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1580 return CoAFramework::GermanSkr04;
1581 }
1582 _ => {}
1583 }
1584 }
1585 let pack = self.primary_pack();
1587 match pack.accounting.framework.as_str() {
1588 "french_gaap" => CoAFramework::FrenchPcg,
1589 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1590 _ => CoAFramework::UsGaap,
1591 }
1592 }
1593
1594 pub fn has_copulas(&self) -> bool {
1599 !self.copula_generators.is_empty()
1600 }
1601
1602 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1608 &self.copula_generators
1609 }
1610
1611 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1615 &mut self.copula_generators
1616 }
1617
1618 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1622 self.copula_generators
1623 .iter_mut()
1624 .find(|c| c.name == copula_name)
1625 .map(|c| c.generator.sample())
1626 }
1627
1628 pub fn from_fingerprint(
1651 fingerprint_path: &std::path::Path,
1652 phase_config: PhaseConfig,
1653 scale: f64,
1654 ) -> SynthResult<Self> {
1655 info!("Loading fingerprint from: {}", fingerprint_path.display());
1656
1657 let reader = FingerprintReader::new();
1659 let fingerprint = reader
1660 .read_from_file(fingerprint_path)
1661 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1662
1663 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1664 }
1665
1666 pub fn from_fingerprint_data(
1673 fingerprint: Fingerprint,
1674 phase_config: PhaseConfig,
1675 scale: f64,
1676 ) -> SynthResult<Self> {
1677 info!(
1678 "Synthesizing config from fingerprint (version: {}, tables: {})",
1679 fingerprint.manifest.version,
1680 fingerprint.schema.tables.len()
1681 );
1682
1683 let seed: u64 = rand::random();
1685 info!("Fingerprint synthesis seed: {}", seed);
1686
1687 let options = SynthesisOptions {
1689 scale,
1690 seed: Some(seed),
1691 preserve_correlations: true,
1692 inject_anomalies: true,
1693 };
1694 let synthesizer = ConfigSynthesizer::with_options(options);
1695
1696 let synthesis_result = synthesizer
1698 .synthesize_full(&fingerprint, seed)
1699 .map_err(|e| {
1700 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1701 })?;
1702
1703 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1705 Self::base_config_for_industry(industry)
1706 } else {
1707 Self::base_config_for_industry("manufacturing")
1708 };
1709
1710 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1712
1713 info!(
1715 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1716 fingerprint.schema.tables.len(),
1717 scale,
1718 synthesis_result.copula_generators.len()
1719 );
1720
1721 if !synthesis_result.copula_generators.is_empty() {
1722 for spec in &synthesis_result.copula_generators {
1723 info!(
1724 " Copula '{}' for table '{}': {} columns",
1725 spec.name,
1726 spec.table,
1727 spec.columns.len()
1728 );
1729 }
1730 }
1731
1732 let mut orchestrator = Self::new(config, phase_config)?;
1734
1735 orchestrator.copula_generators = synthesis_result.copula_generators;
1737
1738 Ok(orchestrator)
1739 }
1740
1741 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1743 use datasynth_config::presets::create_preset;
1744 use datasynth_config::TransactionVolume;
1745 use datasynth_core::models::{CoAComplexity, IndustrySector};
1746
1747 let sector = match industry.to_lowercase().as_str() {
1748 "manufacturing" => IndustrySector::Manufacturing,
1749 "retail" => IndustrySector::Retail,
1750 "financial" | "financial_services" => IndustrySector::FinancialServices,
1751 "healthcare" => IndustrySector::Healthcare,
1752 "technology" | "tech" => IndustrySector::Technology,
1753 _ => IndustrySector::Manufacturing,
1754 };
1755
1756 create_preset(
1758 sector,
1759 1, 12, CoAComplexity::Medium,
1762 TransactionVolume::TenK,
1763 )
1764 }
1765
1766 fn apply_config_patch(
1768 mut config: GeneratorConfig,
1769 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1770 ) -> GeneratorConfig {
1771 use datasynth_fingerprint::synthesis::ConfigValue;
1772
1773 for (key, value) in patch.values() {
1774 match (key.as_str(), value) {
1775 ("transactions.count", ConfigValue::Integer(n)) => {
1778 info!(
1779 "Fingerprint suggests {} transactions (apply via company volumes)",
1780 n
1781 );
1782 }
1783 ("global.period_months", ConfigValue::Integer(n)) => {
1784 config.global.period_months = (*n).clamp(1, 120) as u32;
1785 }
1786 ("global.start_date", ConfigValue::String(s)) => {
1787 config.global.start_date = s.clone();
1788 }
1789 ("global.seed", ConfigValue::Integer(n)) => {
1790 config.global.seed = Some(*n as u64);
1791 }
1792 ("fraud.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1799 config.data_quality.enabled = *b;
1800 }
1801 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1803 config.fraud.enabled = *b;
1804 }
1805 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1806 config.fraud.fraud_rate = *f;
1807 }
1808 _ => {
1809 debug!("Ignoring unknown config patch key: {}", key);
1810 }
1811 }
1812 }
1813
1814 config
1815 }
1816
1817 fn build_resource_guard(
1819 config: &GeneratorConfig,
1820 output_path: Option<PathBuf>,
1821 ) -> ResourceGuard {
1822 let mut builder = ResourceGuardBuilder::new();
1823
1824 if config.global.memory_limit_mb > 0 {
1826 builder = builder.memory_limit(config.global.memory_limit_mb);
1827 }
1828
1829 if let Some(path) = output_path {
1831 builder = builder.output_path(path).min_free_disk(100); }
1833
1834 builder = builder.conservative();
1836
1837 builder.build()
1838 }
1839
1840 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1845 self.resource_guard.check()
1846 }
1847
1848 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1850 let level = self.resource_guard.check()?;
1851
1852 if level != DegradationLevel::Normal {
1853 warn!(
1854 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1855 phase,
1856 level,
1857 self.resource_guard.current_memory_mb(),
1858 self.resource_guard.available_disk_mb()
1859 );
1860 }
1861
1862 Ok(level)
1863 }
1864
1865 fn get_degradation_actions(&self) -> DegradationActions {
1867 self.resource_guard.get_actions()
1868 }
1869
1870 fn check_memory_limit(&self) -> SynthResult<()> {
1872 self.check_resources()?;
1873 Ok(())
1874 }
1875
1876 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1878 info!("Starting enhanced generation workflow");
1879 info!(
1880 "Config: industry={:?}, period_months={}, companies={}",
1881 self.config.global.industry,
1882 self.config.global.period_months,
1883 self.config.companies.len()
1884 );
1885
1886 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1889 datasynth_core::serde_decimal::set_numeric_native(is_native);
1890 struct NumericModeGuard;
1891 impl Drop for NumericModeGuard {
1892 fn drop(&mut self) {
1893 datasynth_core::serde_decimal::set_numeric_native(false);
1894 }
1895 }
1896 let _numeric_guard = if is_native {
1897 Some(NumericModeGuard)
1898 } else {
1899 None
1900 };
1901
1902 let initial_level = self.check_resources_with_log("initial")?;
1904 if initial_level == DegradationLevel::Emergency {
1905 return Err(SynthError::resource(
1906 "Insufficient resources to start generation",
1907 ));
1908 }
1909
1910 let mut stats = EnhancedGenerationStatistics {
1911 companies_count: self.config.companies.len(),
1912 period_months: self.config.global.period_months,
1913 ..Default::default()
1914 };
1915
1916 let coa = self.phase_chart_of_accounts(&mut stats)?;
1918
1919 self.phase_master_data(&mut stats)?;
1921
1922 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1924 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1925 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1926
1927 let (mut document_flows, mut subledger, fa_journal_entries) =
1929 self.phase_document_flows(&mut stats)?;
1930
1931 self.emit_phase_items(
1933 "document_flows",
1934 "PurchaseOrder",
1935 &document_flows.purchase_orders,
1936 );
1937 self.emit_phase_items(
1938 "document_flows",
1939 "GoodsReceipt",
1940 &document_flows.goods_receipts,
1941 );
1942 self.emit_phase_items(
1943 "document_flows",
1944 "VendorInvoice",
1945 &document_flows.vendor_invoices,
1946 );
1947 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1948 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1949
1950 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1952
1953 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1958 .iter()
1959 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1960 .collect();
1961 if !opening_balance_jes.is_empty() {
1962 debug!(
1963 "Prepending {} opening balance JEs to entries",
1964 opening_balance_jes.len()
1965 );
1966 }
1967
1968 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1970
1971 if !opening_balance_jes.is_empty() {
1974 let mut combined = opening_balance_jes;
1975 combined.extend(entries);
1976 entries = combined;
1977 }
1978
1979 if !fa_journal_entries.is_empty() {
1981 debug!(
1982 "Appending {} FA acquisition JEs to main entries",
1983 fa_journal_entries.len()
1984 );
1985 entries.extend(fa_journal_entries);
1986 }
1987
1988 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1990
1991 let actions = self.get_degradation_actions();
1993
1994 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1996
1997 if !sourcing.contracts.is_empty() {
2000 let mut linked_count = 0usize;
2001 let po_vendor_pairs: Vec<(String, String)> = document_flows
2003 .p2p_chains
2004 .iter()
2005 .map(|chain| {
2006 (
2007 chain.purchase_order.vendor_id.clone(),
2008 chain.purchase_order.header.document_id.clone(),
2009 )
2010 })
2011 .collect();
2012
2013 for chain in &mut document_flows.p2p_chains {
2014 if chain.purchase_order.contract_id.is_none() {
2015 if let Some(contract) = sourcing
2016 .contracts
2017 .iter()
2018 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2019 {
2020 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2021 linked_count += 1;
2022 }
2023 }
2024 }
2025
2026 for contract in &mut sourcing.contracts {
2028 let po_ids: Vec<String> = po_vendor_pairs
2029 .iter()
2030 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2031 .map(|(_, po_id)| po_id.clone())
2032 .collect();
2033 if !po_ids.is_empty() {
2034 contract.purchase_order_ids = po_ids;
2035 }
2036 }
2037
2038 if linked_count > 0 {
2039 debug!(
2040 "Linked {} purchase orders to S2C contracts by vendor match",
2041 linked_count
2042 );
2043 }
2044 }
2045
2046 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2048
2049 if !intercompany.seller_journal_entries.is_empty()
2051 || !intercompany.buyer_journal_entries.is_empty()
2052 {
2053 let ic_je_count = intercompany.seller_journal_entries.len()
2054 + intercompany.buyer_journal_entries.len();
2055 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2056 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2057 debug!(
2058 "Appended {} IC journal entries to main entries",
2059 ic_je_count
2060 );
2061 }
2062
2063 if !intercompany.elimination_entries.is_empty() {
2065 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2066 &intercompany.elimination_entries,
2067 );
2068 if !elim_jes.is_empty() {
2069 debug!(
2070 "Appended {} elimination journal entries to main entries",
2071 elim_jes.len()
2072 );
2073 let elim_debit: rust_decimal::Decimal =
2075 elim_jes.iter().map(|je| je.total_debit()).sum();
2076 let elim_credit: rust_decimal::Decimal =
2077 elim_jes.iter().map(|je| je.total_credit()).sum();
2078 let elim_diff = (elim_debit - elim_credit).abs();
2079 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2081 return Err(datasynth_core::error::SynthError::generation(format!(
2082 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2083 elim_debit, elim_credit, elim_diff, tolerance
2084 )));
2085 }
2086 debug!(
2087 "IC elimination balance verified: debits={}, credits={} (diff={})",
2088 elim_debit, elim_credit, elim_diff
2089 );
2090 entries.extend(elim_jes);
2091 }
2092 }
2093
2094 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2096 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2097 document_flows
2098 .customer_invoices
2099 .extend(ic_docs.seller_invoices.iter().cloned());
2100 document_flows
2101 .purchase_orders
2102 .extend(ic_docs.buyer_orders.iter().cloned());
2103 document_flows
2104 .goods_receipts
2105 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2106 document_flows
2107 .vendor_invoices
2108 .extend(ic_docs.buyer_invoices.iter().cloned());
2109 debug!(
2110 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2111 ic_docs.seller_invoices.len(),
2112 ic_docs.buyer_orders.len(),
2113 ic_docs.buyer_goods_receipts.len(),
2114 ic_docs.buyer_invoices.len(),
2115 );
2116 }
2117 }
2118
2119 let hr = self.phase_hr_data(&mut stats)?;
2121
2122 if !hr.payroll_runs.is_empty() {
2124 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2125 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2126 entries.extend(payroll_jes);
2127 }
2128
2129 if !hr.pension_journal_entries.is_empty() {
2131 debug!(
2132 "Generated {} JEs from pension plans",
2133 hr.pension_journal_entries.len()
2134 );
2135 entries.extend(hr.pension_journal_entries.iter().cloned());
2136 }
2137
2138 if !hr.stock_comp_journal_entries.is_empty() {
2140 debug!(
2141 "Generated {} JEs from stock-based compensation",
2142 hr.stock_comp_journal_entries.len()
2143 );
2144 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2145 }
2146
2147 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2149
2150 if !manufacturing_snap.production_orders.is_empty() {
2152 let currency = self
2153 .config
2154 .companies
2155 .first()
2156 .map(|c| c.currency.as_str())
2157 .unwrap_or("USD");
2158 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2159 &manufacturing_snap.production_orders,
2160 &manufacturing_snap.quality_inspections,
2161 currency,
2162 );
2163 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2164 entries.extend(mfg_jes);
2165 }
2166
2167 if !manufacturing_snap.quality_inspections.is_empty() {
2169 let framework = match self.config.accounting_standards.framework {
2170 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2171 _ => "US_GAAP",
2172 };
2173 for company in &self.config.companies {
2174 let company_orders: Vec<_> = manufacturing_snap
2175 .production_orders
2176 .iter()
2177 .filter(|o| o.company_code == company.code)
2178 .cloned()
2179 .collect();
2180 let company_inspections: Vec<_> = manufacturing_snap
2181 .quality_inspections
2182 .iter()
2183 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2184 .cloned()
2185 .collect();
2186 if company_inspections.is_empty() {
2187 continue;
2188 }
2189 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2190 let warranty_result = warranty_gen.generate(
2191 &company.code,
2192 &company_orders,
2193 &company_inspections,
2194 &company.currency,
2195 framework,
2196 );
2197 if !warranty_result.journal_entries.is_empty() {
2198 debug!(
2199 "Generated {} warranty provision JEs for {}",
2200 warranty_result.journal_entries.len(),
2201 company.code
2202 );
2203 entries.extend(warranty_result.journal_entries);
2204 }
2205 }
2206 }
2207
2208 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2210 {
2211 let cogs_currency = self
2212 .config
2213 .companies
2214 .first()
2215 .map(|c| c.currency.as_str())
2216 .unwrap_or("USD");
2217 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2218 &document_flows.deliveries,
2219 &manufacturing_snap.production_orders,
2220 cogs_currency,
2221 );
2222 if !cogs_jes.is_empty() {
2223 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2224 entries.extend(cogs_jes);
2225 }
2226 }
2227
2228 if !manufacturing_snap.inventory_movements.is_empty()
2234 && !subledger.inventory_positions.is_empty()
2235 {
2236 use datasynth_core::models::MovementType as MfgMovementType;
2237 let mut receipt_count = 0usize;
2238 let mut issue_count = 0usize;
2239 for movement in &manufacturing_snap.inventory_movements {
2240 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2242 p.material_id == movement.material_code
2243 && p.company_code == movement.entity_code
2244 }) {
2245 match movement.movement_type {
2246 MfgMovementType::GoodsReceipt => {
2247 pos.add_quantity(
2249 movement.quantity,
2250 movement.value,
2251 movement.movement_date,
2252 );
2253 receipt_count += 1;
2254 }
2255 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2256 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2258 issue_count += 1;
2259 }
2260 _ => {}
2261 }
2262 }
2263 }
2264 debug!(
2265 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2266 manufacturing_snap.inventory_movements.len(),
2267 receipt_count,
2268 issue_count,
2269 );
2270 }
2271
2272 if !entries.is_empty() {
2275 stats.total_entries = entries.len() as u64;
2276 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2277 debug!(
2278 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2279 stats.total_entries, stats.total_line_items
2280 );
2281 }
2282
2283 if self.config.internal_controls.enabled && !entries.is_empty() {
2285 info!("Phase 7b: Applying internal controls to journal entries");
2286 let control_config = ControlGeneratorConfig {
2287 exception_rate: self.config.internal_controls.exception_rate,
2288 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2289 enable_sox_marking: true,
2290 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2291 self.config.internal_controls.sox_materiality_threshold,
2292 )
2293 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2294 ..Default::default()
2295 };
2296 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2297 for entry in &mut entries {
2298 control_gen.apply_controls(entry, &coa);
2299 }
2300 let with_controls = entries
2301 .iter()
2302 .filter(|e| !e.header.control_ids.is_empty())
2303 .count();
2304 info!(
2305 "Applied controls to {} entries ({} with control IDs assigned)",
2306 entries.len(),
2307 with_controls
2308 );
2309 }
2310
2311 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2315 .iter()
2316 .filter(|e| e.header.sod_violation)
2317 .filter_map(|e| {
2318 e.header.sod_conflict_type.map(|ct| {
2319 use datasynth_core::models::{RiskLevel, SodViolation};
2320 let severity = match ct {
2321 datasynth_core::models::SodConflictType::PaymentReleaser
2322 | datasynth_core::models::SodConflictType::RequesterApprover => {
2323 RiskLevel::Critical
2324 }
2325 datasynth_core::models::SodConflictType::PreparerApprover
2326 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2327 | datasynth_core::models::SodConflictType::JournalEntryPoster
2328 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2329 RiskLevel::High
2330 }
2331 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2332 RiskLevel::Medium
2333 }
2334 };
2335 let action = format!(
2336 "SoD conflict {:?} on entry {} ({})",
2337 ct, e.header.document_id, e.header.company_code
2338 );
2339 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2340 })
2341 })
2342 .collect();
2343 if !sod_violations.is_empty() {
2344 info!(
2345 "Phase 7c: Extracted {} SoD violations from {} entries",
2346 sod_violations.len(),
2347 entries.len()
2348 );
2349 }
2350
2351 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2353
2354 {
2362 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2363 if self.config.fraud.enabled && doc_rate > 0.0 {
2364 use datasynth_core::fraud_propagation::{
2365 inject_document_fraud, propagate_documents_to_entries,
2366 };
2367 use datasynth_core::utils::weighted_select;
2368 use datasynth_core::FraudType;
2369 use rand_chacha::rand_core::SeedableRng;
2370
2371 let dist = &self.config.fraud.fraud_type_distribution;
2372 let fraud_type_weights: [(FraudType, f64); 8] = [
2373 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2374 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2375 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2376 (
2377 FraudType::ImproperCapitalization,
2378 dist.expense_capitalization,
2379 ),
2380 (FraudType::SplitTransaction, dist.split_transaction),
2381 (FraudType::TimingAnomaly, dist.timing_anomaly),
2382 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2383 (FraudType::DuplicatePayment, dist.duplicate_payment),
2384 ];
2385 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2386 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2387 if weights_sum <= 0.0 {
2388 FraudType::FictitiousEntry
2389 } else {
2390 *weighted_select(rng, &fraud_type_weights)
2391 }
2392 };
2393
2394 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2395 let mut doc_tagged = 0usize;
2396 macro_rules! inject_into {
2397 ($collection:expr) => {{
2398 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2399 $collection.iter_mut().map(|d| &mut d.header).collect();
2400 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2401 }};
2402 }
2403 inject_into!(document_flows.purchase_orders);
2404 inject_into!(document_flows.goods_receipts);
2405 inject_into!(document_flows.vendor_invoices);
2406 inject_into!(document_flows.payments);
2407 inject_into!(document_flows.sales_orders);
2408 inject_into!(document_flows.deliveries);
2409 inject_into!(document_flows.customer_invoices);
2410 if doc_tagged > 0 {
2411 info!(
2412 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2413 );
2414 }
2415
2416 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2417 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2418 Vec::new();
2419 headers.extend(
2420 document_flows
2421 .purchase_orders
2422 .iter()
2423 .map(|d| d.header.clone()),
2424 );
2425 headers.extend(
2426 document_flows
2427 .goods_receipts
2428 .iter()
2429 .map(|d| d.header.clone()),
2430 );
2431 headers.extend(
2432 document_flows
2433 .vendor_invoices
2434 .iter()
2435 .map(|d| d.header.clone()),
2436 );
2437 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2438 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2439 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2440 headers.extend(
2441 document_flows
2442 .customer_invoices
2443 .iter()
2444 .map(|d| d.header.clone()),
2445 );
2446 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2447 if propagated > 0 {
2448 info!(
2449 "Propagated document-level fraud to {propagated} derived journal entries"
2450 );
2451 }
2452 }
2453 }
2454 }
2455
2456 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2458
2459 {
2477 use datasynth_core::fraud_bias::{
2478 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2479 };
2480 use rand_chacha::rand_core::SeedableRng;
2481 let cfg = FraudBehavioralBiasConfig::default();
2482 if cfg.enabled {
2483 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2484 let mut swept = 0usize;
2485 for entry in entries.iter_mut() {
2486 if entry.header.is_fraud && !entry.header.is_anomaly {
2487 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2488 swept += 1;
2489 }
2490 }
2491 if swept > 0 {
2492 info!(
2493 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2494 (doc-propagated + je_generator intrinsic fraud)"
2495 );
2496 }
2497 }
2498 }
2499
2500 self.emit_phase_items(
2502 "anomaly_injection",
2503 "LabeledAnomaly",
2504 &anomaly_labels.labels,
2505 );
2506
2507 if self.config.fraud.propagate_to_document {
2515 use std::collections::HashMap;
2516 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2529 for je in &entries {
2530 if je.header.is_fraud {
2531 if let Some(ref fraud_type) = je.header.fraud_type {
2532 if let Some(ref reference) = je.header.reference {
2533 fraud_map.insert(reference.clone(), *fraud_type);
2535 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2538 if !bare.is_empty() {
2539 fraud_map.insert(bare.to_string(), *fraud_type);
2540 }
2541 }
2542 }
2543 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2545 }
2546 }
2547 }
2548 if !fraud_map.is_empty() {
2549 let mut propagated = 0usize;
2550 macro_rules! propagate_to {
2552 ($collection:expr) => {
2553 for doc in &mut $collection {
2554 if doc.header.propagate_fraud(&fraud_map) {
2555 propagated += 1;
2556 }
2557 }
2558 };
2559 }
2560 propagate_to!(document_flows.purchase_orders);
2561 propagate_to!(document_flows.goods_receipts);
2562 propagate_to!(document_flows.vendor_invoices);
2563 propagate_to!(document_flows.payments);
2564 propagate_to!(document_flows.sales_orders);
2565 propagate_to!(document_flows.deliveries);
2566 propagate_to!(document_flows.customer_invoices);
2567 if propagated > 0 {
2568 info!(
2569 "Propagated fraud labels to {} document flow records",
2570 propagated
2571 );
2572 }
2573 }
2574 }
2575
2576 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2578
2579 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2581
2582 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2584
2585 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2587
2588 let balance_validation = self.phase_balance_validation(&entries)?;
2590
2591 let subledger_reconciliation =
2593 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2594
2595 let (data_quality_stats, quality_issues) =
2597 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2598
2599 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2601
2602 {
2604 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2609 for je in &entries {
2610 if je.header.is_fraud || je.header.is_anomaly {
2611 continue;
2612 }
2613 let diff = (je.total_debit() - je.total_credit()).abs();
2614 if diff > tolerance {
2615 unbalanced_clean += 1;
2616 if unbalanced_clean <= 3 {
2617 warn!(
2618 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2619 je.header.document_id,
2620 je.total_debit(),
2621 je.total_credit(),
2622 diff
2623 );
2624 }
2625 }
2626 }
2627 if unbalanced_clean > 0 {
2628 return Err(datasynth_core::error::SynthError::generation(format!(
2629 "{} non-anomaly JEs are unbalanced (debits != credits). \
2630 First few logged above. Tolerance={}",
2631 unbalanced_clean, tolerance
2632 )));
2633 }
2634 debug!(
2635 "Phase 10c: All {} non-anomaly JEs individually balanced",
2636 entries
2637 .iter()
2638 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2639 .count()
2640 );
2641
2642 let company_codes: Vec<String> = self
2644 .config
2645 .companies
2646 .iter()
2647 .map(|c| c.code.clone())
2648 .collect();
2649 for company_code in &company_codes {
2650 let mut assets = rust_decimal::Decimal::ZERO;
2651 let mut liab_equity = rust_decimal::Decimal::ZERO;
2652
2653 for entry in &entries {
2654 if entry.header.company_code != *company_code {
2655 continue;
2656 }
2657 for line in &entry.lines {
2658 let acct = &line.gl_account;
2659 let net = line.debit_amount - line.credit_amount;
2660 if acct.starts_with('1') {
2662 assets += net;
2663 }
2664 else if acct.starts_with('2') || acct.starts_with('3') {
2666 liab_equity -= net; }
2668 }
2671 }
2672
2673 let bs_diff = (assets - liab_equity).abs();
2674 if bs_diff > tolerance {
2675 warn!(
2676 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2677 revenue/expense closing entries may not fully offset",
2678 company_code, assets, liab_equity, bs_diff
2679 );
2680 } else {
2684 debug!(
2685 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2686 company_code, assets, liab_equity, bs_diff
2687 );
2688 }
2689 }
2690
2691 info!("Phase 10c: All generation-time accounting assertions passed");
2692 }
2693
2694 let audit = self.phase_audit_data(&entries, &mut stats)?;
2696
2697 let mut banking = self.phase_banking_data(&mut stats)?;
2699
2700 if self.phase_config.generate_banking
2705 && !document_flows.payments.is_empty()
2706 && !banking.accounts.is_empty()
2707 {
2708 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2709 if bridge_rate > 0.0 {
2710 let mut bridge =
2711 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2712 self.seed,
2713 );
2714 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2715 &document_flows.payments,
2716 &banking.customers,
2717 &banking.accounts,
2718 bridge_rate,
2719 );
2720 info!(
2721 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2722 bridge_stats.bridged_count,
2723 bridge_stats.transactions_emitted,
2724 bridge_stats.fraud_propagated,
2725 );
2726 let bridged_count = bridged_txns.len();
2727 banking.transactions.extend(bridged_txns);
2728
2729 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2732 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2733 &mut banking.transactions,
2734 );
2735 }
2736
2737 banking.suspicious_count = banking
2739 .transactions
2740 .iter()
2741 .filter(|t| t.is_suspicious)
2742 .count();
2743 stats.banking_transaction_count = banking.transactions.len();
2744 stats.banking_suspicious_count = banking.suspicious_count;
2745 }
2746 }
2747
2748 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2750
2751 self.phase_llm_enrichment(&mut stats);
2753
2754 self.phase_diffusion_enhancement(&mut stats);
2756
2757 self.phase_causal_overlay(&mut stats);
2759
2760 let mut financial_reporting = self.phase_financial_reporting(
2764 &document_flows,
2765 &entries,
2766 &coa,
2767 &hr,
2768 &audit,
2769 &mut stats,
2770 )?;
2771
2772 {
2774 use datasynth_core::models::StatementType;
2775 for stmt in &financial_reporting.consolidated_statements {
2776 if stmt.statement_type == StatementType::BalanceSheet {
2777 let total_assets: rust_decimal::Decimal = stmt
2778 .line_items
2779 .iter()
2780 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2781 .map(|li| li.amount)
2782 .sum();
2783 let total_le: rust_decimal::Decimal = stmt
2784 .line_items
2785 .iter()
2786 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2787 .map(|li| li.amount)
2788 .sum();
2789 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2790 warn!(
2791 "BS equation imbalance: assets={}, L+E={}",
2792 total_assets, total_le
2793 );
2794 }
2795 }
2796 }
2797 }
2798
2799 let accounting_standards =
2801 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2802
2803 if !accounting_standards.ecl_journal_entries.is_empty() {
2805 debug!(
2806 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2807 accounting_standards.ecl_journal_entries.len()
2808 );
2809 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2810 }
2811
2812 if !accounting_standards.provision_journal_entries.is_empty() {
2814 debug!(
2815 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2816 accounting_standards.provision_journal_entries.len()
2817 );
2818 entries.extend(
2819 accounting_standards
2820 .provision_journal_entries
2821 .iter()
2822 .cloned(),
2823 );
2824 }
2825
2826 let mut ocpm = self.phase_ocpm_events(
2828 &document_flows,
2829 &sourcing,
2830 &hr,
2831 &manufacturing_snap,
2832 &banking,
2833 &audit,
2834 &financial_reporting,
2835 &mut stats,
2836 )?;
2837
2838 if let Some(ref event_log) = ocpm.event_log {
2840 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2841 }
2842
2843 if let Some(ref event_log) = ocpm.event_log {
2845 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2847 std::collections::HashMap::new();
2848 for (idx, event) in event_log.events.iter().enumerate() {
2849 if let Some(ref doc_ref) = event.document_ref {
2850 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2851 }
2852 }
2853
2854 if !doc_index.is_empty() {
2855 let mut annotated = 0usize;
2856 for entry in &mut entries {
2857 let doc_id_str = entry.header.document_id.to_string();
2858 let mut matched_indices: Vec<usize> = Vec::new();
2860 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2861 matched_indices.extend(indices);
2862 }
2863 if let Some(ref reference) = entry.header.reference {
2864 let bare_ref = reference
2865 .find(':')
2866 .map(|i| &reference[i + 1..])
2867 .unwrap_or(reference.as_str());
2868 if let Some(indices) = doc_index.get(bare_ref) {
2869 for &idx in indices {
2870 if !matched_indices.contains(&idx) {
2871 matched_indices.push(idx);
2872 }
2873 }
2874 }
2875 }
2876 if !matched_indices.is_empty() {
2878 for &idx in &matched_indices {
2879 let event = &event_log.events[idx];
2880 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2881 entry.header.ocpm_event_ids.push(event.event_id);
2882 }
2883 for obj_ref in &event.object_refs {
2884 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2885 entry.header.ocpm_object_ids.push(obj_ref.object_id);
2886 }
2887 }
2888 if entry.header.ocpm_case_id.is_none() {
2889 entry.header.ocpm_case_id = event.case_id;
2890 }
2891 }
2892 annotated += 1;
2893 }
2894 }
2895 debug!(
2896 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2897 annotated
2898 );
2899 }
2900 }
2901
2902 if let Some(ref mut event_log) = ocpm.event_log {
2906 let synthesized =
2907 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
2908 if synthesized > 0 {
2909 info!(
2910 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
2911 );
2912 }
2913
2914 let anomaly_events =
2919 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
2920 if anomaly_events > 0 {
2921 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
2922 }
2923
2924 let p2p_cfg = &self.config.ocpm.p2p_process;
2929 let any_imperfection = p2p_cfg.rework_probability > 0.0
2930 || p2p_cfg.skip_step_probability > 0.0
2931 || p2p_cfg.out_of_order_probability > 0.0;
2932 if any_imperfection {
2933 use rand_chacha::rand_core::SeedableRng;
2934 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
2935 rework_rate: p2p_cfg.rework_probability,
2936 skip_rate: p2p_cfg.skip_step_probability,
2937 out_of_order_rate: p2p_cfg.out_of_order_probability,
2938 };
2939 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
2940 let stats =
2941 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
2942 if stats.rework + stats.skipped + stats.out_of_order > 0 {
2943 info!(
2944 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
2945 stats.rework, stats.skipped, stats.out_of_order
2946 );
2947 }
2948 }
2949 }
2950
2951 let sales_kpi_budgets =
2953 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2954
2955 let treasury =
2959 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2960
2961 if !treasury.journal_entries.is_empty() {
2963 debug!(
2964 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2965 treasury.journal_entries.len()
2966 );
2967 entries.extend(treasury.journal_entries.iter().cloned());
2968 }
2969
2970 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2972
2973 if !tax.tax_posting_journal_entries.is_empty() {
2975 debug!(
2976 "Merging {} tax posting JEs into GL",
2977 tax.tax_posting_journal_entries.len()
2978 );
2979 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2980 }
2981
2982 {
2986 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2987
2988 let framework_str = {
2989 use datasynth_config::schema::AccountingFrameworkConfig;
2990 match self
2991 .config
2992 .accounting_standards
2993 .framework
2994 .unwrap_or_default()
2995 {
2996 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2997 "IFRS"
2998 }
2999 _ => "US_GAAP",
3000 }
3001 };
3002
3003 let depreciation_total: rust_decimal::Decimal = entries
3005 .iter()
3006 .filter(|je| je.header.document_type == "CL")
3007 .flat_map(|je| je.lines.iter())
3008 .filter(|l| l.gl_account.starts_with("6000"))
3009 .map(|l| l.debit_amount)
3010 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3011
3012 let interest_paid: rust_decimal::Decimal = entries
3014 .iter()
3015 .flat_map(|je| je.lines.iter())
3016 .filter(|l| l.gl_account.starts_with("7100"))
3017 .map(|l| l.debit_amount)
3018 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3019
3020 let tax_paid: rust_decimal::Decimal = entries
3022 .iter()
3023 .flat_map(|je| je.lines.iter())
3024 .filter(|l| l.gl_account.starts_with("8000"))
3025 .map(|l| l.debit_amount)
3026 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3027
3028 let capex: rust_decimal::Decimal = entries
3030 .iter()
3031 .flat_map(|je| je.lines.iter())
3032 .filter(|l| l.gl_account.starts_with("1500"))
3033 .map(|l| l.debit_amount)
3034 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3035
3036 let dividends_paid: rust_decimal::Decimal = entries
3038 .iter()
3039 .flat_map(|je| je.lines.iter())
3040 .filter(|l| l.gl_account == "2170")
3041 .map(|l| l.debit_amount)
3042 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3043
3044 let cf_data = CashFlowSourceData {
3045 depreciation_total,
3046 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3048 delta_ap: rust_decimal::Decimal::ZERO,
3049 delta_inventory: rust_decimal::Decimal::ZERO,
3050 capex,
3051 debt_issuance: rust_decimal::Decimal::ZERO,
3052 debt_repayment: rust_decimal::Decimal::ZERO,
3053 interest_paid,
3054 tax_paid,
3055 dividends_paid,
3056 framework: framework_str.to_string(),
3057 };
3058
3059 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3060 if !enhanced_cf_items.is_empty() {
3061 use datasynth_core::models::StatementType;
3063 let merge_count = enhanced_cf_items.len();
3064 for stmt in financial_reporting
3065 .financial_statements
3066 .iter_mut()
3067 .chain(financial_reporting.consolidated_statements.iter_mut())
3068 .chain(
3069 financial_reporting
3070 .standalone_statements
3071 .values_mut()
3072 .flat_map(|v| v.iter_mut()),
3073 )
3074 {
3075 if stmt.statement_type == StatementType::CashFlowStatement {
3076 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3077 }
3078 }
3079 info!(
3080 "Enhanced cash flow: {} supplementary items merged into CF statements",
3081 merge_count
3082 );
3083 }
3084 }
3085
3086 self.generate_notes_to_financial_statements(
3089 &mut financial_reporting,
3090 &accounting_standards,
3091 &tax,
3092 &hr,
3093 &audit,
3094 &treasury,
3095 );
3096
3097 if self.config.companies.len() >= 2 && !entries.is_empty() {
3101 let companies: Vec<(String, String)> = self
3102 .config
3103 .companies
3104 .iter()
3105 .map(|c| (c.code.clone(), c.name.clone()))
3106 .collect();
3107 let ic_elim: rust_decimal::Decimal =
3108 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3109 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3110 .unwrap_or(NaiveDate::MIN);
3111 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3112 let period_label = format!(
3113 "{}-{:02}",
3114 end_date.year(),
3115 (end_date - chrono::Days::new(1)).month()
3116 );
3117
3118 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3119 let (je_segments, je_recon) =
3120 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3121 if !je_segments.is_empty() {
3122 info!(
3123 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3124 je_segments.len(),
3125 ic_elim,
3126 );
3127 if financial_reporting.segment_reports.is_empty() {
3129 financial_reporting.segment_reports = je_segments;
3130 financial_reporting.segment_reconciliations = vec![je_recon];
3131 } else {
3132 financial_reporting.segment_reports.extend(je_segments);
3133 financial_reporting.segment_reconciliations.push(je_recon);
3134 }
3135 }
3136 }
3137
3138 let esg_snap =
3140 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3141
3142 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3144
3145 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3147
3148 let disruption_events = self.phase_disruption_events(&mut stats)?;
3150
3151 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3153
3154 let (entity_relationship_graph, cross_process_links) =
3156 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3157
3158 let industry_output = self.phase_industry_data(&mut stats);
3160
3161 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3163
3164 if self.config.diffusion.enabled
3166 && (self.config.diffusion.backend == "neural"
3167 || self.config.diffusion.backend == "hybrid")
3168 {
3169 let neural = &self.config.diffusion.neural;
3170 const VALID_STRATEGIES: &[&str] = &["weighted_average", "column_select", "threshold"];
3173 if !VALID_STRATEGIES.contains(&neural.hybrid_strategy.as_str()) {
3174 warn!(
3175 "Unknown diffusion.neural.hybrid_strategy='{}' — expected one of {:?}; \
3176 falling back to 'weighted_average'.",
3177 neural.hybrid_strategy, VALID_STRATEGIES
3178 );
3179 }
3180 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3181 if (weight - neural.hybrid_weight).abs() > f64::EPSILON {
3182 warn!(
3183 "diffusion.neural.hybrid_weight={} clamped to [0,1] → {}",
3184 neural.hybrid_weight, weight
3185 );
3186 }
3187 info!(
3188 "Phase neural enhancement: backend={} strategy={} weight={:.2} columns={} \
3189 (neural_columns: {:?})",
3190 self.config.diffusion.backend,
3191 neural.hybrid_strategy,
3192 weight,
3193 neural.neural_columns.len(),
3194 neural.neural_columns,
3195 );
3196 stats.neural_hybrid_weight = Some(weight);
3197 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3198 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3199 }
3207
3208 self.phase_hypergraph_export(
3210 &coa,
3211 &entries,
3212 &document_flows,
3213 &sourcing,
3214 &hr,
3215 &manufacturing_snap,
3216 &banking,
3217 &audit,
3218 &financial_reporting,
3219 &ocpm,
3220 &compliance_regulations,
3221 &mut stats,
3222 )?;
3223
3224 if self.phase_config.generate_graph_export {
3227 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3228 }
3229
3230 if self.config.streaming.enabled {
3232 info!("Note: streaming config is enabled but batch mode does not use it");
3233 }
3234 if self.config.vendor_network.enabled {
3235 debug!("Vendor network config available; relationship graph generation is partial");
3236 }
3237 if self.config.customer_segmentation.enabled {
3238 debug!("Customer segmentation config available; segment-aware generation is partial");
3239 }
3240
3241 let resource_stats = self.resource_guard.stats();
3243 info!(
3244 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3245 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3246 resource_stats.disk.estimated_bytes_written,
3247 resource_stats.degradation_level
3248 );
3249
3250 if let Some(ref sink) = self.phase_sink {
3252 if let Err(e) = sink.flush() {
3253 warn!("Stream sink flush failed: {e}");
3254 }
3255 }
3256
3257 let lineage = self.build_lineage_graph();
3259
3260 let gate_result = if self.config.quality_gates.enabled {
3262 let profile_name = &self.config.quality_gates.profile;
3263 match datasynth_eval::gates::get_profile(profile_name) {
3264 Some(profile) => {
3265 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3267
3268 if balance_validation.validated {
3270 eval.coherence.balance =
3271 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3272 equation_balanced: balance_validation.is_balanced,
3273 max_imbalance: (balance_validation.total_debits
3274 - balance_validation.total_credits)
3275 .abs(),
3276 periods_evaluated: 1,
3277 periods_imbalanced: if balance_validation.is_balanced {
3278 0
3279 } else {
3280 1
3281 },
3282 period_results: Vec::new(),
3283 companies_evaluated: self.config.companies.len(),
3284 });
3285 }
3286
3287 eval.coherence.passes = balance_validation.is_balanced;
3289 if !balance_validation.is_balanced {
3290 eval.coherence
3291 .failures
3292 .push("Balance sheet equation not satisfied".to_string());
3293 }
3294
3295 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3297 eval.statistical.passes = !entries.is_empty();
3298
3299 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3302
3303 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3304 info!(
3305 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3306 profile_name, result.gates_passed, result.gates_total, result.summary
3307 );
3308 Some(result)
3309 }
3310 None => {
3311 warn!(
3312 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3313 profile_name
3314 );
3315 None
3316 }
3317 }
3318 } else {
3319 None
3320 };
3321
3322 let internal_controls = if self.config.internal_controls.enabled {
3324 InternalControl::standard_controls()
3325 } else {
3326 Vec::new()
3327 };
3328
3329 Ok(EnhancedGenerationResult {
3330 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3331 master_data: std::mem::take(&mut self.master_data),
3332 document_flows,
3333 subledger,
3334 ocpm,
3335 audit,
3336 banking,
3337 graph_export,
3338 sourcing,
3339 financial_reporting,
3340 hr,
3341 accounting_standards,
3342 manufacturing: manufacturing_snap,
3343 sales_kpi_budgets,
3344 tax,
3345 esg: esg_snap,
3346 treasury,
3347 project_accounting,
3348 process_evolution,
3349 organizational_events,
3350 disruption_events,
3351 intercompany,
3352 journal_entries: entries,
3353 anomaly_labels,
3354 balance_validation,
3355 data_quality_stats,
3356 quality_issues,
3357 statistics: stats,
3358 lineage: Some(lineage),
3359 gate_result,
3360 internal_controls,
3361 sod_violations,
3362 opening_balances,
3363 subledger_reconciliation,
3364 counterfactual_pairs,
3365 red_flags,
3366 collusion_rings,
3367 temporal_vendor_chains,
3368 entity_relationship_graph,
3369 cross_process_links,
3370 industry_output,
3371 compliance_regulations,
3372 })
3373 }
3374
3375 fn phase_chart_of_accounts(
3381 &mut self,
3382 stats: &mut EnhancedGenerationStatistics,
3383 ) -> SynthResult<Arc<ChartOfAccounts>> {
3384 info!("Phase 1: Generating Chart of Accounts");
3385 let coa = self.generate_coa()?;
3386 stats.accounts_count = coa.account_count();
3387 info!(
3388 "Chart of Accounts generated: {} accounts",
3389 stats.accounts_count
3390 );
3391 self.check_resources_with_log("post-coa")?;
3392 Ok(coa)
3393 }
3394
3395 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3397 if self.phase_config.generate_master_data {
3398 info!("Phase 2: Generating Master Data");
3399 self.generate_master_data()?;
3400 stats.vendor_count = self.master_data.vendors.len();
3401 stats.customer_count = self.master_data.customers.len();
3402 stats.material_count = self.master_data.materials.len();
3403 stats.asset_count = self.master_data.assets.len();
3404 stats.employee_count = self.master_data.employees.len();
3405 info!(
3406 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3407 stats.vendor_count, stats.customer_count, stats.material_count,
3408 stats.asset_count, stats.employee_count
3409 );
3410 self.check_resources_with_log("post-master-data")?;
3411 } else {
3412 debug!("Phase 2: Skipped (master data generation disabled)");
3413 }
3414 Ok(())
3415 }
3416
3417 fn phase_document_flows(
3419 &mut self,
3420 stats: &mut EnhancedGenerationStatistics,
3421 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3422 let mut document_flows = DocumentFlowSnapshot::default();
3423 let mut subledger = SubledgerSnapshot::default();
3424 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3427
3428 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3429 info!("Phase 3: Generating Document Flows");
3430 self.generate_document_flows(&mut document_flows)?;
3431 stats.p2p_chain_count = document_flows.p2p_chains.len();
3432 stats.o2c_chain_count = document_flows.o2c_chains.len();
3433 info!(
3434 "Document flows generated: {} P2P chains, {} O2C chains",
3435 stats.p2p_chain_count, stats.o2c_chain_count
3436 );
3437
3438 debug!("Phase 3b: Linking document flows to subledgers");
3440 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3441 stats.ap_invoice_count = subledger.ap_invoices.len();
3442 stats.ar_invoice_count = subledger.ar_invoices.len();
3443 debug!(
3444 "Subledgers linked: {} AP invoices, {} AR invoices",
3445 stats.ap_invoice_count, stats.ar_invoice_count
3446 );
3447
3448 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3453 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3454 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3455 debug!("Payment settlements applied to AP and AR subledgers");
3456
3457 if let Ok(start_date) =
3460 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3461 {
3462 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3463 - chrono::Days::new(1);
3464 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3465 for company in &self.config.companies {
3472 let ar_report = ARAgingReport::from_invoices(
3473 company.code.clone(),
3474 &subledger.ar_invoices,
3475 as_of_date,
3476 );
3477 subledger.ar_aging_reports.push(ar_report);
3478
3479 let ap_report = APAgingReport::from_invoices(
3480 company.code.clone(),
3481 &subledger.ap_invoices,
3482 as_of_date,
3483 );
3484 subledger.ap_aging_reports.push(ap_report);
3485 }
3486 debug!(
3487 "AR/AP aging reports built: {} AR, {} AP",
3488 subledger.ar_aging_reports.len(),
3489 subledger.ap_aging_reports.len()
3490 );
3491
3492 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3494 {
3495 use datasynth_generators::DunningGenerator;
3496 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3497 for company in &self.config.companies {
3498 let currency = company.currency.as_str();
3499 let mut company_invoices: Vec<
3502 datasynth_core::models::subledger::ar::ARInvoice,
3503 > = subledger
3504 .ar_invoices
3505 .iter()
3506 .filter(|inv| inv.company_code == company.code)
3507 .cloned()
3508 .collect();
3509
3510 if company_invoices.is_empty() {
3511 continue;
3512 }
3513
3514 let result = dunning_gen.execute_dunning_run(
3515 &company.code,
3516 as_of_date,
3517 &mut company_invoices,
3518 currency,
3519 );
3520
3521 for updated in &company_invoices {
3523 if let Some(orig) = subledger
3524 .ar_invoices
3525 .iter_mut()
3526 .find(|i| i.invoice_number == updated.invoice_number)
3527 {
3528 orig.dunning_info = updated.dunning_info.clone();
3529 }
3530 }
3531
3532 subledger.dunning_runs.push(result.dunning_run);
3533 subledger.dunning_letters.extend(result.letters);
3534 dunning_journal_entries.extend(result.journal_entries);
3536 }
3537 debug!(
3538 "Dunning runs complete: {} runs, {} letters",
3539 subledger.dunning_runs.len(),
3540 subledger.dunning_letters.len()
3541 );
3542 }
3543 }
3544
3545 self.check_resources_with_log("post-document-flows")?;
3546 } else {
3547 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3548 }
3549
3550 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3552 if !self.master_data.assets.is_empty() {
3553 debug!("Generating FA subledger records");
3554 let company_code = self
3555 .config
3556 .companies
3557 .first()
3558 .map(|c| c.code.as_str())
3559 .unwrap_or("1000");
3560 let currency = self
3561 .config
3562 .companies
3563 .first()
3564 .map(|c| c.currency.as_str())
3565 .unwrap_or("USD");
3566
3567 let mut fa_gen = datasynth_generators::FAGenerator::new(
3568 datasynth_generators::FAGeneratorConfig::default(),
3569 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3570 );
3571
3572 for asset in &self.master_data.assets {
3573 let (record, je) = fa_gen.generate_asset_acquisition(
3574 company_code,
3575 &format!("{:?}", asset.asset_class),
3576 &asset.description,
3577 asset.acquisition_date,
3578 currency,
3579 asset.cost_center.as_deref(),
3580 );
3581 subledger.fa_records.push(record);
3582 fa_journal_entries.push(je);
3583 }
3584
3585 stats.fa_subledger_count = subledger.fa_records.len();
3586 debug!(
3587 "FA subledger records generated: {} (with {} acquisition JEs)",
3588 stats.fa_subledger_count,
3589 fa_journal_entries.len()
3590 );
3591 }
3592
3593 if !self.master_data.materials.is_empty() {
3595 debug!("Generating Inventory subledger records");
3596 let first_company = self.config.companies.first();
3597 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3598 let inv_currency = first_company
3599 .map(|c| c.currency.clone())
3600 .unwrap_or_else(|| "USD".to_string());
3601
3602 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3603 datasynth_generators::InventoryGeneratorConfig::default(),
3604 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3605 inv_currency.clone(),
3606 );
3607
3608 for (i, material) in self.master_data.materials.iter().enumerate() {
3609 let plant = format!("PLANT{:02}", (i % 3) + 1);
3610 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3611 let initial_qty = rust_decimal::Decimal::from(
3612 material
3613 .safety_stock
3614 .to_string()
3615 .parse::<i64>()
3616 .unwrap_or(100),
3617 );
3618
3619 let position = inv_gen.generate_position(
3620 company_code,
3621 &plant,
3622 &storage_loc,
3623 &material.material_id,
3624 &material.description,
3625 initial_qty,
3626 Some(material.standard_cost),
3627 &inv_currency,
3628 );
3629 subledger.inventory_positions.push(position);
3630 }
3631
3632 stats.inventory_subledger_count = subledger.inventory_positions.len();
3633 debug!(
3634 "Inventory subledger records generated: {}",
3635 stats.inventory_subledger_count
3636 );
3637 }
3638
3639 if !subledger.fa_records.is_empty() {
3641 if let Ok(start_date) =
3642 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3643 {
3644 let company_code = self
3645 .config
3646 .companies
3647 .first()
3648 .map(|c| c.code.as_str())
3649 .unwrap_or("1000");
3650 let fiscal_year = start_date.year();
3651 let start_period = start_date.month();
3652 let end_period =
3653 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3654
3655 let depr_cfg = FaDepreciationScheduleConfig {
3656 fiscal_year,
3657 start_period,
3658 end_period,
3659 seed_offset: 800,
3660 };
3661 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3662 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3663 let run_count = runs.len();
3664 subledger.depreciation_runs = runs;
3665 debug!(
3666 "Depreciation runs generated: {} runs for {} periods",
3667 run_count, self.config.global.period_months
3668 );
3669 }
3670 }
3671
3672 if !subledger.inventory_positions.is_empty() {
3674 if let Ok(start_date) =
3675 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3676 {
3677 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3678 - chrono::Days::new(1);
3679
3680 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3681 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3682
3683 for company in &self.config.companies {
3684 let result = inv_val_gen.generate(
3685 &company.code,
3686 &subledger.inventory_positions,
3687 as_of_date,
3688 );
3689 subledger.inventory_valuations.push(result);
3690 }
3691 debug!(
3692 "Inventory valuations generated: {} company reports",
3693 subledger.inventory_valuations.len()
3694 );
3695 }
3696 }
3697
3698 Ok((document_flows, subledger, fa_journal_entries))
3699 }
3700
3701 #[allow(clippy::too_many_arguments)]
3703 fn phase_ocpm_events(
3704 &mut self,
3705 document_flows: &DocumentFlowSnapshot,
3706 sourcing: &SourcingSnapshot,
3707 hr: &HrSnapshot,
3708 manufacturing: &ManufacturingSnapshot,
3709 banking: &BankingSnapshot,
3710 audit: &AuditSnapshot,
3711 financial_reporting: &FinancialReportingSnapshot,
3712 stats: &mut EnhancedGenerationStatistics,
3713 ) -> SynthResult<OcpmSnapshot> {
3714 let degradation = self.check_resources()?;
3715 if degradation >= DegradationLevel::Reduced {
3716 debug!(
3717 "Phase skipped due to resource pressure (degradation: {:?})",
3718 degradation
3719 );
3720 return Ok(OcpmSnapshot::default());
3721 }
3722 if self.phase_config.generate_ocpm_events {
3723 info!("Phase 3c: Generating OCPM Events");
3724 let ocpm_snapshot = self.generate_ocpm_events(
3725 document_flows,
3726 sourcing,
3727 hr,
3728 manufacturing,
3729 banking,
3730 audit,
3731 financial_reporting,
3732 )?;
3733 stats.ocpm_event_count = ocpm_snapshot.event_count;
3734 stats.ocpm_object_count = ocpm_snapshot.object_count;
3735 stats.ocpm_case_count = ocpm_snapshot.case_count;
3736 info!(
3737 "OCPM events generated: {} events, {} objects, {} cases",
3738 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3739 );
3740 self.check_resources_with_log("post-ocpm")?;
3741 Ok(ocpm_snapshot)
3742 } else {
3743 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3744 Ok(OcpmSnapshot::default())
3745 }
3746 }
3747
3748 fn phase_journal_entries(
3750 &mut self,
3751 coa: &Arc<ChartOfAccounts>,
3752 document_flows: &DocumentFlowSnapshot,
3753 _stats: &mut EnhancedGenerationStatistics,
3754 ) -> SynthResult<Vec<JournalEntry>> {
3755 let mut entries = Vec::new();
3756
3757 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3759 debug!("Phase 4a: Generating JEs from document flows");
3760 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3761 debug!("Generated {} JEs from document flows", flow_entries.len());
3762 entries.extend(flow_entries);
3763 }
3764
3765 if self.phase_config.generate_journal_entries {
3767 info!("Phase 4: Generating Journal Entries");
3768 let je_entries = self.generate_journal_entries(coa)?;
3769 info!("Generated {} standalone journal entries", je_entries.len());
3770 entries.extend(je_entries);
3771 } else {
3772 debug!("Phase 4: Skipped (journal entry generation disabled)");
3773 }
3774
3775 if !entries.is_empty() {
3776 self.check_resources_with_log("post-journal-entries")?;
3779 }
3780
3781 Ok(entries)
3782 }
3783
3784 fn phase_anomaly_injection(
3786 &mut self,
3787 entries: &mut [JournalEntry],
3788 actions: &DegradationActions,
3789 stats: &mut EnhancedGenerationStatistics,
3790 ) -> SynthResult<AnomalyLabels> {
3791 if self.phase_config.inject_anomalies
3792 && !entries.is_empty()
3793 && !actions.skip_anomaly_injection
3794 {
3795 info!("Phase 5: Injecting Anomalies");
3796 let result = self.inject_anomalies(entries)?;
3797 stats.anomalies_injected = result.labels.len();
3798 info!("Injected {} anomalies", stats.anomalies_injected);
3799 self.check_resources_with_log("post-anomaly-injection")?;
3800 Ok(result)
3801 } else if actions.skip_anomaly_injection {
3802 warn!("Phase 5: Skipped due to resource degradation");
3803 Ok(AnomalyLabels::default())
3804 } else {
3805 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3806 Ok(AnomalyLabels::default())
3807 }
3808 }
3809
3810 fn phase_balance_validation(
3812 &mut self,
3813 entries: &[JournalEntry],
3814 ) -> SynthResult<BalanceValidationResult> {
3815 if self.phase_config.validate_balances && !entries.is_empty() {
3816 debug!("Phase 6: Validating Balances");
3817 let balance_validation = self.validate_journal_entries(entries)?;
3818 if balance_validation.is_balanced {
3819 debug!("Balance validation passed");
3820 } else {
3821 warn!(
3822 "Balance validation found {} errors",
3823 balance_validation.validation_errors.len()
3824 );
3825 }
3826 Ok(balance_validation)
3827 } else {
3828 Ok(BalanceValidationResult::default())
3829 }
3830 }
3831
3832 fn phase_data_quality_injection(
3834 &mut self,
3835 entries: &mut [JournalEntry],
3836 actions: &DegradationActions,
3837 stats: &mut EnhancedGenerationStatistics,
3838 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3839 if self.phase_config.inject_data_quality
3840 && !entries.is_empty()
3841 && !actions.skip_data_quality
3842 {
3843 info!("Phase 7: Injecting Data Quality Variations");
3844 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3845 stats.data_quality_issues = dq_stats.records_with_issues;
3846 info!("Injected {} data quality issues", stats.data_quality_issues);
3847 self.check_resources_with_log("post-data-quality")?;
3848 Ok((dq_stats, quality_issues))
3849 } else if actions.skip_data_quality {
3850 warn!("Phase 7: Skipped due to resource degradation");
3851 Ok((DataQualityStats::default(), Vec::new()))
3852 } else {
3853 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3854 Ok((DataQualityStats::default(), Vec::new()))
3855 }
3856 }
3857
3858 fn phase_period_close(
3868 &mut self,
3869 entries: &mut Vec<JournalEntry>,
3870 subledger: &SubledgerSnapshot,
3871 stats: &mut EnhancedGenerationStatistics,
3872 ) -> SynthResult<()> {
3873 if !self.phase_config.generate_period_close || entries.is_empty() {
3874 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3875 return Ok(());
3876 }
3877
3878 info!("Phase 10b: Generating period-close journal entries");
3879
3880 use datasynth_core::accounts::{
3881 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3882 };
3883 use rust_decimal::Decimal;
3884
3885 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3886 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3887 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3888 let close_date = end_date - chrono::Days::new(1);
3890
3891 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3896 .config
3897 .companies
3898 .iter()
3899 .map(|c| c.code.clone())
3900 .collect();
3901
3902 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3904 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3905
3906 let period_months = self.config.global.period_months;
3910 for asset in &subledger.fa_records {
3911 use datasynth_core::models::subledger::fa::AssetStatus;
3913 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3914 continue;
3915 }
3916 let useful_life_months = asset.useful_life_months();
3917 if useful_life_months == 0 {
3918 continue;
3920 }
3921 let salvage_value = asset.salvage_value();
3922 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3923 if depreciable_base == Decimal::ZERO {
3924 continue;
3925 }
3926 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3927 * Decimal::from(period_months))
3928 .round_dp(2);
3929 if period_depr <= Decimal::ZERO {
3930 continue;
3931 }
3932
3933 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3934 depr_header.document_type = "CL".to_string();
3935 depr_header.header_text = Some(format!(
3936 "Depreciation - {} {}",
3937 asset.asset_number, asset.description
3938 ));
3939 depr_header.created_by = "CLOSE_ENGINE".to_string();
3940 depr_header.source = TransactionSource::Automated;
3941 depr_header.business_process = Some(BusinessProcess::R2R);
3942
3943 let doc_id = depr_header.document_id;
3944 let mut depr_je = JournalEntry::new(depr_header);
3945
3946 depr_je.add_line(JournalEntryLine::debit(
3948 doc_id,
3949 1,
3950 expense_accounts::DEPRECIATION.to_string(),
3951 period_depr,
3952 ));
3953 depr_je.add_line(JournalEntryLine::credit(
3955 doc_id,
3956 2,
3957 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3958 period_depr,
3959 ));
3960
3961 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3962 close_jes.push(depr_je);
3963 }
3964
3965 if !subledger.fa_records.is_empty() {
3966 debug!(
3967 "Generated {} depreciation JEs from {} FA records",
3968 close_jes.len(),
3969 subledger.fa_records.len()
3970 );
3971 }
3972
3973 {
3977 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3978 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3979
3980 let accrual_items: &[(&str, &str, &str)] = &[
3982 ("Accrued Utilities", "6200", "2100"),
3983 ("Accrued Rent", "6300", "2100"),
3984 ("Accrued Interest", "6100", "2150"),
3985 ];
3986
3987 for company_code in &company_codes {
3988 let company_revenue: Decimal = entries
3990 .iter()
3991 .filter(|e| e.header.company_code == *company_code)
3992 .flat_map(|e| e.lines.iter())
3993 .filter(|l| l.gl_account.starts_with('4'))
3994 .map(|l| l.credit_amount - l.debit_amount)
3995 .fold(Decimal::ZERO, |acc, v| acc + v);
3996
3997 if company_revenue <= Decimal::ZERO {
3998 continue;
3999 }
4000
4001 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4003 if accrual_base <= Decimal::ZERO {
4004 continue;
4005 }
4006
4007 for (description, expense_acct, liability_acct) in accrual_items {
4008 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4009 company_code,
4010 description,
4011 accrual_base,
4012 expense_acct,
4013 liability_acct,
4014 close_date,
4015 None,
4016 );
4017 close_jes.push(accrual_je);
4018 if let Some(rev_je) = reversal_je {
4019 close_jes.push(rev_je);
4020 }
4021 }
4022 }
4023
4024 debug!(
4025 "Generated accrual entries for {} companies",
4026 company_codes.len()
4027 );
4028 }
4029
4030 for company_code in &company_codes {
4031 let mut total_revenue = Decimal::ZERO;
4036 let mut total_expenses = Decimal::ZERO;
4037
4038 for entry in entries.iter() {
4039 if entry.header.company_code != *company_code {
4040 continue;
4041 }
4042 for line in &entry.lines {
4043 let category = AccountCategory::from_account(&line.gl_account);
4044 match category {
4045 AccountCategory::Revenue => {
4046 total_revenue += line.credit_amount - line.debit_amount;
4048 }
4049 AccountCategory::Cogs
4050 | AccountCategory::OperatingExpense
4051 | AccountCategory::OtherIncomeExpense
4052 | AccountCategory::Tax => {
4053 total_expenses += line.debit_amount - line.credit_amount;
4055 }
4056 _ => {}
4057 }
4058 }
4059 }
4060
4061 let pre_tax_income = total_revenue - total_expenses;
4062
4063 if pre_tax_income == Decimal::ZERO {
4065 debug!(
4066 "Company {}: no pre-tax income, skipping period close",
4067 company_code
4068 );
4069 continue;
4070 }
4071
4072 if pre_tax_income > Decimal::ZERO {
4074 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4076
4077 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4078 tax_header.document_type = "CL".to_string();
4079 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4080 tax_header.created_by = "CLOSE_ENGINE".to_string();
4081 tax_header.source = TransactionSource::Automated;
4082 tax_header.business_process = Some(BusinessProcess::R2R);
4083
4084 let doc_id = tax_header.document_id;
4085 let mut tax_je = JournalEntry::new(tax_header);
4086
4087 tax_je.add_line(JournalEntryLine::debit(
4089 doc_id,
4090 1,
4091 tax_accounts::TAX_EXPENSE.to_string(),
4092 tax_amount,
4093 ));
4094 tax_je.add_line(JournalEntryLine::credit(
4096 doc_id,
4097 2,
4098 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4099 tax_amount,
4100 ));
4101
4102 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4103 close_jes.push(tax_je);
4104 } else {
4105 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4108 if dta_amount > Decimal::ZERO {
4109 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4110 dta_header.document_type = "CL".to_string();
4111 dta_header.header_text =
4112 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4113 dta_header.created_by = "CLOSE_ENGINE".to_string();
4114 dta_header.source = TransactionSource::Automated;
4115 dta_header.business_process = Some(BusinessProcess::R2R);
4116
4117 let doc_id = dta_header.document_id;
4118 let mut dta_je = JournalEntry::new(dta_header);
4119
4120 dta_je.add_line(JournalEntryLine::debit(
4122 doc_id,
4123 1,
4124 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4125 dta_amount,
4126 ));
4127 dta_je.add_line(JournalEntryLine::credit(
4130 doc_id,
4131 2,
4132 tax_accounts::TAX_EXPENSE.to_string(),
4133 dta_amount,
4134 ));
4135
4136 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4137 close_jes.push(dta_je);
4138 debug!(
4139 "Company {}: loss year — recognised DTA of {}",
4140 company_code, dta_amount
4141 );
4142 }
4143 }
4144
4145 let tax_provision = if pre_tax_income > Decimal::ZERO {
4151 (pre_tax_income * tax_rate).round_dp(2)
4152 } else {
4153 Decimal::ZERO
4154 };
4155 let net_income = pre_tax_income - tax_provision;
4156
4157 if net_income > Decimal::ZERO {
4158 use datasynth_generators::DividendGenerator;
4159 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4161 let currency_str = self
4162 .config
4163 .companies
4164 .iter()
4165 .find(|c| c.code == *company_code)
4166 .map(|c| c.currency.as_str())
4167 .unwrap_or("USD");
4168 let div_result = div_gen.generate(
4169 company_code,
4170 close_date,
4171 Decimal::new(1, 0), dividend_amount,
4173 currency_str,
4174 );
4175 let div_je_count = div_result.journal_entries.len();
4176 close_jes.extend(div_result.journal_entries);
4177 debug!(
4178 "Company {}: declared dividend of {} ({} JEs)",
4179 company_code, dividend_amount, div_je_count
4180 );
4181 }
4182
4183 if net_income != Decimal::ZERO {
4188 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4189 close_header.document_type = "CL".to_string();
4190 close_header.header_text =
4191 Some(format!("Income statement close - {}", company_code));
4192 close_header.created_by = "CLOSE_ENGINE".to_string();
4193 close_header.source = TransactionSource::Automated;
4194 close_header.business_process = Some(BusinessProcess::R2R);
4195
4196 let doc_id = close_header.document_id;
4197 let mut close_je = JournalEntry::new(close_header);
4198
4199 let abs_net_income = net_income.abs();
4200
4201 if net_income > Decimal::ZERO {
4202 close_je.add_line(JournalEntryLine::debit(
4204 doc_id,
4205 1,
4206 equity_accounts::INCOME_SUMMARY.to_string(),
4207 abs_net_income,
4208 ));
4209 close_je.add_line(JournalEntryLine::credit(
4210 doc_id,
4211 2,
4212 equity_accounts::RETAINED_EARNINGS.to_string(),
4213 abs_net_income,
4214 ));
4215 } else {
4216 close_je.add_line(JournalEntryLine::debit(
4218 doc_id,
4219 1,
4220 equity_accounts::RETAINED_EARNINGS.to_string(),
4221 abs_net_income,
4222 ));
4223 close_je.add_line(JournalEntryLine::credit(
4224 doc_id,
4225 2,
4226 equity_accounts::INCOME_SUMMARY.to_string(),
4227 abs_net_income,
4228 ));
4229 }
4230
4231 debug_assert!(
4232 close_je.is_balanced(),
4233 "Income statement closing JE must be balanced"
4234 );
4235 close_jes.push(close_je);
4236 }
4237 }
4238
4239 let close_count = close_jes.len();
4240 if close_count > 0 {
4241 info!("Generated {} period-close journal entries", close_count);
4242 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4243 entries.extend(close_jes);
4244 stats.period_close_je_count = close_count;
4245
4246 stats.total_entries = entries.len() as u64;
4248 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4249 } else {
4250 debug!("No period-close entries generated (no income statement activity)");
4251 }
4252
4253 Ok(())
4254 }
4255
4256 fn phase_audit_data(
4258 &mut self,
4259 entries: &[JournalEntry],
4260 stats: &mut EnhancedGenerationStatistics,
4261 ) -> SynthResult<AuditSnapshot> {
4262 if self.phase_config.generate_audit {
4263 info!("Phase 8: Generating Audit Data");
4264 let audit_snapshot = self.generate_audit_data(entries)?;
4265 stats.audit_engagement_count = audit_snapshot.engagements.len();
4266 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4267 stats.audit_evidence_count = audit_snapshot.evidence.len();
4268 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4269 stats.audit_finding_count = audit_snapshot.findings.len();
4270 stats.audit_judgment_count = audit_snapshot.judgments.len();
4271 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4272 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4273 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4274 stats.audit_sample_count = audit_snapshot.samples.len();
4275 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4276 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4277 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4278 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4279 stats.audit_related_party_transaction_count =
4280 audit_snapshot.related_party_transactions.len();
4281 info!(
4282 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4283 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4284 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4285 {} RP transactions",
4286 stats.audit_engagement_count,
4287 stats.audit_workpaper_count,
4288 stats.audit_evidence_count,
4289 stats.audit_risk_count,
4290 stats.audit_finding_count,
4291 stats.audit_judgment_count,
4292 stats.audit_confirmation_count,
4293 stats.audit_procedure_step_count,
4294 stats.audit_sample_count,
4295 stats.audit_analytical_result_count,
4296 stats.audit_ia_function_count,
4297 stats.audit_ia_report_count,
4298 stats.audit_related_party_count,
4299 stats.audit_related_party_transaction_count,
4300 );
4301 self.check_resources_with_log("post-audit")?;
4302 Ok(audit_snapshot)
4303 } else {
4304 debug!("Phase 8: Skipped (audit generation disabled)");
4305 Ok(AuditSnapshot::default())
4306 }
4307 }
4308
4309 fn phase_banking_data(
4311 &mut self,
4312 stats: &mut EnhancedGenerationStatistics,
4313 ) -> SynthResult<BankingSnapshot> {
4314 if self.phase_config.generate_banking {
4315 info!("Phase 9: Generating Banking KYC/AML Data");
4316 let banking_snapshot = self.generate_banking_data()?;
4317 stats.banking_customer_count = banking_snapshot.customers.len();
4318 stats.banking_account_count = banking_snapshot.accounts.len();
4319 stats.banking_transaction_count = banking_snapshot.transactions.len();
4320 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4321 info!(
4322 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4323 stats.banking_customer_count, stats.banking_account_count,
4324 stats.banking_transaction_count, stats.banking_suspicious_count
4325 );
4326 self.check_resources_with_log("post-banking")?;
4327 Ok(banking_snapshot)
4328 } else {
4329 debug!("Phase 9: Skipped (banking generation disabled)");
4330 Ok(BankingSnapshot::default())
4331 }
4332 }
4333
4334 fn phase_graph_export(
4336 &mut self,
4337 entries: &[JournalEntry],
4338 coa: &Arc<ChartOfAccounts>,
4339 stats: &mut EnhancedGenerationStatistics,
4340 ) -> SynthResult<GraphExportSnapshot> {
4341 if self.phase_config.generate_graph_export && !entries.is_empty() {
4342 info!("Phase 10: Exporting Accounting Network Graphs");
4343 match self.export_graphs(entries, coa, stats) {
4344 Ok(snapshot) => {
4345 info!(
4346 "Graph export complete: {} graphs ({} nodes, {} edges)",
4347 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4348 );
4349 Ok(snapshot)
4350 }
4351 Err(e) => {
4352 warn!("Phase 10: Graph export failed: {}", e);
4353 Ok(GraphExportSnapshot::default())
4354 }
4355 }
4356 } else {
4357 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4358 Ok(GraphExportSnapshot::default())
4359 }
4360 }
4361
4362 #[allow(clippy::too_many_arguments)]
4364 fn phase_hypergraph_export(
4365 &self,
4366 coa: &Arc<ChartOfAccounts>,
4367 entries: &[JournalEntry],
4368 document_flows: &DocumentFlowSnapshot,
4369 sourcing: &SourcingSnapshot,
4370 hr: &HrSnapshot,
4371 manufacturing: &ManufacturingSnapshot,
4372 banking: &BankingSnapshot,
4373 audit: &AuditSnapshot,
4374 financial_reporting: &FinancialReportingSnapshot,
4375 ocpm: &OcpmSnapshot,
4376 compliance: &ComplianceRegulationsSnapshot,
4377 stats: &mut EnhancedGenerationStatistics,
4378 ) -> SynthResult<()> {
4379 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4380 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4381 match self.export_hypergraph(
4382 coa,
4383 entries,
4384 document_flows,
4385 sourcing,
4386 hr,
4387 manufacturing,
4388 banking,
4389 audit,
4390 financial_reporting,
4391 ocpm,
4392 compliance,
4393 stats,
4394 ) {
4395 Ok(info) => {
4396 info!(
4397 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4398 info.node_count, info.edge_count, info.hyperedge_count
4399 );
4400 }
4401 Err(e) => {
4402 warn!("Phase 10b: Hypergraph export failed: {}", e);
4403 }
4404 }
4405 } else {
4406 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4407 }
4408 Ok(())
4409 }
4410
4411 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4417 if !self.config.llm.enabled {
4418 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4419 return;
4420 }
4421
4422 info!("Phase 11: Starting LLM Enrichment");
4423 let start = std::time::Instant::now();
4424
4425 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4426 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4429 let schema_provider = &self.config.llm.provider;
4430 let api_key_env = match schema_provider.as_str() {
4431 "openai" => Some("OPENAI_API_KEY"),
4432 "anthropic" => Some("ANTHROPIC_API_KEY"),
4433 "custom" => Some("LLM_API_KEY"),
4434 _ => None,
4435 };
4436 if let Some(key_env) = api_key_env {
4437 if std::env::var(key_env).is_ok() {
4438 let llm_config = datasynth_core::llm::LlmConfig {
4439 model: self.config.llm.model.clone(),
4440 api_key_env: key_env.to_string(),
4441 ..datasynth_core::llm::LlmConfig::default()
4442 };
4443 match HttpLlmProvider::new(llm_config) {
4444 Ok(p) => Arc::new(p),
4445 Err(e) => {
4446 warn!(
4447 "Failed to create HttpLlmProvider: {}; falling back to mock",
4448 e
4449 );
4450 Arc::new(MockLlmProvider::new(self.seed))
4451 }
4452 }
4453 } else {
4454 Arc::new(MockLlmProvider::new(self.seed))
4455 }
4456 } else {
4457 Arc::new(MockLlmProvider::new(self.seed))
4458 }
4459 };
4460 let enricher = VendorLlmEnricher::new(provider);
4461
4462 let industry = format!("{:?}", self.config.global.industry);
4463 let max_enrichments = self
4464 .config
4465 .llm
4466 .max_vendor_enrichments
4467 .min(self.master_data.vendors.len());
4468
4469 let mut enriched_count = 0usize;
4470 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4471 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4472 Ok(name) => {
4473 vendor.name = name;
4474 enriched_count += 1;
4475 }
4476 Err(e) => {
4477 warn!(
4478 "LLM vendor enrichment failed for {}: {}",
4479 vendor.vendor_id, e
4480 );
4481 }
4482 }
4483 }
4484
4485 enriched_count
4486 }));
4487
4488 match result {
4489 Ok(enriched_count) => {
4490 stats.llm_vendors_enriched = enriched_count;
4491 let elapsed = start.elapsed();
4492 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4493 info!(
4494 "Phase 11 complete: {} vendors enriched in {}ms",
4495 enriched_count, stats.llm_enrichment_ms
4496 );
4497 }
4498 Err(_) => {
4499 let elapsed = start.elapsed();
4500 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4501 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4502 }
4503 }
4504 }
4505
4506 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4512 if !self.config.diffusion.enabled {
4513 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4514 return;
4515 }
4516
4517 info!("Phase 12: Starting Diffusion Enhancement");
4518 let start = std::time::Instant::now();
4519
4520 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4521 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4524
4525 let diffusion_config = DiffusionConfig {
4526 n_steps: self.config.diffusion.n_steps,
4527 seed: self.seed,
4528 ..Default::default()
4529 };
4530
4531 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4532
4533 let n_samples = self.config.diffusion.sample_size;
4534 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4536
4537 samples.len()
4538 }));
4539
4540 match result {
4541 Ok(sample_count) => {
4542 stats.diffusion_samples_generated = sample_count;
4543 let elapsed = start.elapsed();
4544 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4545 info!(
4546 "Phase 12 complete: {} diffusion samples generated in {}ms",
4547 sample_count, stats.diffusion_enhancement_ms
4548 );
4549 }
4550 Err(_) => {
4551 let elapsed = start.elapsed();
4552 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4553 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4554 }
4555 }
4556 }
4557
4558 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4565 if !self.config.causal.enabled {
4566 debug!("Phase 13: Skipped (causal generation disabled)");
4567 return;
4568 }
4569
4570 info!("Phase 13: Starting Causal Overlay");
4571 let start = std::time::Instant::now();
4572
4573 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4574 let graph = match self.config.causal.template.as_str() {
4576 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4577 _ => CausalGraph::fraud_detection_template(),
4578 };
4579
4580 let scm = StructuralCausalModel::new(graph.clone())
4581 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4582
4583 let n_samples = self.config.causal.sample_size;
4584 let samples = scm
4585 .generate(n_samples, self.seed)
4586 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4587
4588 let validation_passed = if self.config.causal.validate {
4590 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4591 if report.valid {
4592 info!(
4593 "Causal validation passed: all {} checks OK",
4594 report.checks.len()
4595 );
4596 } else {
4597 warn!(
4598 "Causal validation: {} violations detected: {:?}",
4599 report.violations.len(),
4600 report.violations
4601 );
4602 }
4603 Some(report.valid)
4604 } else {
4605 None
4606 };
4607
4608 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4609 }));
4610
4611 match result {
4612 Ok(Ok((sample_count, validation_passed))) => {
4613 stats.causal_samples_generated = sample_count;
4614 stats.causal_validation_passed = validation_passed;
4615 let elapsed = start.elapsed();
4616 stats.causal_generation_ms = elapsed.as_millis() as u64;
4617 info!(
4618 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4619 sample_count, stats.causal_generation_ms, validation_passed,
4620 );
4621 }
4622 Ok(Err(e)) => {
4623 let elapsed = start.elapsed();
4624 stats.causal_generation_ms = elapsed.as_millis() as u64;
4625 warn!("Phase 13: Causal generation failed: {}", e);
4626 }
4627 Err(_) => {
4628 let elapsed = start.elapsed();
4629 stats.causal_generation_ms = elapsed.as_millis() as u64;
4630 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4631 }
4632 }
4633 }
4634
4635 fn phase_sourcing_data(
4637 &mut self,
4638 stats: &mut EnhancedGenerationStatistics,
4639 ) -> SynthResult<SourcingSnapshot> {
4640 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4641 debug!("Phase 14: Skipped (sourcing generation disabled)");
4642 return Ok(SourcingSnapshot::default());
4643 }
4644 let degradation = self.check_resources()?;
4645 if degradation >= DegradationLevel::Reduced {
4646 debug!(
4647 "Phase skipped due to resource pressure (degradation: {:?})",
4648 degradation
4649 );
4650 return Ok(SourcingSnapshot::default());
4651 }
4652
4653 info!("Phase 14: Generating S2C Sourcing Data");
4654 let seed = self.seed;
4655
4656 let vendor_ids: Vec<String> = self
4658 .master_data
4659 .vendors
4660 .iter()
4661 .map(|v| v.vendor_id.clone())
4662 .collect();
4663 if vendor_ids.is_empty() {
4664 debug!("Phase 14: Skipped (no vendors available)");
4665 return Ok(SourcingSnapshot::default());
4666 }
4667
4668 let categories: Vec<(String, String)> = vec![
4669 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4670 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4671 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4672 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4673 ("CAT-LOG".to_string(), "Logistics".to_string()),
4674 ];
4675 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4676 .iter()
4677 .map(|(id, name)| {
4678 (
4679 id.clone(),
4680 name.clone(),
4681 rust_decimal::Decimal::from(100_000),
4682 )
4683 })
4684 .collect();
4685
4686 let company_code = self
4687 .config
4688 .companies
4689 .first()
4690 .map(|c| c.code.as_str())
4691 .unwrap_or("1000");
4692 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4693 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4694 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4695 let fiscal_year = start_date.year() as u16;
4696 let owner_ids: Vec<String> = self
4697 .master_data
4698 .employees
4699 .iter()
4700 .take(5)
4701 .map(|e| e.employee_id.clone())
4702 .collect();
4703 let owner_id = owner_ids
4704 .first()
4705 .map(std::string::String::as_str)
4706 .unwrap_or("BUYER-001");
4707
4708 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4710 let spend_analyses =
4711 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4712
4713 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4715 let sourcing_projects = if owner_ids.is_empty() {
4716 Vec::new()
4717 } else {
4718 project_gen.generate(
4719 company_code,
4720 &categories_with_spend,
4721 &owner_ids,
4722 start_date,
4723 self.config.global.period_months,
4724 )
4725 };
4726 stats.sourcing_project_count = sourcing_projects.len();
4727
4728 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4730 let mut qual_gen = QualificationGenerator::new(seed + 2);
4731 let qualifications = qual_gen.generate(
4732 company_code,
4733 &qual_vendor_ids,
4734 sourcing_projects.first().map(|p| p.project_id.as_str()),
4735 owner_id,
4736 start_date,
4737 );
4738
4739 let mut rfx_gen = RfxGenerator::new(seed + 3);
4741 let rfx_events: Vec<RfxEvent> = sourcing_projects
4742 .iter()
4743 .map(|proj| {
4744 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4745 rfx_gen.generate(
4746 company_code,
4747 &proj.project_id,
4748 &proj.category_id,
4749 &qualified_vids,
4750 owner_id,
4751 start_date,
4752 50000.0,
4753 )
4754 })
4755 .collect();
4756 stats.rfx_event_count = rfx_events.len();
4757
4758 let mut bid_gen = BidGenerator::new(seed + 4);
4760 let mut all_bids = Vec::new();
4761 for rfx in &rfx_events {
4762 let bidder_count = vendor_ids.len().clamp(2, 5);
4763 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4764 let bids = bid_gen.generate(rfx, &responding, start_date);
4765 all_bids.extend(bids);
4766 }
4767 stats.bid_count = all_bids.len();
4768
4769 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4771 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4772 .iter()
4773 .map(|rfx| {
4774 let rfx_bids: Vec<SupplierBid> = all_bids
4775 .iter()
4776 .filter(|b| b.rfx_id == rfx.rfx_id)
4777 .cloned()
4778 .collect();
4779 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4780 })
4781 .collect();
4782
4783 let mut contract_gen = ContractGenerator::new(seed + 6);
4785 let contracts: Vec<ProcurementContract> = bid_evaluations
4786 .iter()
4787 .zip(rfx_events.iter())
4788 .filter_map(|(eval, rfx)| {
4789 eval.ranked_bids.first().and_then(|winner| {
4790 all_bids
4791 .iter()
4792 .find(|b| b.bid_id == winner.bid_id)
4793 .map(|winning_bid| {
4794 contract_gen.generate_from_bid(
4795 winning_bid,
4796 Some(&rfx.sourcing_project_id),
4797 &rfx.category_id,
4798 owner_id,
4799 start_date,
4800 )
4801 })
4802 })
4803 })
4804 .collect();
4805 stats.contract_count = contracts.len();
4806
4807 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4809 let catalog_items = catalog_gen.generate(&contracts);
4810 stats.catalog_item_count = catalog_items.len();
4811
4812 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4814 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4815 .iter()
4816 .fold(
4817 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4818 |mut acc, c| {
4819 acc.entry(c.vendor_id.clone()).or_default().push(c);
4820 acc
4821 },
4822 )
4823 .into_iter()
4824 .collect();
4825 let scorecards = scorecard_gen.generate(
4826 company_code,
4827 &vendor_contracts,
4828 start_date,
4829 end_date,
4830 owner_id,
4831 );
4832 stats.scorecard_count = scorecards.len();
4833
4834 let mut sourcing_projects = sourcing_projects;
4837 for project in &mut sourcing_projects {
4838 project.rfx_ids = rfx_events
4840 .iter()
4841 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4842 .map(|rfx| rfx.rfx_id.clone())
4843 .collect();
4844
4845 project.contract_id = contracts
4847 .iter()
4848 .find(|c| {
4849 c.sourcing_project_id
4850 .as_deref()
4851 .is_some_and(|sp| sp == project.project_id)
4852 })
4853 .map(|c| c.contract_id.clone());
4854
4855 project.spend_analysis_id = spend_analyses
4857 .iter()
4858 .find(|sa| sa.category_id == project.category_id)
4859 .map(|sa| sa.category_id.clone());
4860 }
4861
4862 info!(
4863 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4864 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4865 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4866 );
4867 self.check_resources_with_log("post-sourcing")?;
4868
4869 Ok(SourcingSnapshot {
4870 spend_analyses,
4871 sourcing_projects,
4872 qualifications,
4873 rfx_events,
4874 bids: all_bids,
4875 bid_evaluations,
4876 contracts,
4877 catalog_items,
4878 scorecards,
4879 })
4880 }
4881
4882 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4888 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4889
4890 let parent_code = self
4891 .config
4892 .companies
4893 .first()
4894 .map(|c| c.code.clone())
4895 .unwrap_or_else(|| "PARENT".to_string());
4896
4897 let mut group = GroupStructure::new(parent_code);
4898
4899 for company in self.config.companies.iter().skip(1) {
4900 let sub =
4901 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4902 group.add_subsidiary(sub);
4903 }
4904
4905 group
4906 }
4907
4908 fn phase_intercompany(
4910 &mut self,
4911 journal_entries: &[JournalEntry],
4912 stats: &mut EnhancedGenerationStatistics,
4913 ) -> SynthResult<IntercompanySnapshot> {
4914 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4916 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4917 return Ok(IntercompanySnapshot::default());
4918 }
4919
4920 if self.config.companies.len() < 2 {
4922 debug!(
4923 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4924 self.config.companies.len()
4925 );
4926 return Ok(IntercompanySnapshot::default());
4927 }
4928
4929 info!("Phase 14b: Generating Intercompany Transactions");
4930
4931 let group_structure = self.build_group_structure();
4934 debug!(
4935 "Group structure built: parent={}, subsidiaries={}",
4936 group_structure.parent_entity,
4937 group_structure.subsidiaries.len()
4938 );
4939
4940 let seed = self.seed;
4941 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4942 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4943 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4944
4945 let parent_code = self.config.companies[0].code.clone();
4948 let mut ownership_structure =
4949 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4950
4951 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4952 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4953 format!("REL{:03}", i + 1),
4954 parent_code.clone(),
4955 company.code.clone(),
4956 rust_decimal::Decimal::from(100), start_date,
4958 );
4959 ownership_structure.add_relationship(relationship);
4960 }
4961
4962 let tp_method = match self.config.intercompany.transfer_pricing_method {
4964 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4965 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4966 }
4967 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4968 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4969 }
4970 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4971 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4972 }
4973 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4974 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4975 }
4976 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4977 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4978 }
4979 };
4980
4981 let ic_currency = self
4983 .config
4984 .companies
4985 .first()
4986 .map(|c| c.currency.clone())
4987 .unwrap_or_else(|| "USD".to_string());
4988 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4989 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4990 transfer_pricing_method: tp_method,
4991 markup_percent: rust_decimal::Decimal::from_f64_retain(
4992 self.config.intercompany.markup_percent,
4993 )
4994 .unwrap_or(rust_decimal::Decimal::from(5)),
4995 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4996 default_currency: ic_currency,
4997 ..Default::default()
4998 };
4999
5000 let mut ic_generator = datasynth_generators::ICGenerator::new(
5002 ic_gen_config,
5003 ownership_structure.clone(),
5004 seed + 50,
5005 );
5006
5007 let transactions_per_day = 3;
5010 let matched_pairs = ic_generator.generate_transactions_for_period(
5011 start_date,
5012 end_date,
5013 transactions_per_day,
5014 );
5015
5016 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5018 debug!(
5019 "Generated {} IC seller invoices, {} IC buyer POs",
5020 ic_doc_chains.seller_invoices.len(),
5021 ic_doc_chains.buyer_orders.len()
5022 );
5023
5024 let mut seller_entries = Vec::new();
5026 let mut buyer_entries = Vec::new();
5027 let fiscal_year = start_date.year();
5028
5029 for pair in &matched_pairs {
5030 let fiscal_period = pair.posting_date.month();
5031 let (seller_je, buyer_je) =
5032 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5033 seller_entries.push(seller_je);
5034 buyer_entries.push(buyer_je);
5035 }
5036
5037 let matching_config = datasynth_generators::ICMatchingConfig {
5039 base_currency: self
5040 .config
5041 .companies
5042 .first()
5043 .map(|c| c.currency.clone())
5044 .unwrap_or_else(|| "USD".to_string()),
5045 ..Default::default()
5046 };
5047 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5048 matching_engine.load_matched_pairs(&matched_pairs);
5049 let matching_result = matching_engine.run_matching(end_date);
5050
5051 let mut elimination_entries = Vec::new();
5053 if self.config.intercompany.generate_eliminations {
5054 let elim_config = datasynth_generators::EliminationConfig {
5055 consolidation_entity: "GROUP".to_string(),
5056 base_currency: self
5057 .config
5058 .companies
5059 .first()
5060 .map(|c| c.currency.clone())
5061 .unwrap_or_else(|| "USD".to_string()),
5062 ..Default::default()
5063 };
5064
5065 let mut elim_generator =
5066 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5067
5068 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5069 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5070 matching_result
5071 .matched_balances
5072 .iter()
5073 .chain(matching_result.unmatched_balances.iter())
5074 .cloned()
5075 .collect();
5076
5077 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5089 std::collections::HashMap::new();
5090 let mut equity_amounts: std::collections::HashMap<
5091 String,
5092 std::collections::HashMap<String, rust_decimal::Decimal>,
5093 > = std::collections::HashMap::new();
5094 {
5095 use rust_decimal::Decimal;
5096 let hundred = Decimal::from(100u32);
5097 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5101 for sub in &group_structure.subsidiaries {
5102 let net_assets = {
5103 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5104 if na > Decimal::ZERO {
5105 na
5106 } else {
5107 Decimal::from(1_000_000u64)
5108 }
5109 };
5110 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5112 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5113
5114 let mut eq_map = std::collections::HashMap::new();
5117 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5118 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5119 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5120 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5121 }
5122 }
5123
5124 let journal = elim_generator.generate_eliminations(
5125 &fiscal_period,
5126 end_date,
5127 &all_balances,
5128 &matched_pairs,
5129 &investment_amounts,
5130 &equity_amounts,
5131 );
5132
5133 elimination_entries = journal.entries.clone();
5134 }
5135
5136 let matched_pair_count = matched_pairs.len();
5137 let elimination_entry_count = elimination_entries.len();
5138 let match_rate = matching_result.match_rate;
5139
5140 stats.ic_matched_pair_count = matched_pair_count;
5141 stats.ic_elimination_count = elimination_entry_count;
5142 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5143
5144 info!(
5145 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5146 matched_pair_count,
5147 stats.ic_transaction_count,
5148 seller_entries.len(),
5149 buyer_entries.len(),
5150 elimination_entry_count,
5151 match_rate * 100.0
5152 );
5153 self.check_resources_with_log("post-intercompany")?;
5154
5155 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5159 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5160 use rust_decimal::Decimal;
5161
5162 let eight_pct = Decimal::new(8, 2); group_structure
5165 .subsidiaries
5166 .iter()
5167 .filter(|sub| {
5168 sub.nci_percentage > Decimal::ZERO
5169 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5170 })
5171 .map(|sub| {
5172 let net_assets_from_jes =
5176 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5177
5178 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5179 net_assets_from_jes.round_dp(2)
5180 } else {
5181 Decimal::from(1_000_000u64)
5183 };
5184
5185 let net_income = (net_assets * eight_pct).round_dp(2);
5187
5188 NciMeasurement::compute(
5189 sub.entity_code.clone(),
5190 sub.nci_percentage,
5191 net_assets,
5192 net_income,
5193 )
5194 })
5195 .collect()
5196 };
5197
5198 if !nci_measurements.is_empty() {
5199 info!(
5200 "NCI measurements: {} subsidiaries with non-controlling interests",
5201 nci_measurements.len()
5202 );
5203 }
5204
5205 Ok(IntercompanySnapshot {
5206 group_structure: Some(group_structure),
5207 matched_pairs,
5208 seller_journal_entries: seller_entries,
5209 buyer_journal_entries: buyer_entries,
5210 elimination_entries,
5211 nci_measurements,
5212 ic_document_chains: Some(ic_doc_chains),
5213 matched_pair_count,
5214 elimination_entry_count,
5215 match_rate,
5216 })
5217 }
5218
5219 fn phase_financial_reporting(
5221 &mut self,
5222 document_flows: &DocumentFlowSnapshot,
5223 journal_entries: &[JournalEntry],
5224 coa: &Arc<ChartOfAccounts>,
5225 _hr: &HrSnapshot,
5226 _audit: &AuditSnapshot,
5227 stats: &mut EnhancedGenerationStatistics,
5228 ) -> SynthResult<FinancialReportingSnapshot> {
5229 let fs_enabled = self.phase_config.generate_financial_statements
5230 || self.config.financial_reporting.enabled;
5231 let br_enabled = self.phase_config.generate_bank_reconciliation;
5232
5233 if !fs_enabled && !br_enabled {
5234 debug!("Phase 15: Skipped (financial reporting disabled)");
5235 return Ok(FinancialReportingSnapshot::default());
5236 }
5237
5238 info!("Phase 15: Generating Financial Reporting Data");
5239
5240 let seed = self.seed;
5241 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5242 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5243
5244 let mut financial_statements = Vec::new();
5245 let mut bank_reconciliations = Vec::new();
5246 let mut trial_balances = Vec::new();
5247 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5248 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5249 Vec::new();
5250 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5252 std::collections::HashMap::new();
5253 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5255 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5257
5258 if fs_enabled {
5266 let has_journal_entries = !journal_entries.is_empty();
5267
5268 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5271 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5273
5274 let elimination_entries: Vec<&JournalEntry> = journal_entries
5276 .iter()
5277 .filter(|je| je.header.is_elimination)
5278 .collect();
5279
5280 for period in 0..self.config.global.period_months {
5282 let period_start = start_date + chrono::Months::new(period);
5283 let period_end =
5284 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5285 let fiscal_year = period_end.year() as u16;
5286 let fiscal_period = period_end.month() as u8;
5287 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5288
5289 let mut entity_tb_map: std::collections::HashMap<
5292 String,
5293 std::collections::HashMap<String, rust_decimal::Decimal>,
5294 > = std::collections::HashMap::new();
5295
5296 for (company_idx, company) in self.config.companies.iter().enumerate() {
5298 let company_code = company.code.as_str();
5299 let currency = company.currency.as_str();
5300 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5303 let mut company_fs_gen =
5304 FinancialStatementGenerator::new(seed + company_seed_offset);
5305
5306 if has_journal_entries {
5307 let tb_entries = Self::build_cumulative_trial_balance(
5308 journal_entries,
5309 coa,
5310 company_code,
5311 start_date,
5312 period_end,
5313 fiscal_year,
5314 fiscal_period,
5315 );
5316
5317 let entity_cat_map =
5319 entity_tb_map.entry(company_code.to_string()).or_default();
5320 for tb_entry in &tb_entries {
5321 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5322 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5323 }
5324
5325 let stmts = company_fs_gen.generate(
5326 company_code,
5327 currency,
5328 &tb_entries,
5329 period_start,
5330 period_end,
5331 fiscal_year,
5332 fiscal_period,
5333 None,
5334 "SYS-AUTOCLOSE",
5335 );
5336
5337 let mut entity_stmts = Vec::new();
5338 for stmt in stmts {
5339 if stmt.statement_type == StatementType::CashFlowStatement {
5340 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5341 let cf_items = Self::build_cash_flow_from_trial_balances(
5342 &tb_entries,
5343 None,
5344 net_income,
5345 );
5346 entity_stmts.push(FinancialStatement {
5347 cash_flow_items: cf_items,
5348 ..stmt
5349 });
5350 } else {
5351 entity_stmts.push(stmt);
5352 }
5353 }
5354
5355 financial_statements.extend(entity_stmts.clone());
5357
5358 standalone_statements
5360 .entry(company_code.to_string())
5361 .or_default()
5362 .extend(entity_stmts);
5363
5364 if company_idx == 0 {
5367 trial_balances.push(PeriodTrialBalance {
5368 fiscal_year,
5369 fiscal_period,
5370 period_start,
5371 period_end,
5372 entries: tb_entries,
5373 });
5374 }
5375 } else {
5376 let tb_entries = Self::build_trial_balance_from_entries(
5378 journal_entries,
5379 coa,
5380 company_code,
5381 fiscal_year,
5382 fiscal_period,
5383 );
5384
5385 let stmts = company_fs_gen.generate(
5386 company_code,
5387 currency,
5388 &tb_entries,
5389 period_start,
5390 period_end,
5391 fiscal_year,
5392 fiscal_period,
5393 None,
5394 "SYS-AUTOCLOSE",
5395 );
5396 financial_statements.extend(stmts.clone());
5397 standalone_statements
5398 .entry(company_code.to_string())
5399 .or_default()
5400 .extend(stmts);
5401
5402 if company_idx == 0 && !tb_entries.is_empty() {
5403 trial_balances.push(PeriodTrialBalance {
5404 fiscal_year,
5405 fiscal_period,
5406 period_start,
5407 period_end,
5408 entries: tb_entries,
5409 });
5410 }
5411 }
5412 }
5413
5414 let group_currency = self
5417 .config
5418 .companies
5419 .first()
5420 .map(|c| c.currency.as_str())
5421 .unwrap_or("USD");
5422
5423 let period_eliminations: Vec<JournalEntry> = elimination_entries
5425 .iter()
5426 .filter(|je| {
5427 je.header.fiscal_year == fiscal_year
5428 && je.header.fiscal_period == fiscal_period
5429 })
5430 .map(|je| (*je).clone())
5431 .collect();
5432
5433 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5434 &entity_tb_map,
5435 &period_eliminations,
5436 &period_label,
5437 );
5438
5439 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5442 .line_items
5443 .iter()
5444 .map(|li| {
5445 let net = li.post_elimination_total;
5446 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5447 (net, rust_decimal::Decimal::ZERO)
5448 } else {
5449 (rust_decimal::Decimal::ZERO, -net)
5450 };
5451 datasynth_generators::TrialBalanceEntry {
5452 account_code: li.account_category.clone(),
5453 account_name: li.account_category.clone(),
5454 category: li.account_category.clone(),
5455 debit_balance: debit,
5456 credit_balance: credit,
5457 }
5458 })
5459 .collect();
5460
5461 let mut cons_stmts = cons_gen.generate(
5462 "GROUP",
5463 group_currency,
5464 &cons_tb,
5465 period_start,
5466 period_end,
5467 fiscal_year,
5468 fiscal_period,
5469 None,
5470 "SYS-AUTOCLOSE",
5471 );
5472
5473 let bs_categories: &[&str] = &[
5477 "CASH",
5478 "RECEIVABLES",
5479 "INVENTORY",
5480 "FIXEDASSETS",
5481 "PAYABLES",
5482 "ACCRUEDLIABILITIES",
5483 "LONGTERMDEBT",
5484 "EQUITY",
5485 ];
5486 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5487 cons_line_items.into_iter().partition(|li| {
5488 let upper = li.label.to_uppercase();
5489 bs_categories.iter().any(|c| upper == *c)
5490 });
5491
5492 for stmt in &mut cons_stmts {
5493 stmt.is_consolidated = true;
5494 match stmt.statement_type {
5495 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5496 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5497 _ => {} }
5499 }
5500
5501 consolidated_statements.extend(cons_stmts);
5502 consolidation_schedules.push(schedule);
5503 }
5504
5505 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5511 info!(
5512 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5513 stats.financial_statement_count,
5514 consolidated_statements.len(),
5515 has_journal_entries
5516 );
5517
5518 let entity_seeds: Vec<SegmentSeed> = self
5523 .config
5524 .companies
5525 .iter()
5526 .map(|c| SegmentSeed {
5527 code: c.code.clone(),
5528 name: c.name.clone(),
5529 currency: c.currency.clone(),
5530 })
5531 .collect();
5532
5533 let mut seg_gen = SegmentGenerator::new(seed + 30);
5534
5535 for period in 0..self.config.global.period_months {
5540 let period_end =
5541 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5542 let fiscal_year = period_end.year() as u16;
5543 let fiscal_period = period_end.month() as u8;
5544 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5545
5546 use datasynth_core::models::StatementType;
5547
5548 let cons_is = consolidated_statements.iter().find(|s| {
5550 s.fiscal_year == fiscal_year
5551 && s.fiscal_period == fiscal_period
5552 && s.statement_type == StatementType::IncomeStatement
5553 });
5554 let cons_bs = consolidated_statements.iter().find(|s| {
5555 s.fiscal_year == fiscal_year
5556 && s.fiscal_period == fiscal_period
5557 && s.statement_type == StatementType::BalanceSheet
5558 });
5559
5560 let is_stmt = cons_is.or_else(|| {
5562 financial_statements.iter().find(|s| {
5563 s.fiscal_year == fiscal_year
5564 && s.fiscal_period == fiscal_period
5565 && s.statement_type == StatementType::IncomeStatement
5566 })
5567 });
5568 let bs_stmt = cons_bs.or_else(|| {
5569 financial_statements.iter().find(|s| {
5570 s.fiscal_year == fiscal_year
5571 && s.fiscal_period == fiscal_period
5572 && s.statement_type == StatementType::BalanceSheet
5573 })
5574 });
5575
5576 let consolidated_revenue = is_stmt
5577 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5578 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5580
5581 let consolidated_profit = is_stmt
5582 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5583 .map(|li| li.amount)
5584 .unwrap_or(rust_decimal::Decimal::ZERO);
5585
5586 let consolidated_assets = bs_stmt
5587 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5588 .map(|li| li.amount)
5589 .unwrap_or(rust_decimal::Decimal::ZERO);
5590
5591 if consolidated_revenue == rust_decimal::Decimal::ZERO
5593 && consolidated_assets == rust_decimal::Decimal::ZERO
5594 {
5595 continue;
5596 }
5597
5598 let group_code = self
5599 .config
5600 .companies
5601 .first()
5602 .map(|c| c.code.as_str())
5603 .unwrap_or("GROUP");
5604
5605 let total_depr: rust_decimal::Decimal = journal_entries
5608 .iter()
5609 .filter(|je| je.header.document_type == "CL")
5610 .flat_map(|je| je.lines.iter())
5611 .filter(|l| l.gl_account.starts_with("6000"))
5612 .map(|l| l.debit_amount)
5613 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5614 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5615 Some(total_depr)
5616 } else {
5617 None
5618 };
5619
5620 let (segs, recon) = seg_gen.generate(
5621 group_code,
5622 &period_label,
5623 consolidated_revenue,
5624 consolidated_profit,
5625 consolidated_assets,
5626 &entity_seeds,
5627 depr_param,
5628 );
5629 segment_reports.extend(segs);
5630 segment_reconciliations.push(recon);
5631 }
5632
5633 info!(
5634 "Segment reports generated: {} segments, {} reconciliations",
5635 segment_reports.len(),
5636 segment_reconciliations.len()
5637 );
5638 }
5639
5640 if br_enabled && !document_flows.payments.is_empty() {
5642 let employee_ids: Vec<String> = self
5643 .master_data
5644 .employees
5645 .iter()
5646 .map(|e| e.employee_id.clone())
5647 .collect();
5648 let mut br_gen =
5649 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5650
5651 for company in &self.config.companies {
5653 let company_payments: Vec<PaymentReference> = document_flows
5654 .payments
5655 .iter()
5656 .filter(|p| p.header.company_code == company.code)
5657 .map(|p| PaymentReference {
5658 id: p.header.document_id.clone(),
5659 amount: if p.is_vendor { p.amount } else { -p.amount },
5660 date: p.header.document_date,
5661 reference: p
5662 .check_number
5663 .clone()
5664 .or_else(|| p.wire_reference.clone())
5665 .unwrap_or_else(|| p.header.document_id.clone()),
5666 })
5667 .collect();
5668
5669 if company_payments.is_empty() {
5670 continue;
5671 }
5672
5673 let bank_account_id = format!("{}-MAIN", company.code);
5674
5675 for period in 0..self.config.global.period_months {
5677 let period_start = start_date + chrono::Months::new(period);
5678 let period_end =
5679 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5680
5681 let period_payments: Vec<PaymentReference> = company_payments
5682 .iter()
5683 .filter(|p| p.date >= period_start && p.date <= period_end)
5684 .cloned()
5685 .collect();
5686
5687 let recon = br_gen.generate(
5688 &company.code,
5689 &bank_account_id,
5690 period_start,
5691 period_end,
5692 &company.currency,
5693 &period_payments,
5694 );
5695 bank_reconciliations.push(recon);
5696 }
5697 }
5698 info!(
5699 "Bank reconciliations generated: {} reconciliations",
5700 bank_reconciliations.len()
5701 );
5702 }
5703
5704 stats.bank_reconciliation_count = bank_reconciliations.len();
5705 self.check_resources_with_log("post-financial-reporting")?;
5706
5707 if !trial_balances.is_empty() {
5708 info!(
5709 "Period-close trial balances captured: {} periods",
5710 trial_balances.len()
5711 );
5712 }
5713
5714 let notes_to_financial_statements = Vec::new();
5718
5719 Ok(FinancialReportingSnapshot {
5720 financial_statements,
5721 standalone_statements,
5722 consolidated_statements,
5723 consolidation_schedules,
5724 bank_reconciliations,
5725 trial_balances,
5726 segment_reports,
5727 segment_reconciliations,
5728 notes_to_financial_statements,
5729 })
5730 }
5731
5732 fn generate_notes_to_financial_statements(
5739 &self,
5740 financial_reporting: &mut FinancialReportingSnapshot,
5741 accounting_standards: &AccountingStandardsSnapshot,
5742 tax: &TaxSnapshot,
5743 hr: &HrSnapshot,
5744 audit: &AuditSnapshot,
5745 treasury: &TreasurySnapshot,
5746 ) {
5747 use datasynth_config::schema::AccountingFrameworkConfig;
5748 use datasynth_core::models::StatementType;
5749 use datasynth_generators::period_close::notes_generator::{
5750 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5751 };
5752
5753 let seed = self.seed;
5754 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5755 {
5756 Ok(d) => d,
5757 Err(_) => return,
5758 };
5759
5760 let mut notes_gen = NotesGenerator::new(seed + 4235);
5761
5762 for company in &self.config.companies {
5763 let last_period_end = start_date
5764 + chrono::Months::new(self.config.global.period_months)
5765 - chrono::Days::new(1);
5766 let fiscal_year = last_period_end.year() as u16;
5767
5768 let entity_is = financial_reporting
5770 .standalone_statements
5771 .get(&company.code)
5772 .and_then(|stmts| {
5773 stmts.iter().find(|s| {
5774 s.fiscal_year == fiscal_year
5775 && s.statement_type == StatementType::IncomeStatement
5776 })
5777 });
5778 let entity_bs = financial_reporting
5779 .standalone_statements
5780 .get(&company.code)
5781 .and_then(|stmts| {
5782 stmts.iter().find(|s| {
5783 s.fiscal_year == fiscal_year
5784 && s.statement_type == StatementType::BalanceSheet
5785 })
5786 });
5787
5788 let revenue_amount = entity_is
5790 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5791 .map(|li| li.amount);
5792 let ppe_gross = entity_bs
5793 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5794 .map(|li| li.amount);
5795
5796 let framework = match self
5797 .config
5798 .accounting_standards
5799 .framework
5800 .unwrap_or_default()
5801 {
5802 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5803 "IFRS".to_string()
5804 }
5805 _ => "US GAAP".to_string(),
5806 };
5807
5808 let (entity_dta, entity_dtl) = {
5811 let mut dta = rust_decimal::Decimal::ZERO;
5812 let mut dtl = rust_decimal::Decimal::ZERO;
5813 for rf in &tax.deferred_tax.rollforwards {
5814 if rf.entity_code == company.code {
5815 dta += rf.closing_dta;
5816 dtl += rf.closing_dtl;
5817 }
5818 }
5819 (
5820 if dta > rust_decimal::Decimal::ZERO {
5821 Some(dta)
5822 } else {
5823 None
5824 },
5825 if dtl > rust_decimal::Decimal::ZERO {
5826 Some(dtl)
5827 } else {
5828 None
5829 },
5830 )
5831 };
5832
5833 let entity_provisions: Vec<_> = accounting_standards
5836 .provisions
5837 .iter()
5838 .filter(|p| p.entity_code == company.code)
5839 .collect();
5840 let provision_count = entity_provisions.len();
5841 let total_provisions = if provision_count > 0 {
5842 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5843 } else {
5844 None
5845 };
5846
5847 let entity_pension_plan_count = hr
5849 .pension_plans
5850 .iter()
5851 .filter(|p| p.entity_code == company.code)
5852 .count();
5853 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5854 let sum: rust_decimal::Decimal = hr
5855 .pension_disclosures
5856 .iter()
5857 .filter(|d| {
5858 hr.pension_plans
5859 .iter()
5860 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5861 })
5862 .map(|d| d.net_pension_liability)
5863 .sum();
5864 let plan_assets_sum: rust_decimal::Decimal = hr
5865 .pension_plan_assets
5866 .iter()
5867 .filter(|a| {
5868 hr.pension_plans
5869 .iter()
5870 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5871 })
5872 .map(|a| a.fair_value_closing)
5873 .sum();
5874 if entity_pension_plan_count > 0 {
5875 Some(sum + plan_assets_sum)
5876 } else {
5877 None
5878 }
5879 };
5880 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5881 let sum: rust_decimal::Decimal = hr
5882 .pension_plan_assets
5883 .iter()
5884 .filter(|a| {
5885 hr.pension_plans
5886 .iter()
5887 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5888 })
5889 .map(|a| a.fair_value_closing)
5890 .sum();
5891 if entity_pension_plan_count > 0 {
5892 Some(sum)
5893 } else {
5894 None
5895 }
5896 };
5897
5898 let rp_count = audit.related_party_transactions.len();
5901 let se_count = audit.subsequent_events.len();
5902 let adjusting_count = audit
5903 .subsequent_events
5904 .iter()
5905 .filter(|e| {
5906 matches!(
5907 e.classification,
5908 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5909 )
5910 })
5911 .count();
5912
5913 let ctx = NotesGeneratorContext {
5914 entity_code: company.code.clone(),
5915 framework,
5916 period: format!("FY{}", fiscal_year),
5917 period_end: last_period_end,
5918 currency: company.currency.clone(),
5919 revenue_amount,
5920 total_ppe_gross: ppe_gross,
5921 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5922 deferred_tax_asset: entity_dta,
5924 deferred_tax_liability: entity_dtl,
5925 provision_count,
5927 total_provisions,
5928 pension_plan_count: entity_pension_plan_count,
5930 total_dbo: entity_total_dbo,
5931 total_plan_assets: entity_total_plan_assets,
5932 related_party_transaction_count: rp_count,
5934 subsequent_event_count: se_count,
5935 adjusting_event_count: adjusting_count,
5936 ..NotesGeneratorContext::default()
5937 };
5938
5939 let entity_notes = notes_gen.generate(&ctx);
5940 let standard_note_count = entity_notes.len() as u32;
5941 info!(
5942 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5943 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5944 );
5945 financial_reporting
5946 .notes_to_financial_statements
5947 .extend(entity_notes);
5948
5949 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5951 .debt_instruments
5952 .iter()
5953 .filter(|d| d.entity_id == company.code)
5954 .map(|d| {
5955 (
5956 format!("{:?}", d.instrument_type),
5957 d.principal,
5958 d.maturity_date.to_string(),
5959 )
5960 })
5961 .collect();
5962
5963 let hedge_count = treasury.hedge_relationships.len();
5964 let effective_hedges = treasury
5965 .hedge_relationships
5966 .iter()
5967 .filter(|h| h.is_effective)
5968 .count();
5969 let total_notional: rust_decimal::Decimal = treasury
5970 .hedging_instruments
5971 .iter()
5972 .map(|h| h.notional_amount)
5973 .sum();
5974 let total_fair_value: rust_decimal::Decimal = treasury
5975 .hedging_instruments
5976 .iter()
5977 .map(|h| h.fair_value)
5978 .sum();
5979
5980 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5982 .provisions
5983 .iter()
5984 .filter(|p| p.entity_code == company.code)
5985 .map(|p| p.id.as_str())
5986 .collect();
5987 let provision_movements: Vec<(
5988 String,
5989 rust_decimal::Decimal,
5990 rust_decimal::Decimal,
5991 rust_decimal::Decimal,
5992 )> = accounting_standards
5993 .provision_movements
5994 .iter()
5995 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5996 .map(|m| {
5997 let prov_type = accounting_standards
5998 .provisions
5999 .iter()
6000 .find(|p| p.id == m.provision_id)
6001 .map(|p| format!("{:?}", p.provision_type))
6002 .unwrap_or_else(|| "Unknown".to_string());
6003 (prov_type, m.opening, m.additions, m.closing)
6004 })
6005 .collect();
6006
6007 let enhanced_ctx = EnhancedNotesContext {
6008 entity_code: company.code.clone(),
6009 period: format!("FY{}", fiscal_year),
6010 currency: company.currency.clone(),
6011 finished_goods_value: rust_decimal::Decimal::ZERO,
6013 wip_value: rust_decimal::Decimal::ZERO,
6014 raw_materials_value: rust_decimal::Decimal::ZERO,
6015 debt_instruments,
6016 hedge_count,
6017 effective_hedges,
6018 total_notional,
6019 total_fair_value,
6020 provision_movements,
6021 };
6022
6023 let enhanced_notes =
6024 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6025 if !enhanced_notes.is_empty() {
6026 info!(
6027 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6028 company.code,
6029 enhanced_notes.len(),
6030 enhanced_ctx.debt_instruments.len(),
6031 hedge_count,
6032 enhanced_ctx.provision_movements.len(),
6033 );
6034 financial_reporting
6035 .notes_to_financial_statements
6036 .extend(enhanced_notes);
6037 }
6038 }
6039 }
6040
6041 fn build_trial_balance_from_entries(
6047 journal_entries: &[JournalEntry],
6048 coa: &ChartOfAccounts,
6049 company_code: &str,
6050 fiscal_year: u16,
6051 fiscal_period: u8,
6052 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6053 use rust_decimal::Decimal;
6054
6055 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6057 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6058
6059 for je in journal_entries {
6060 if je.header.company_code != company_code
6062 || je.header.fiscal_year != fiscal_year
6063 || je.header.fiscal_period != fiscal_period
6064 {
6065 continue;
6066 }
6067
6068 for line in &je.lines {
6069 let acct = &line.gl_account;
6070 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6071 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6072 }
6073 }
6074
6075 let mut all_accounts: Vec<&String> = account_debits
6077 .keys()
6078 .chain(account_credits.keys())
6079 .collect::<std::collections::HashSet<_>>()
6080 .into_iter()
6081 .collect();
6082 all_accounts.sort();
6083
6084 let mut entries = Vec::new();
6085
6086 for acct_number in all_accounts {
6087 let debit = account_debits
6088 .get(acct_number)
6089 .copied()
6090 .unwrap_or(Decimal::ZERO);
6091 let credit = account_credits
6092 .get(acct_number)
6093 .copied()
6094 .unwrap_or(Decimal::ZERO);
6095
6096 if debit.is_zero() && credit.is_zero() {
6097 continue;
6098 }
6099
6100 let account_name = coa
6102 .get_account(acct_number)
6103 .map(|gl| gl.short_description.clone())
6104 .unwrap_or_else(|| format!("Account {acct_number}"));
6105
6106 let category = Self::category_from_account_code(acct_number);
6111
6112 entries.push(datasynth_generators::TrialBalanceEntry {
6113 account_code: acct_number.clone(),
6114 account_name,
6115 category,
6116 debit_balance: debit,
6117 credit_balance: credit,
6118 });
6119 }
6120
6121 entries
6122 }
6123
6124 fn build_cumulative_trial_balance(
6131 journal_entries: &[JournalEntry],
6132 coa: &ChartOfAccounts,
6133 company_code: &str,
6134 start_date: NaiveDate,
6135 period_end: NaiveDate,
6136 fiscal_year: u16,
6137 fiscal_period: u8,
6138 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6139 use rust_decimal::Decimal;
6140
6141 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6143 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6144
6145 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6147 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6148
6149 for je in journal_entries {
6150 if je.header.company_code != company_code {
6151 continue;
6152 }
6153
6154 for line in &je.lines {
6155 let acct = &line.gl_account;
6156 let category = Self::category_from_account_code(acct);
6157 let is_bs_account = matches!(
6158 category.as_str(),
6159 "Cash"
6160 | "Receivables"
6161 | "Inventory"
6162 | "FixedAssets"
6163 | "Payables"
6164 | "AccruedLiabilities"
6165 | "LongTermDebt"
6166 | "Equity"
6167 );
6168
6169 if is_bs_account {
6170 if je.header.document_date <= period_end
6172 && je.header.document_date >= start_date
6173 {
6174 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6175 line.debit_amount;
6176 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6177 line.credit_amount;
6178 }
6179 } else {
6180 if je.header.fiscal_year == fiscal_year
6182 && je.header.fiscal_period == fiscal_period
6183 {
6184 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6185 line.debit_amount;
6186 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6187 line.credit_amount;
6188 }
6189 }
6190 }
6191 }
6192
6193 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6195 all_accounts.extend(bs_debits.keys().cloned());
6196 all_accounts.extend(bs_credits.keys().cloned());
6197 all_accounts.extend(is_debits.keys().cloned());
6198 all_accounts.extend(is_credits.keys().cloned());
6199
6200 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6201 sorted_accounts.sort();
6202
6203 let mut entries = Vec::new();
6204
6205 for acct_number in &sorted_accounts {
6206 let category = Self::category_from_account_code(acct_number);
6207 let is_bs_account = matches!(
6208 category.as_str(),
6209 "Cash"
6210 | "Receivables"
6211 | "Inventory"
6212 | "FixedAssets"
6213 | "Payables"
6214 | "AccruedLiabilities"
6215 | "LongTermDebt"
6216 | "Equity"
6217 );
6218
6219 let (debit, credit) = if is_bs_account {
6220 (
6221 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6222 bs_credits
6223 .get(acct_number)
6224 .copied()
6225 .unwrap_or(Decimal::ZERO),
6226 )
6227 } else {
6228 (
6229 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6230 is_credits
6231 .get(acct_number)
6232 .copied()
6233 .unwrap_or(Decimal::ZERO),
6234 )
6235 };
6236
6237 if debit.is_zero() && credit.is_zero() {
6238 continue;
6239 }
6240
6241 let account_name = coa
6242 .get_account(acct_number)
6243 .map(|gl| gl.short_description.clone())
6244 .unwrap_or_else(|| format!("Account {acct_number}"));
6245
6246 entries.push(datasynth_generators::TrialBalanceEntry {
6247 account_code: acct_number.clone(),
6248 account_name,
6249 category,
6250 debit_balance: debit,
6251 credit_balance: credit,
6252 });
6253 }
6254
6255 entries
6256 }
6257
6258 fn build_cash_flow_from_trial_balances(
6263 current_tb: &[datasynth_generators::TrialBalanceEntry],
6264 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6265 net_income: rust_decimal::Decimal,
6266 ) -> Vec<CashFlowItem> {
6267 use rust_decimal::Decimal;
6268
6269 let aggregate =
6271 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6272 let mut map: HashMap<String, Decimal> = HashMap::new();
6273 for entry in tb {
6274 let net = entry.debit_balance - entry.credit_balance;
6275 *map.entry(entry.category.clone()).or_default() += net;
6276 }
6277 map
6278 };
6279
6280 let current = aggregate(current_tb);
6281 let prior = prior_tb.map(aggregate);
6282
6283 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6285 *map.get(key).unwrap_or(&Decimal::ZERO)
6286 };
6287
6288 let change = |key: &str| -> Decimal {
6290 let curr = get(¤t, key);
6291 match &prior {
6292 Some(p) => curr - get(p, key),
6293 None => curr,
6294 }
6295 };
6296
6297 let fixed_asset_change = change("FixedAssets");
6300 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6301 -fixed_asset_change
6302 } else {
6303 Decimal::ZERO
6304 };
6305
6306 let ar_change = change("Receivables");
6308 let inventory_change = change("Inventory");
6309 let ap_change = change("Payables");
6311 let accrued_change = change("AccruedLiabilities");
6312
6313 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6314 + (-ap_change)
6315 + (-accrued_change);
6316
6317 let capex = if fixed_asset_change > Decimal::ZERO {
6319 -fixed_asset_change
6320 } else {
6321 Decimal::ZERO
6322 };
6323 let investing_cf = capex;
6324
6325 let debt_change = -change("LongTermDebt");
6327 let equity_change = -change("Equity");
6328 let financing_cf = debt_change + equity_change;
6329
6330 let net_change = operating_cf + investing_cf + financing_cf;
6331
6332 vec![
6333 CashFlowItem {
6334 item_code: "CF-NI".to_string(),
6335 label: "Net Income".to_string(),
6336 category: CashFlowCategory::Operating,
6337 amount: net_income,
6338 amount_prior: None,
6339 sort_order: 1,
6340 is_total: false,
6341 },
6342 CashFlowItem {
6343 item_code: "CF-DEP".to_string(),
6344 label: "Depreciation & Amortization".to_string(),
6345 category: CashFlowCategory::Operating,
6346 amount: depreciation_addback,
6347 amount_prior: None,
6348 sort_order: 2,
6349 is_total: false,
6350 },
6351 CashFlowItem {
6352 item_code: "CF-AR".to_string(),
6353 label: "Change in Accounts Receivable".to_string(),
6354 category: CashFlowCategory::Operating,
6355 amount: -ar_change,
6356 amount_prior: None,
6357 sort_order: 3,
6358 is_total: false,
6359 },
6360 CashFlowItem {
6361 item_code: "CF-AP".to_string(),
6362 label: "Change in Accounts Payable".to_string(),
6363 category: CashFlowCategory::Operating,
6364 amount: -ap_change,
6365 amount_prior: None,
6366 sort_order: 4,
6367 is_total: false,
6368 },
6369 CashFlowItem {
6370 item_code: "CF-INV".to_string(),
6371 label: "Change in Inventory".to_string(),
6372 category: CashFlowCategory::Operating,
6373 amount: -inventory_change,
6374 amount_prior: None,
6375 sort_order: 5,
6376 is_total: false,
6377 },
6378 CashFlowItem {
6379 item_code: "CF-OP".to_string(),
6380 label: "Net Cash from Operating Activities".to_string(),
6381 category: CashFlowCategory::Operating,
6382 amount: operating_cf,
6383 amount_prior: None,
6384 sort_order: 6,
6385 is_total: true,
6386 },
6387 CashFlowItem {
6388 item_code: "CF-CAPEX".to_string(),
6389 label: "Capital Expenditures".to_string(),
6390 category: CashFlowCategory::Investing,
6391 amount: capex,
6392 amount_prior: None,
6393 sort_order: 7,
6394 is_total: false,
6395 },
6396 CashFlowItem {
6397 item_code: "CF-INV-T".to_string(),
6398 label: "Net Cash from Investing Activities".to_string(),
6399 category: CashFlowCategory::Investing,
6400 amount: investing_cf,
6401 amount_prior: None,
6402 sort_order: 8,
6403 is_total: true,
6404 },
6405 CashFlowItem {
6406 item_code: "CF-DEBT".to_string(),
6407 label: "Net Borrowings / (Repayments)".to_string(),
6408 category: CashFlowCategory::Financing,
6409 amount: debt_change,
6410 amount_prior: None,
6411 sort_order: 9,
6412 is_total: false,
6413 },
6414 CashFlowItem {
6415 item_code: "CF-EQ".to_string(),
6416 label: "Equity Changes".to_string(),
6417 category: CashFlowCategory::Financing,
6418 amount: equity_change,
6419 amount_prior: None,
6420 sort_order: 10,
6421 is_total: false,
6422 },
6423 CashFlowItem {
6424 item_code: "CF-FIN-T".to_string(),
6425 label: "Net Cash from Financing Activities".to_string(),
6426 category: CashFlowCategory::Financing,
6427 amount: financing_cf,
6428 amount_prior: None,
6429 sort_order: 11,
6430 is_total: true,
6431 },
6432 CashFlowItem {
6433 item_code: "CF-NET".to_string(),
6434 label: "Net Change in Cash".to_string(),
6435 category: CashFlowCategory::Operating,
6436 amount: net_change,
6437 amount_prior: None,
6438 sort_order: 12,
6439 is_total: true,
6440 },
6441 ]
6442 }
6443
6444 fn calculate_net_income_from_tb(
6448 tb: &[datasynth_generators::TrialBalanceEntry],
6449 ) -> rust_decimal::Decimal {
6450 use rust_decimal::Decimal;
6451
6452 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6453 for entry in tb {
6454 let net = entry.debit_balance - entry.credit_balance;
6455 *aggregated.entry(entry.category.clone()).or_default() += net;
6456 }
6457
6458 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6459 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6460 let opex = *aggregated
6461 .get("OperatingExpenses")
6462 .unwrap_or(&Decimal::ZERO);
6463 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6464 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6465
6466 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6469 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6471 operating_income - tax
6472 }
6473
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12-14   | `Inventory`          |
/// | 15-19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21-24   | `AccruedLiabilities` |
/// | 25-29   | `LongTermDebt`       |
/// | 30-39   | `Equity`             |
/// | 40-44   | `Revenue`            |
/// | 50-52   | `CostOfSales`        |
/// | 60-69   | `OperatingExpenses`  |
/// | 70-74   | `OtherIncome`        |
/// | 80-89   | `OtherExpenses`      |
///
/// Any other code, including one shorter than two characters, is categorised
/// as `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        // Spelled out for documentation even though it equals the fallback.
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
6502
6503 fn phase_hr_data(
6505 &mut self,
6506 stats: &mut EnhancedGenerationStatistics,
6507 ) -> SynthResult<HrSnapshot> {
6508 if !self.phase_config.generate_hr {
6509 debug!("Phase 16: Skipped (HR generation disabled)");
6510 return Ok(HrSnapshot::default());
6511 }
6512
6513 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6514
6515 let seed = self.seed;
6516 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6517 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6518 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6519 let company_code = self
6520 .config
6521 .companies
6522 .first()
6523 .map(|c| c.code.as_str())
6524 .unwrap_or("1000");
6525 let currency = self
6526 .config
6527 .companies
6528 .first()
6529 .map(|c| c.currency.as_str())
6530 .unwrap_or("USD");
6531
6532 let employee_ids: Vec<String> = self
6533 .master_data
6534 .employees
6535 .iter()
6536 .map(|e| e.employee_id.clone())
6537 .collect();
6538
6539 if employee_ids.is_empty() {
6540 debug!("Phase 16: Skipped (no employees available)");
6541 return Ok(HrSnapshot::default());
6542 }
6543
6544 let cost_center_ids: Vec<String> = self
6547 .master_data
6548 .employees
6549 .iter()
6550 .filter_map(|e| e.cost_center.clone())
6551 .collect::<std::collections::HashSet<_>>()
6552 .into_iter()
6553 .collect();
6554
6555 let mut snapshot = HrSnapshot::default();
6556
6557 if self.config.hr.payroll.enabled {
6559 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6560 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6561
6562 let payroll_pack = self.primary_pack();
6564
6565 payroll_gen.set_country_pack(payroll_pack.clone());
6568
6569 let employees_with_salary: Vec<(
6570 String,
6571 rust_decimal::Decimal,
6572 Option<String>,
6573 Option<String>,
6574 )> = self
6575 .master_data
6576 .employees
6577 .iter()
6578 .map(|e| {
6579 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6582 e.base_salary
6583 } else {
6584 rust_decimal::Decimal::from(60_000)
6585 };
6586 (
6587 e.employee_id.clone(),
6588 annual, e.cost_center.clone(),
6590 e.department_id.clone(),
6591 )
6592 })
6593 .collect();
6594
6595 let change_history = &self.master_data.employee_change_history;
6598 let has_changes = !change_history.is_empty();
6599 if has_changes {
6600 debug!(
6601 "Payroll will incorporate {} employee change events",
6602 change_history.len()
6603 );
6604 }
6605
6606 for month in 0..self.config.global.period_months {
6607 let period_start = start_date + chrono::Months::new(month);
6608 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6609 let (run, items) = if has_changes {
6610 payroll_gen.generate_with_changes(
6611 company_code,
6612 &employees_with_salary,
6613 period_start,
6614 period_end,
6615 currency,
6616 change_history,
6617 )
6618 } else {
6619 payroll_gen.generate(
6620 company_code,
6621 &employees_with_salary,
6622 period_start,
6623 period_end,
6624 currency,
6625 )
6626 };
6627 snapshot.payroll_runs.push(run);
6628 snapshot.payroll_run_count += 1;
6629 snapshot.payroll_line_item_count += items.len();
6630 snapshot.payroll_line_items.extend(items);
6631 }
6632 }
6633
6634 if self.config.hr.time_attendance.enabled {
6636 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6637 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6638 let entries = time_gen.generate(
6639 &employee_ids,
6640 start_date,
6641 end_date,
6642 &self.config.hr.time_attendance,
6643 );
6644 snapshot.time_entry_count = entries.len();
6645 snapshot.time_entries = entries;
6646 }
6647
6648 if self.config.hr.expenses.enabled {
6650 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6651 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6652 expense_gen.set_country_pack(self.primary_pack().clone());
6653 let company_currency = self
6654 .config
6655 .companies
6656 .first()
6657 .map(|c| c.currency.as_str())
6658 .unwrap_or("USD");
6659 let reports = expense_gen.generate_with_currency(
6660 &employee_ids,
6661 start_date,
6662 end_date,
6663 &self.config.hr.expenses,
6664 company_currency,
6665 );
6666 snapshot.expense_report_count = reports.len();
6667 snapshot.expense_reports = reports;
6668 }
6669
6670 if self.config.hr.payroll.enabled {
6672 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6673 let employee_pairs: Vec<(String, String)> = self
6674 .master_data
6675 .employees
6676 .iter()
6677 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6678 .collect();
6679 let enrollments =
6680 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6681 snapshot.benefit_enrollment_count = enrollments.len();
6682 snapshot.benefit_enrollments = enrollments;
6683 }
6684
6685 if self.phase_config.generate_hr {
6687 let entity_name = self
6688 .config
6689 .companies
6690 .first()
6691 .map(|c| c.name.as_str())
6692 .unwrap_or("Entity");
6693 let period_months = self.config.global.period_months;
6694 let period_label = {
6695 let y = start_date.year();
6696 let m = start_date.month();
6697 if period_months >= 12 {
6698 format!("FY{y}")
6699 } else {
6700 format!("{y}-{m:02}")
6701 }
6702 };
6703 let reporting_date =
6704 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6705
6706 let avg_salary: Option<rust_decimal::Decimal> = {
6711 let employee_count = employee_ids.len();
6712 if self.config.hr.payroll.enabled
6713 && employee_count > 0
6714 && !snapshot.payroll_runs.is_empty()
6715 {
6716 let total_gross: rust_decimal::Decimal = snapshot
6718 .payroll_runs
6719 .iter()
6720 .filter(|r| r.company_code == company_code)
6721 .map(|r| r.total_gross)
6722 .sum();
6723 if total_gross > rust_decimal::Decimal::ZERO {
6724 let annual_total = if period_months > 0 && period_months < 12 {
6726 total_gross * rust_decimal::Decimal::from(12u32)
6727 / rust_decimal::Decimal::from(period_months)
6728 } else {
6729 total_gross
6730 };
6731 Some(
6732 (annual_total / rust_decimal::Decimal::from(employee_count))
6733 .round_dp(2),
6734 )
6735 } else {
6736 None
6737 }
6738 } else {
6739 None
6740 }
6741 };
6742
6743 let mut pension_gen =
6744 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6745 let pension_snap = pension_gen.generate(
6746 company_code,
6747 entity_name,
6748 &period_label,
6749 reporting_date,
6750 employee_ids.len(),
6751 currency,
6752 avg_salary,
6753 period_months,
6754 );
6755 snapshot.pension_plan_count = pension_snap.plans.len();
6756 snapshot.pension_plans = pension_snap.plans;
6757 snapshot.pension_obligations = pension_snap.obligations;
6758 snapshot.pension_plan_assets = pension_snap.plan_assets;
6759 snapshot.pension_disclosures = pension_snap.disclosures;
6760 snapshot.pension_journal_entries = pension_snap.journal_entries;
6765 }
6766
6767 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6769 let period_months = self.config.global.period_months;
6770 let period_label = {
6771 let y = start_date.year();
6772 let m = start_date.month();
6773 if period_months >= 12 {
6774 format!("FY{y}")
6775 } else {
6776 format!("{y}-{m:02}")
6777 }
6778 };
6779 let reporting_date =
6780 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6781
6782 let mut stock_comp_gen =
6783 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6784 let stock_snap = stock_comp_gen.generate(
6785 company_code,
6786 &employee_ids,
6787 start_date,
6788 &period_label,
6789 reporting_date,
6790 currency,
6791 );
6792 snapshot.stock_grant_count = stock_snap.grants.len();
6793 snapshot.stock_grants = stock_snap.grants;
6794 snapshot.stock_comp_expenses = stock_snap.expenses;
6795 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6796 }
6797
6798 stats.payroll_run_count = snapshot.payroll_run_count;
6799 stats.time_entry_count = snapshot.time_entry_count;
6800 stats.expense_report_count = snapshot.expense_report_count;
6801 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6802 stats.pension_plan_count = snapshot.pension_plan_count;
6803 stats.stock_grant_count = snapshot.stock_grant_count;
6804
6805 info!(
6806 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6807 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6808 snapshot.time_entry_count, snapshot.expense_report_count,
6809 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6810 snapshot.stock_grant_count
6811 );
6812 self.check_resources_with_log("post-hr")?;
6813
6814 Ok(snapshot)
6815 }
6816
6817 fn phase_accounting_standards(
6819 &mut self,
6820 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6821 journal_entries: &[JournalEntry],
6822 stats: &mut EnhancedGenerationStatistics,
6823 ) -> SynthResult<AccountingStandardsSnapshot> {
6824 if !self.phase_config.generate_accounting_standards {
6825 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6826 return Ok(AccountingStandardsSnapshot::default());
6827 }
6828 info!("Phase 17: Generating Accounting Standards Data");
6829
6830 let seed = self.seed;
6831 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6832 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6833 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6834 let company_code = self
6835 .config
6836 .companies
6837 .first()
6838 .map(|c| c.code.as_str())
6839 .unwrap_or("1000");
6840 let currency = self
6841 .config
6842 .companies
6843 .first()
6844 .map(|c| c.currency.as_str())
6845 .unwrap_or("USD");
6846
6847 let framework = match self.config.accounting_standards.framework {
6852 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6853 datasynth_standards::framework::AccountingFramework::UsGaap
6854 }
6855 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6856 datasynth_standards::framework::AccountingFramework::Ifrs
6857 }
6858 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6859 datasynth_standards::framework::AccountingFramework::DualReporting
6860 }
6861 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6862 datasynth_standards::framework::AccountingFramework::FrenchGaap
6863 }
6864 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6865 datasynth_standards::framework::AccountingFramework::GermanGaap
6866 }
6867 None => {
6868 let pack = self.primary_pack();
6870 let pack_fw = pack.accounting.framework.as_str();
6871 match pack_fw {
6872 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6873 "dual_reporting" => {
6874 datasynth_standards::framework::AccountingFramework::DualReporting
6875 }
6876 "french_gaap" => {
6877 datasynth_standards::framework::AccountingFramework::FrenchGaap
6878 }
6879 "german_gaap" | "hgb" => {
6880 datasynth_standards::framework::AccountingFramework::GermanGaap
6881 }
6882 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6884 }
6885 }
6886 };
6887
6888 let mut snapshot = AccountingStandardsSnapshot::default();
6889
6890 if self.config.accounting_standards.revenue_recognition.enabled {
6892 let customer_ids: Vec<String> = self
6893 .master_data
6894 .customers
6895 .iter()
6896 .map(|c| c.customer_id.clone())
6897 .collect();
6898
6899 if !customer_ids.is_empty() {
6900 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6901 let contracts = rev_gen.generate(
6902 company_code,
6903 &customer_ids,
6904 start_date,
6905 end_date,
6906 currency,
6907 &self.config.accounting_standards.revenue_recognition,
6908 framework,
6909 );
6910 snapshot.revenue_contract_count = contracts.len();
6911 snapshot.contracts = contracts;
6912 }
6913 }
6914
6915 if self.config.accounting_standards.impairment.enabled {
6917 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6918 .master_data
6919 .assets
6920 .iter()
6921 .map(|a| {
6922 (
6923 a.asset_id.clone(),
6924 a.description.clone(),
6925 a.acquisition_cost,
6926 )
6927 })
6928 .collect();
6929
6930 if !asset_data.is_empty() {
6931 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6932 let tests = imp_gen.generate(
6933 company_code,
6934 &asset_data,
6935 end_date,
6936 &self.config.accounting_standards.impairment,
6937 framework,
6938 );
6939 snapshot.impairment_test_count = tests.len();
6940 snapshot.impairment_tests = tests;
6941 }
6942 }
6943
6944 if self
6946 .config
6947 .accounting_standards
6948 .business_combinations
6949 .enabled
6950 {
6951 let bc_config = &self.config.accounting_standards.business_combinations;
6952 let framework_str = match framework {
6953 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6954 _ => "US_GAAP",
6955 };
6956 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6957 let bc_snap = bc_gen.generate(
6958 company_code,
6959 currency,
6960 start_date,
6961 end_date,
6962 bc_config.acquisition_count,
6963 framework_str,
6964 );
6965 snapshot.business_combination_count = bc_snap.combinations.len();
6966 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6967 snapshot.business_combinations = bc_snap.combinations;
6968 }
6969
6970 if self
6972 .config
6973 .accounting_standards
6974 .expected_credit_loss
6975 .enabled
6976 {
6977 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6978 let framework_str = match framework {
6979 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6980 _ => "ASC_326",
6981 };
6982
6983 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6986
6987 let mut ecl_gen = EclGenerator::new(seed + 43);
6988
6989 let bucket_exposures: Vec<(
6991 datasynth_core::models::subledger::ar::AgingBucket,
6992 rust_decimal::Decimal,
6993 )> = if ar_aging_reports.is_empty() {
6994 use datasynth_core::models::subledger::ar::AgingBucket;
6996 vec![
6997 (
6998 AgingBucket::Current,
6999 rust_decimal::Decimal::from(500_000_u32),
7000 ),
7001 (
7002 AgingBucket::Days1To30,
7003 rust_decimal::Decimal::from(120_000_u32),
7004 ),
7005 (
7006 AgingBucket::Days31To60,
7007 rust_decimal::Decimal::from(45_000_u32),
7008 ),
7009 (
7010 AgingBucket::Days61To90,
7011 rust_decimal::Decimal::from(15_000_u32),
7012 ),
7013 (
7014 AgingBucket::Over90Days,
7015 rust_decimal::Decimal::from(8_000_u32),
7016 ),
7017 ]
7018 } else {
7019 use datasynth_core::models::subledger::ar::AgingBucket;
7020 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7022 std::collections::HashMap::new();
7023 for report in ar_aging_reports {
7024 for (bucket, amount) in &report.bucket_totals {
7025 *totals.entry(*bucket).or_default() += amount;
7026 }
7027 }
7028 AgingBucket::all()
7029 .into_iter()
7030 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7031 .collect()
7032 };
7033
7034 let ecl_snap = ecl_gen.generate(
7035 company_code,
7036 end_date,
7037 &bucket_exposures,
7038 ecl_config,
7039 &period_label,
7040 framework_str,
7041 );
7042
7043 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7044 snapshot.ecl_models = ecl_snap.ecl_models;
7045 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7046 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7047 }
7048
7049 {
7051 let framework_str = match framework {
7052 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7053 _ => "US_GAAP",
7054 };
7055
7056 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7061 .max(rust_decimal::Decimal::from(100_000_u32));
7062
7063 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7064
7065 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7066 let prov_snap = prov_gen.generate(
7067 company_code,
7068 currency,
7069 revenue_proxy,
7070 end_date,
7071 &period_label,
7072 framework_str,
7073 None, );
7075
7076 snapshot.provision_count = prov_snap.provisions.len();
7077 snapshot.provisions = prov_snap.provisions;
7078 snapshot.provision_movements = prov_snap.movements;
7079 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7080 snapshot.provision_journal_entries = prov_snap.journal_entries;
7081 }
7082
7083 {
7087 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7088
7089 let presentation_currency = self
7090 .config
7091 .global
7092 .presentation_currency
7093 .clone()
7094 .unwrap_or_else(|| self.config.global.group_currency.clone());
7095
7096 let mut rate_table = FxRateTable::new(&presentation_currency);
7099
7100 let base_rates = base_rates_usd();
7104 for (ccy, rate) in &base_rates {
7105 rate_table.add_rate(FxRate::new(
7106 ccy,
7107 "USD",
7108 RateType::Closing,
7109 end_date,
7110 *rate,
7111 "SYNTHETIC",
7112 ));
7113 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7116 rate_table.add_rate(FxRate::new(
7117 ccy,
7118 "USD",
7119 RateType::Average,
7120 end_date,
7121 avg,
7122 "SYNTHETIC",
7123 ));
7124 }
7125
7126 let mut translation_results = Vec::new();
7127 for company in &self.config.companies {
7128 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7131 .max(rust_decimal::Decimal::from(100_000_u32));
7132
7133 let func_ccy = company
7134 .functional_currency
7135 .clone()
7136 .unwrap_or_else(|| company.currency.clone());
7137
7138 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7139 &company.code,
7140 &func_ccy,
7141 &presentation_currency,
7142 &ias21_period_label,
7143 end_date,
7144 company_revenue,
7145 &rate_table,
7146 );
7147 translation_results.push(result);
7148 }
7149
7150 snapshot.currency_translation_count = translation_results.len();
7151 snapshot.currency_translation_results = translation_results;
7152 }
7153
7154 stats.revenue_contract_count = snapshot.revenue_contract_count;
7155 stats.impairment_test_count = snapshot.impairment_test_count;
7156 stats.business_combination_count = snapshot.business_combination_count;
7157 stats.ecl_model_count = snapshot.ecl_model_count;
7158 stats.provision_count = snapshot.provision_count;
7159
7160 info!(
7161 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
7162 snapshot.revenue_contract_count,
7163 snapshot.impairment_test_count,
7164 snapshot.business_combination_count,
7165 snapshot.ecl_model_count,
7166 snapshot.provision_count,
7167 snapshot.currency_translation_count
7168 );
7169 self.check_resources_with_log("post-accounting-standards")?;
7170
7171 Ok(snapshot)
7172 }
7173
7174 fn phase_manufacturing(
7176 &mut self,
7177 stats: &mut EnhancedGenerationStatistics,
7178 ) -> SynthResult<ManufacturingSnapshot> {
7179 if !self.phase_config.generate_manufacturing {
7180 debug!("Phase 18: Skipped (manufacturing generation disabled)");
7181 return Ok(ManufacturingSnapshot::default());
7182 }
7183 info!("Phase 18: Generating Manufacturing Data");
7184
7185 let seed = self.seed;
7186 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7187 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7188 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7189 let company_code = self
7190 .config
7191 .companies
7192 .first()
7193 .map(|c| c.code.as_str())
7194 .unwrap_or("1000");
7195
7196 let material_data: Vec<(String, String)> = self
7197 .master_data
7198 .materials
7199 .iter()
7200 .map(|m| (m.material_id.clone(), m.description.clone()))
7201 .collect();
7202
7203 if material_data.is_empty() {
7204 debug!("Phase 18: Skipped (no materials available)");
7205 return Ok(ManufacturingSnapshot::default());
7206 }
7207
7208 let mut snapshot = ManufacturingSnapshot::default();
7209
7210 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7212 let production_orders = prod_gen.generate(
7213 company_code,
7214 &material_data,
7215 start_date,
7216 end_date,
7217 &self.config.manufacturing.production_orders,
7218 &self.config.manufacturing.costing,
7219 &self.config.manufacturing.routing,
7220 );
7221 snapshot.production_order_count = production_orders.len();
7222
7223 let inspection_data: Vec<(String, String, String)> = production_orders
7225 .iter()
7226 .map(|po| {
7227 (
7228 po.order_id.clone(),
7229 po.material_id.clone(),
7230 po.material_description.clone(),
7231 )
7232 })
7233 .collect();
7234
7235 snapshot.production_orders = production_orders;
7236
7237 if !inspection_data.is_empty() {
7238 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7239 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7240 snapshot.quality_inspection_count = inspections.len();
7241 snapshot.quality_inspections = inspections;
7242 }
7243
7244 let storage_locations: Vec<(String, String)> = material_data
7246 .iter()
7247 .enumerate()
7248 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7249 .collect();
7250
7251 let employee_ids: Vec<String> = self
7252 .master_data
7253 .employees
7254 .iter()
7255 .map(|e| e.employee_id.clone())
7256 .collect();
7257 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7258 .with_employee_pool(employee_ids);
7259 let mut cycle_count_total = 0usize;
7260 for month in 0..self.config.global.period_months {
7261 let count_date = start_date + chrono::Months::new(month);
7262 let items_per_count = storage_locations.len().clamp(10, 50);
7263 let cc = cc_gen.generate(
7264 company_code,
7265 &storage_locations,
7266 count_date,
7267 items_per_count,
7268 );
7269 snapshot.cycle_counts.push(cc);
7270 cycle_count_total += 1;
7271 }
7272 snapshot.cycle_count_count = cycle_count_total;
7273
7274 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7276 let bom_components = bom_gen.generate(company_code, &material_data);
7277 snapshot.bom_component_count = bom_components.len();
7278 snapshot.bom_components = bom_components;
7279
7280 let currency = self
7282 .config
7283 .companies
7284 .first()
7285 .map(|c| c.currency.as_str())
7286 .unwrap_or("USD");
7287 let production_order_ids: Vec<String> = snapshot
7288 .production_orders
7289 .iter()
7290 .map(|po| po.order_id.clone())
7291 .collect();
7292 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7293 let inventory_movements = inv_mov_gen.generate_with_production_orders(
7294 company_code,
7295 &material_data,
7296 start_date,
7297 end_date,
7298 2,
7299 currency,
7300 &production_order_ids,
7301 );
7302 snapshot.inventory_movement_count = inventory_movements.len();
7303 snapshot.inventory_movements = inventory_movements;
7304
7305 stats.production_order_count = snapshot.production_order_count;
7306 stats.quality_inspection_count = snapshot.quality_inspection_count;
7307 stats.cycle_count_count = snapshot.cycle_count_count;
7308 stats.bom_component_count = snapshot.bom_component_count;
7309 stats.inventory_movement_count = snapshot.inventory_movement_count;
7310
7311 info!(
7312 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7313 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7314 snapshot.bom_component_count, snapshot.inventory_movement_count
7315 );
7316 self.check_resources_with_log("post-manufacturing")?;
7317
7318 Ok(snapshot)
7319 }
7320
7321 fn phase_sales_kpi_budgets(
7323 &mut self,
7324 coa: &Arc<ChartOfAccounts>,
7325 financial_reporting: &FinancialReportingSnapshot,
7326 stats: &mut EnhancedGenerationStatistics,
7327 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7328 if !self.phase_config.generate_sales_kpi_budgets {
7329 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7330 return Ok(SalesKpiBudgetsSnapshot::default());
7331 }
7332 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7333
7334 let seed = self.seed;
7335 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7336 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7337 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7338 let company_code = self
7339 .config
7340 .companies
7341 .first()
7342 .map(|c| c.code.as_str())
7343 .unwrap_or("1000");
7344
7345 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7346
7347 if self.config.sales_quotes.enabled {
7349 let customer_data: Vec<(String, String)> = self
7350 .master_data
7351 .customers
7352 .iter()
7353 .map(|c| (c.customer_id.clone(), c.name.clone()))
7354 .collect();
7355 let material_data: Vec<(String, String)> = self
7356 .master_data
7357 .materials
7358 .iter()
7359 .map(|m| (m.material_id.clone(), m.description.clone()))
7360 .collect();
7361
7362 if !customer_data.is_empty() && !material_data.is_empty() {
7363 let employee_ids: Vec<String> = self
7364 .master_data
7365 .employees
7366 .iter()
7367 .map(|e| e.employee_id.clone())
7368 .collect();
7369 let customer_ids: Vec<String> = self
7370 .master_data
7371 .customers
7372 .iter()
7373 .map(|c| c.customer_id.clone())
7374 .collect();
7375 let company_currency = self
7376 .config
7377 .companies
7378 .first()
7379 .map(|c| c.currency.as_str())
7380 .unwrap_or("USD");
7381
7382 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7383 .with_pools(employee_ids, customer_ids);
7384 let quotes = quote_gen.generate_with_currency(
7385 company_code,
7386 &customer_data,
7387 &material_data,
7388 start_date,
7389 end_date,
7390 &self.config.sales_quotes,
7391 company_currency,
7392 );
7393 snapshot.sales_quote_count = quotes.len();
7394 snapshot.sales_quotes = quotes;
7395 }
7396 }
7397
7398 if self.config.financial_reporting.management_kpis.enabled {
7400 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7401 let mut kpis = kpi_gen.generate(
7402 company_code,
7403 start_date,
7404 end_date,
7405 &self.config.financial_reporting.management_kpis,
7406 );
7407
7408 {
7410 use rust_decimal::Decimal;
7411
7412 if let Some(income_stmt) =
7413 financial_reporting.financial_statements.iter().find(|fs| {
7414 fs.statement_type == StatementType::IncomeStatement
7415 && fs.company_code == company_code
7416 })
7417 {
7418 let total_revenue: Decimal = income_stmt
7420 .line_items
7421 .iter()
7422 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7423 .map(|li| li.amount)
7424 .sum();
7425 let total_cogs: Decimal = income_stmt
7426 .line_items
7427 .iter()
7428 .filter(|li| {
7429 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7430 && !li.is_total
7431 })
7432 .map(|li| li.amount.abs())
7433 .sum();
7434 let total_opex: Decimal = income_stmt
7435 .line_items
7436 .iter()
7437 .filter(|li| {
7438 li.section.contains("Expense")
7439 && !li.is_total
7440 && !li.section.contains("Cost")
7441 })
7442 .map(|li| li.amount.abs())
7443 .sum();
7444
7445 if total_revenue > Decimal::ZERO {
7446 let hundred = Decimal::from(100);
7447 let gross_margin_pct =
7448 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7449 let operating_income = total_revenue - total_cogs - total_opex;
7450 let op_margin_pct =
7451 (operating_income * hundred / total_revenue).round_dp(2);
7452
7453 for kpi in &mut kpis {
7455 if kpi.name == "Gross Margin" {
7456 kpi.value = gross_margin_pct;
7457 } else if kpi.name == "Operating Margin" {
7458 kpi.value = op_margin_pct;
7459 }
7460 }
7461 }
7462 }
7463
7464 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7466 fs.statement_type == StatementType::BalanceSheet
7467 && fs.company_code == company_code
7468 }) {
7469 let current_assets: Decimal = bs
7470 .line_items
7471 .iter()
7472 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7473 .map(|li| li.amount)
7474 .sum();
7475 let current_liabilities: Decimal = bs
7476 .line_items
7477 .iter()
7478 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7479 .map(|li| li.amount.abs())
7480 .sum();
7481
7482 if current_liabilities > Decimal::ZERO {
7483 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7484 for kpi in &mut kpis {
7485 if kpi.name == "Current Ratio" {
7486 kpi.value = current_ratio;
7487 }
7488 }
7489 }
7490 }
7491 }
7492
7493 snapshot.kpi_count = kpis.len();
7494 snapshot.kpis = kpis;
7495 }
7496
7497 if self.config.financial_reporting.budgets.enabled {
7499 let account_data: Vec<(String, String)> = coa
7500 .accounts
7501 .iter()
7502 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7503 .collect();
7504
7505 if !account_data.is_empty() {
7506 let fiscal_year = start_date.year() as u32;
7507 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7508 let budget = budget_gen.generate(
7509 company_code,
7510 fiscal_year,
7511 &account_data,
7512 &self.config.financial_reporting.budgets,
7513 );
7514 snapshot.budget_line_count = budget.line_items.len();
7515 snapshot.budgets.push(budget);
7516 }
7517 }
7518
7519 stats.sales_quote_count = snapshot.sales_quote_count;
7520 stats.kpi_count = snapshot.kpi_count;
7521 stats.budget_line_count = snapshot.budget_line_count;
7522
7523 info!(
7524 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7525 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7526 );
7527 self.check_resources_with_log("post-sales-kpi-budgets")?;
7528
7529 Ok(snapshot)
7530 }
7531
7532 fn compute_pre_tax_income(
7539 company_code: &str,
7540 journal_entries: &[JournalEntry],
7541 ) -> rust_decimal::Decimal {
7542 use datasynth_core::accounts::AccountCategory;
7543 use rust_decimal::Decimal;
7544
7545 let mut total_revenue = Decimal::ZERO;
7546 let mut total_expenses = Decimal::ZERO;
7547
7548 for je in journal_entries {
7549 if je.header.company_code != company_code {
7550 continue;
7551 }
7552 for line in &je.lines {
7553 let cat = AccountCategory::from_account(&line.gl_account);
7554 match cat {
7555 AccountCategory::Revenue => {
7556 total_revenue += line.credit_amount - line.debit_amount;
7557 }
7558 AccountCategory::Cogs
7559 | AccountCategory::OperatingExpense
7560 | AccountCategory::OtherIncomeExpense => {
7561 total_expenses += line.debit_amount - line.credit_amount;
7562 }
7563 _ => {}
7564 }
7565 }
7566 }
7567
7568 let pti = (total_revenue - total_expenses).round_dp(2);
7569 if pti == rust_decimal::Decimal::ZERO {
7570 rust_decimal::Decimal::from(1_000_000u32)
7573 } else {
7574 pti
7575 }
7576 }
7577
7578 fn phase_tax_generation(
7580 &mut self,
7581 document_flows: &DocumentFlowSnapshot,
7582 journal_entries: &[JournalEntry],
7583 stats: &mut EnhancedGenerationStatistics,
7584 ) -> SynthResult<TaxSnapshot> {
7585 if !self.phase_config.generate_tax {
7586 debug!("Phase 20: Skipped (tax generation disabled)");
7587 return Ok(TaxSnapshot::default());
7588 }
7589 info!("Phase 20: Generating Tax Data");
7590
7591 let seed = self.seed;
7592 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7593 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7594 let fiscal_year = start_date.year();
7595 let company_code = self
7596 .config
7597 .companies
7598 .first()
7599 .map(|c| c.code.as_str())
7600 .unwrap_or("1000");
7601
7602 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7603 seed + 370,
7604 self.config.tax.clone(),
7605 );
7606
7607 let pack = self.primary_pack().clone();
7608 let (jurisdictions, codes) =
7609 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7610
7611 let mut provisions = Vec::new();
7613 if self.config.tax.provisions.enabled {
7614 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7615 for company in &self.config.companies {
7616 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7617 let statutory_rate = rust_decimal::Decimal::new(
7618 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7619 2,
7620 );
7621 let provision = provision_gen.generate(
7622 &company.code,
7623 start_date,
7624 pre_tax_income,
7625 statutory_rate,
7626 );
7627 provisions.push(provision);
7628 }
7629 }
7630
7631 let mut tax_lines = Vec::new();
7633 if !codes.is_empty() {
7634 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7635 datasynth_generators::TaxLineGeneratorConfig::default(),
7636 codes.clone(),
7637 seed + 372,
7638 );
7639
7640 let buyer_country = self
7643 .config
7644 .companies
7645 .first()
7646 .map(|c| c.country.as_str())
7647 .unwrap_or("US");
7648 for vi in &document_flows.vendor_invoices {
7649 let lines = tax_line_gen.generate_for_document(
7650 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7651 &vi.header.document_id,
7652 buyer_country, buyer_country,
7654 vi.payable_amount,
7655 vi.header.document_date,
7656 None,
7657 );
7658 tax_lines.extend(lines);
7659 }
7660
7661 for ci in &document_flows.customer_invoices {
7663 let lines = tax_line_gen.generate_for_document(
7664 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7665 &ci.header.document_id,
7666 buyer_country, buyer_country,
7668 ci.total_gross_amount,
7669 ci.header.document_date,
7670 None,
7671 );
7672 tax_lines.extend(lines);
7673 }
7674 }
7675
7676 let deferred_tax = {
7678 let companies: Vec<(&str, &str)> = self
7679 .config
7680 .companies
7681 .iter()
7682 .map(|c| (c.code.as_str(), c.country.as_str()))
7683 .collect();
7684 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7685 deferred_gen.generate(&companies, start_date, journal_entries)
7686 };
7687
7688 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7691 std::collections::HashMap::new();
7692 for vi in &document_flows.vendor_invoices {
7693 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7694 }
7695 for ci in &document_flows.customer_invoices {
7696 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7697 }
7698
7699 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7701 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7702 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7703 &tax_lines,
7704 company_code,
7705 &doc_dates,
7706 end_date,
7707 );
7708 debug!("Generated {} tax posting JEs", jes.len());
7709 jes
7710 } else {
7711 Vec::new()
7712 };
7713
7714 let snapshot = TaxSnapshot {
7715 jurisdiction_count: jurisdictions.len(),
7716 code_count: codes.len(),
7717 jurisdictions,
7718 codes,
7719 tax_provisions: provisions,
7720 tax_lines,
7721 tax_returns: Vec::new(),
7722 withholding_records: Vec::new(),
7723 tax_anomaly_labels: Vec::new(),
7724 deferred_tax,
7725 tax_posting_journal_entries,
7726 };
7727
7728 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7729 stats.tax_code_count = snapshot.code_count;
7730 stats.tax_provision_count = snapshot.tax_provisions.len();
7731 stats.tax_line_count = snapshot.tax_lines.len();
7732
7733 info!(
7734 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7735 snapshot.jurisdiction_count,
7736 snapshot.code_count,
7737 snapshot.tax_provisions.len(),
7738 snapshot.deferred_tax.temporary_differences.len(),
7739 snapshot.deferred_tax.journal_entries.len(),
7740 snapshot.tax_posting_journal_entries.len(),
7741 );
7742 self.check_resources_with_log("post-tax")?;
7743
7744 Ok(snapshot)
7745 }
7746
7747 fn phase_esg_generation(
7749 &mut self,
7750 document_flows: &DocumentFlowSnapshot,
7751 manufacturing: &ManufacturingSnapshot,
7752 stats: &mut EnhancedGenerationStatistics,
7753 ) -> SynthResult<EsgSnapshot> {
7754 if !self.phase_config.generate_esg {
7755 debug!("Phase 21: Skipped (ESG generation disabled)");
7756 return Ok(EsgSnapshot::default());
7757 }
7758 let degradation = self.check_resources()?;
7759 if degradation >= DegradationLevel::Reduced {
7760 debug!(
7761 "Phase skipped due to resource pressure (degradation: {:?})",
7762 degradation
7763 );
7764 return Ok(EsgSnapshot::default());
7765 }
7766 info!("Phase 21: Generating ESG Data");
7767
7768 let seed = self.seed;
7769 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7770 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7771 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7772 let entity_id = self
7773 .config
7774 .companies
7775 .first()
7776 .map(|c| c.code.as_str())
7777 .unwrap_or("1000");
7778
7779 let esg_cfg = &self.config.esg;
7780 let mut snapshot = EsgSnapshot::default();
7781
7782 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7784 esg_cfg.environmental.energy.clone(),
7785 seed + 80,
7786 );
7787 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7788
7789 let facility_count = esg_cfg.environmental.energy.facility_count;
7791 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7792 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7793
7794 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7796 seed + 82,
7797 esg_cfg.environmental.waste.diversion_target,
7798 facility_count,
7799 );
7800 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7801
7802 let mut emission_gen =
7804 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7805
7806 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7808 .iter()
7809 .map(|e| datasynth_generators::EnergyInput {
7810 facility_id: e.facility_id.clone(),
7811 energy_type: match e.energy_source {
7812 EnergySourceType::NaturalGas => {
7813 datasynth_generators::EnergyInputType::NaturalGas
7814 }
7815 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7816 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7817 _ => datasynth_generators::EnergyInputType::Electricity,
7818 },
7819 consumption_kwh: e.consumption_kwh,
7820 period: e.period,
7821 })
7822 .collect();
7823
7824 if !manufacturing.production_orders.is_empty() {
7826 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7827 &manufacturing.production_orders,
7828 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7831 if !mfg_energy.is_empty() {
7832 info!(
7833 "ESG: {} energy inputs derived from {} production orders",
7834 mfg_energy.len(),
7835 manufacturing.production_orders.len(),
7836 );
7837 energy_inputs.extend(mfg_energy);
7838 }
7839 }
7840
7841 let mut emissions = Vec::new();
7842 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7843 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7844
7845 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7847 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7848 for payment in &document_flows.payments {
7849 if payment.is_vendor {
7850 *totals
7851 .entry(payment.business_partner_id.clone())
7852 .or_default() += payment.amount;
7853 }
7854 }
7855 totals
7856 };
7857 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7858 .master_data
7859 .vendors
7860 .iter()
7861 .map(|v| {
7862 let spend = vendor_payment_totals
7863 .get(&v.vendor_id)
7864 .copied()
7865 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7866 datasynth_generators::VendorSpendInput {
7867 vendor_id: v.vendor_id.clone(),
7868 category: format!("{:?}", v.vendor_type).to_lowercase(),
7869 spend,
7870 country: v.country.clone(),
7871 }
7872 })
7873 .collect();
7874 if !vendor_spend.is_empty() {
7875 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7876 entity_id,
7877 &vendor_spend,
7878 start_date,
7879 end_date,
7880 ));
7881 }
7882
7883 let headcount = self.master_data.employees.len() as u32;
7885 if headcount > 0 {
7886 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7887 emissions.extend(emission_gen.generate_scope3_business_travel(
7888 entity_id,
7889 travel_spend,
7890 start_date,
7891 ));
7892 emissions
7893 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7894 }
7895
7896 snapshot.emission_count = emissions.len();
7897 snapshot.emissions = emissions;
7898 snapshot.energy = energy_records;
7899
7900 let mut workforce_gen =
7902 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7903 let total_headcount = headcount.max(100);
7904 snapshot.diversity =
7905 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7906 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7907
7908 if !self.master_data.employees.is_empty() {
7910 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7911 entity_id,
7912 &self.master_data.employees,
7913 end_date,
7914 );
7915 if !hr_diversity.is_empty() {
7916 info!(
7917 "ESG: {} diversity metrics derived from {} actual employees",
7918 hr_diversity.len(),
7919 self.master_data.employees.len(),
7920 );
7921 snapshot.diversity.extend(hr_diversity);
7922 }
7923 }
7924
7925 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7926 entity_id,
7927 facility_count,
7928 start_date,
7929 end_date,
7930 );
7931
7932 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7935 entity_id,
7936 &snapshot.safety_incidents,
7937 total_hours,
7938 start_date,
7939 );
7940 snapshot.safety_metrics = vec![safety_metric];
7941
7942 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7944 seed + 85,
7945 esg_cfg.governance.board_size,
7946 esg_cfg.governance.independence_target,
7947 );
7948 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7949
7950 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7952 esg_cfg.supply_chain_esg.clone(),
7953 seed + 86,
7954 );
7955 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7956 .master_data
7957 .vendors
7958 .iter()
7959 .map(|v| datasynth_generators::VendorInput {
7960 vendor_id: v.vendor_id.clone(),
7961 country: v.country.clone(),
7962 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7963 quality_score: None,
7964 })
7965 .collect();
7966 snapshot.supplier_assessments =
7967 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7968
7969 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7971 seed + 87,
7972 esg_cfg.reporting.clone(),
7973 esg_cfg.climate_scenarios.clone(),
7974 );
7975 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7976 snapshot.disclosures = disclosure_gen.generate_disclosures(
7977 entity_id,
7978 &snapshot.materiality,
7979 start_date,
7980 end_date,
7981 );
7982 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7983 snapshot.disclosure_count = snapshot.disclosures.len();
7984
7985 if esg_cfg.anomaly_rate > 0.0 {
7987 let mut anomaly_injector =
7988 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7989 let mut labels = Vec::new();
7990 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7991 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7992 labels.extend(
7993 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7994 );
7995 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7996 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7997 snapshot.anomaly_labels = labels;
7998 }
7999
8000 stats.esg_emission_count = snapshot.emission_count;
8001 stats.esg_disclosure_count = snapshot.disclosure_count;
8002
8003 info!(
8004 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8005 snapshot.emission_count,
8006 snapshot.disclosure_count,
8007 snapshot.supplier_assessments.len()
8008 );
8009 self.check_resources_with_log("post-esg")?;
8010
8011 Ok(snapshot)
8012 }
8013
8014 fn phase_treasury_data(
8016 &mut self,
8017 document_flows: &DocumentFlowSnapshot,
8018 subledger: &SubledgerSnapshot,
8019 intercompany: &IntercompanySnapshot,
8020 stats: &mut EnhancedGenerationStatistics,
8021 ) -> SynthResult<TreasurySnapshot> {
8022 if !self.phase_config.generate_treasury {
8023 debug!("Phase 22: Skipped (treasury generation disabled)");
8024 return Ok(TreasurySnapshot::default());
8025 }
8026 let degradation = self.check_resources()?;
8027 if degradation >= DegradationLevel::Reduced {
8028 debug!(
8029 "Phase skipped due to resource pressure (degradation: {:?})",
8030 degradation
8031 );
8032 return Ok(TreasurySnapshot::default());
8033 }
8034 info!("Phase 22: Generating Treasury Data");
8035
8036 let seed = self.seed;
8037 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8038 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8039 let currency = self
8040 .config
8041 .companies
8042 .first()
8043 .map(|c| c.currency.as_str())
8044 .unwrap_or("USD");
8045 let entity_id = self
8046 .config
8047 .companies
8048 .first()
8049 .map(|c| c.code.as_str())
8050 .unwrap_or("1000");
8051
8052 let mut snapshot = TreasurySnapshot::default();
8053
8054 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8056 self.config.treasury.debt.clone(),
8057 seed + 90,
8058 );
8059 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8060
8061 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8063 self.config.treasury.hedging.clone(),
8064 seed + 91,
8065 );
8066 for debt in &snapshot.debt_instruments {
8067 if debt.rate_type == InterestRateType::Variable {
8068 let swap = hedge_gen.generate_ir_swap(
8069 currency,
8070 debt.principal,
8071 debt.origination_date,
8072 debt.maturity_date,
8073 );
8074 snapshot.hedging_instruments.push(swap);
8075 }
8076 }
8077
8078 {
8081 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8082 for payment in &document_flows.payments {
8083 if payment.currency != currency {
8084 let entry = fx_map
8085 .entry(payment.currency.clone())
8086 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8087 entry.0 += payment.amount;
8088 if payment.header.document_date > entry.1 {
8090 entry.1 = payment.header.document_date;
8091 }
8092 }
8093 }
8094 if !fx_map.is_empty() {
8095 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8096 .into_iter()
8097 .map(|(foreign_ccy, (net_amount, settlement_date))| {
8098 datasynth_generators::treasury::FxExposure {
8099 currency_pair: format!("{foreign_ccy}/{currency}"),
8100 foreign_currency: foreign_ccy,
8101 net_amount,
8102 settlement_date,
8103 description: "AP payment FX exposure".to_string(),
8104 }
8105 })
8106 .collect();
8107 let (fx_instruments, fx_relationships) =
8108 hedge_gen.generate(start_date, &fx_exposures);
8109 snapshot.hedging_instruments.extend(fx_instruments);
8110 snapshot.hedge_relationships.extend(fx_relationships);
8111 }
8112 }
8113
8114 if self.config.treasury.anomaly_rate > 0.0 {
8116 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8117 seed + 92,
8118 self.config.treasury.anomaly_rate,
8119 );
8120 let mut labels = Vec::new();
8121 labels.extend(
8122 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8123 );
8124 snapshot.treasury_anomaly_labels = labels;
8125 }
8126
8127 if self.config.treasury.cash_positioning.enabled {
8129 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8130
8131 for payment in &document_flows.payments {
8133 cash_flows.push(datasynth_generators::treasury::CashFlow {
8134 date: payment.header.document_date,
8135 account_id: format!("{entity_id}-MAIN"),
8136 amount: payment.amount,
8137 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8138 });
8139 }
8140
8141 for chain in &document_flows.o2c_chains {
8143 if let Some(ref receipt) = chain.customer_receipt {
8144 cash_flows.push(datasynth_generators::treasury::CashFlow {
8145 date: receipt.header.document_date,
8146 account_id: format!("{entity_id}-MAIN"),
8147 amount: receipt.amount,
8148 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8149 });
8150 }
8151 for receipt in &chain.remainder_receipts {
8153 cash_flows.push(datasynth_generators::treasury::CashFlow {
8154 date: receipt.header.document_date,
8155 account_id: format!("{entity_id}-MAIN"),
8156 amount: receipt.amount,
8157 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8158 });
8159 }
8160 }
8161
8162 if !cash_flows.is_empty() {
8163 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8164 self.config.treasury.cash_positioning.clone(),
8165 seed + 93,
8166 );
8167 let account_id = format!("{entity_id}-MAIN");
8168 snapshot.cash_positions = cash_gen.generate(
8169 entity_id,
8170 &account_id,
8171 currency,
8172 &cash_flows,
8173 start_date,
8174 start_date + chrono::Months::new(self.config.global.period_months),
8175 rust_decimal::Decimal::new(1_000_000, 0), );
8177 }
8178 }
8179
8180 if self.config.treasury.cash_forecasting.enabled {
8182 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8183
8184 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8186 .ar_invoices
8187 .iter()
8188 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8189 .map(|inv| {
8190 let days_past_due = if inv.due_date < end_date {
8191 (end_date - inv.due_date).num_days().max(0) as u32
8192 } else {
8193 0
8194 };
8195 datasynth_generators::treasury::ArAgingItem {
8196 expected_date: inv.due_date,
8197 amount: inv.amount_remaining,
8198 days_past_due,
8199 document_id: inv.invoice_number.clone(),
8200 }
8201 })
8202 .collect();
8203
8204 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8206 .ap_invoices
8207 .iter()
8208 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8209 .map(|inv| datasynth_generators::treasury::ApAgingItem {
8210 payment_date: inv.due_date,
8211 amount: inv.amount_remaining,
8212 document_id: inv.invoice_number.clone(),
8213 })
8214 .collect();
8215
8216 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8217 self.config.treasury.cash_forecasting.clone(),
8218 seed + 94,
8219 );
8220 let forecast = forecast_gen.generate(
8221 entity_id,
8222 currency,
8223 end_date,
8224 &ar_items,
8225 &ap_items,
8226 &[], );
8228 snapshot.cash_forecasts.push(forecast);
8229 }
8230
8231 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8233 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8234 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8235 self.config.treasury.cash_pooling.clone(),
8236 seed + 95,
8237 );
8238
8239 let account_ids: Vec<String> = snapshot
8241 .cash_positions
8242 .iter()
8243 .map(|cp| cp.bank_account_id.clone())
8244 .collect::<std::collections::HashSet<_>>()
8245 .into_iter()
8246 .collect();
8247
8248 if let Some(pool) =
8249 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8250 {
8251 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8253 for cp in &snapshot.cash_positions {
8254 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8255 }
8256
8257 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8258 latest_balances
8259 .into_iter()
8260 .filter(|(id, _)| pool.participant_accounts.contains(id))
8261 .map(
8262 |(id, balance)| datasynth_generators::treasury::AccountBalance {
8263 account_id: id,
8264 balance,
8265 },
8266 )
8267 .collect();
8268
8269 let sweeps =
8270 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8271 snapshot.cash_pool_sweeps = sweeps;
8272 snapshot.cash_pools.push(pool);
8273 }
8274 }
8275
8276 if self.config.treasury.bank_guarantees.enabled {
8278 let vendor_names: Vec<String> = self
8279 .master_data
8280 .vendors
8281 .iter()
8282 .map(|v| v.name.clone())
8283 .collect();
8284 if !vendor_names.is_empty() {
8285 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8286 self.config.treasury.bank_guarantees.clone(),
8287 seed + 96,
8288 );
8289 snapshot.bank_guarantees =
8290 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8291 }
8292 }
8293
8294 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8296 let entity_ids: Vec<String> = self
8297 .config
8298 .companies
8299 .iter()
8300 .map(|c| c.code.clone())
8301 .collect();
8302 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8303 .matched_pairs
8304 .iter()
8305 .map(|mp| {
8306 (
8307 mp.seller_company.clone(),
8308 mp.buyer_company.clone(),
8309 mp.amount,
8310 )
8311 })
8312 .collect();
8313 if entity_ids.len() >= 2 {
8314 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8315 self.config.treasury.netting.clone(),
8316 seed + 97,
8317 );
8318 snapshot.netting_runs = netting_gen.generate(
8319 &entity_ids,
8320 currency,
8321 start_date,
8322 self.config.global.period_months,
8323 &ic_amounts,
8324 );
8325 }
8326 }
8327
8328 {
8330 use datasynth_generators::treasury::TreasuryAccounting;
8331
8332 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8333 let mut treasury_jes = Vec::new();
8334
8335 if !snapshot.debt_instruments.is_empty() {
8337 let debt_jes =
8338 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8339 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8340 treasury_jes.extend(debt_jes);
8341 }
8342
8343 if !snapshot.hedging_instruments.is_empty() {
8345 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8346 &snapshot.hedging_instruments,
8347 &snapshot.hedge_relationships,
8348 end_date,
8349 entity_id,
8350 );
8351 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8352 treasury_jes.extend(hedge_jes);
8353 }
8354
8355 if !snapshot.cash_pool_sweeps.is_empty() {
8357 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8358 &snapshot.cash_pool_sweeps,
8359 entity_id,
8360 );
8361 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8362 treasury_jes.extend(sweep_jes);
8363 }
8364
8365 if !treasury_jes.is_empty() {
8366 debug!("Total treasury journal entries: {}", treasury_jes.len());
8367 }
8368 snapshot.journal_entries = treasury_jes;
8369 }
8370
8371 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8372 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8373 stats.cash_position_count = snapshot.cash_positions.len();
8374 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8375 stats.cash_pool_count = snapshot.cash_pools.len();
8376
8377 info!(
8378 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8379 snapshot.debt_instruments.len(),
8380 snapshot.hedging_instruments.len(),
8381 snapshot.cash_positions.len(),
8382 snapshot.cash_forecasts.len(),
8383 snapshot.cash_pools.len(),
8384 snapshot.bank_guarantees.len(),
8385 snapshot.netting_runs.len(),
8386 snapshot.journal_entries.len(),
8387 );
8388 self.check_resources_with_log("post-treasury")?;
8389
8390 Ok(snapshot)
8391 }
8392
8393 fn phase_project_accounting(
8395 &mut self,
8396 document_flows: &DocumentFlowSnapshot,
8397 hr: &HrSnapshot,
8398 stats: &mut EnhancedGenerationStatistics,
8399 ) -> SynthResult<ProjectAccountingSnapshot> {
8400 if !self.phase_config.generate_project_accounting {
8401 debug!("Phase 23: Skipped (project accounting disabled)");
8402 return Ok(ProjectAccountingSnapshot::default());
8403 }
8404 let degradation = self.check_resources()?;
8405 if degradation >= DegradationLevel::Reduced {
8406 debug!(
8407 "Phase skipped due to resource pressure (degradation: {:?})",
8408 degradation
8409 );
8410 return Ok(ProjectAccountingSnapshot::default());
8411 }
8412 info!("Phase 23: Generating Project Accounting Data");
8413
8414 let seed = self.seed;
8415 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8416 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8417 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8418 let company_code = self
8419 .config
8420 .companies
8421 .first()
8422 .map(|c| c.code.as_str())
8423 .unwrap_or("1000");
8424
8425 let mut snapshot = ProjectAccountingSnapshot::default();
8426
8427 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8429 self.config.project_accounting.clone(),
8430 seed + 95,
8431 );
8432 let pool = project_gen.generate(company_code, start_date, end_date);
8433 snapshot.projects = pool.projects.clone();
8434
8435 {
8437 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8438 Vec::new();
8439
8440 for te in &hr.time_entries {
8442 let total_hours = te.hours_regular + te.hours_overtime;
8443 if total_hours > 0.0 {
8444 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8445 id: te.entry_id.clone(),
8446 entity_id: company_code.to_string(),
8447 date: te.date,
8448 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8449 .unwrap_or(rust_decimal::Decimal::ZERO),
8450 source_type: CostSourceType::TimeEntry,
8451 hours: Some(
8452 rust_decimal::Decimal::from_f64_retain(total_hours)
8453 .unwrap_or(rust_decimal::Decimal::ZERO),
8454 ),
8455 });
8456 }
8457 }
8458
8459 for er in &hr.expense_reports {
8461 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8462 id: er.report_id.clone(),
8463 entity_id: company_code.to_string(),
8464 date: er.submission_date,
8465 amount: er.total_amount,
8466 source_type: CostSourceType::ExpenseReport,
8467 hours: None,
8468 });
8469 }
8470
8471 for po in &document_flows.purchase_orders {
8473 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8474 id: po.header.document_id.clone(),
8475 entity_id: company_code.to_string(),
8476 date: po.header.document_date,
8477 amount: po.total_net_amount,
8478 source_type: CostSourceType::PurchaseOrder,
8479 hours: None,
8480 });
8481 }
8482
8483 for vi in &document_flows.vendor_invoices {
8485 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8486 id: vi.header.document_id.clone(),
8487 entity_id: company_code.to_string(),
8488 date: vi.header.document_date,
8489 amount: vi.payable_amount,
8490 source_type: CostSourceType::VendorInvoice,
8491 hours: None,
8492 });
8493 }
8494
8495 if !source_docs.is_empty() && !pool.projects.is_empty() {
8496 let mut cost_gen =
8497 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8498 self.config.project_accounting.cost_allocation.clone(),
8499 seed + 99,
8500 );
8501 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8502 }
8503 }
8504
8505 if self.config.project_accounting.change_orders.enabled {
8507 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8508 self.config.project_accounting.change_orders.clone(),
8509 seed + 96,
8510 );
8511 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8512 }
8513
8514 if self.config.project_accounting.milestones.enabled {
8516 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8517 self.config.project_accounting.milestones.clone(),
8518 seed + 97,
8519 );
8520 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8521 }
8522
8523 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8525 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8526 self.config.project_accounting.earned_value.clone(),
8527 seed + 98,
8528 );
8529 snapshot.earned_value_metrics =
8530 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8531 }
8532
8533 if self.config.project_accounting.revenue_recognition.enabled
8535 && !snapshot.projects.is_empty()
8536 && !snapshot.cost_lines.is_empty()
8537 {
8538 use datasynth_generators::project_accounting::RevenueGenerator;
8539 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8540 let avg_contract_value =
8541 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8542 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8543
8544 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8547 snapshot
8548 .projects
8549 .iter()
8550 .filter(|p| {
8551 matches!(
8552 p.project_type,
8553 datasynth_core::models::ProjectType::Customer
8554 )
8555 })
8556 .map(|p| {
8557 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8558 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8559 } else {
8561 avg_contract_value
8562 };
8563 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8565 })
8566 .collect();
8567
8568 if !contract_values.is_empty() {
8569 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8570 snapshot.revenue_records = rev_gen.generate(
8571 &snapshot.projects,
8572 &snapshot.cost_lines,
8573 &contract_values,
8574 start_date,
8575 end_date,
8576 );
8577 debug!(
8578 "Generated {} revenue recognition records for {} customer projects",
8579 snapshot.revenue_records.len(),
8580 contract_values.len()
8581 );
8582 }
8583 }
8584
8585 stats.project_count = snapshot.projects.len();
8586 stats.project_change_order_count = snapshot.change_orders.len();
8587 stats.project_cost_line_count = snapshot.cost_lines.len();
8588
8589 info!(
8590 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8591 snapshot.projects.len(),
8592 snapshot.change_orders.len(),
8593 snapshot.milestones.len(),
8594 snapshot.earned_value_metrics.len()
8595 );
8596 self.check_resources_with_log("post-project-accounting")?;
8597
8598 Ok(snapshot)
8599 }
8600
8601 fn phase_evolution_events(
8603 &mut self,
8604 stats: &mut EnhancedGenerationStatistics,
8605 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8606 if !self.phase_config.generate_evolution_events {
8607 debug!("Phase 24: Skipped (evolution events disabled)");
8608 return Ok((Vec::new(), Vec::new()));
8609 }
8610 info!("Phase 24: Generating Process Evolution + Organizational Events");
8611
8612 let seed = self.seed;
8613 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8614 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8615 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8616
8617 let mut proc_gen =
8619 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8620 seed + 100,
8621 );
8622 let process_events = proc_gen.generate_events(start_date, end_date);
8623
8624 let company_codes: Vec<String> = self
8626 .config
8627 .companies
8628 .iter()
8629 .map(|c| c.code.clone())
8630 .collect();
8631 let mut org_gen =
8632 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8633 seed + 101,
8634 );
8635 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8636
8637 stats.process_evolution_event_count = process_events.len();
8638 stats.organizational_event_count = org_events.len();
8639
8640 info!(
8641 "Evolution events generated: {} process evolution, {} organizational",
8642 process_events.len(),
8643 org_events.len()
8644 );
8645 self.check_resources_with_log("post-evolution-events")?;
8646
8647 Ok((process_events, org_events))
8648 }
8649
8650 fn phase_disruption_events(
8653 &self,
8654 stats: &mut EnhancedGenerationStatistics,
8655 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8656 if !self.config.organizational_events.enabled {
8657 debug!("Phase 24b: Skipped (organizational events disabled)");
8658 return Ok(Vec::new());
8659 }
8660 info!("Phase 24b: Generating Disruption Events");
8661
8662 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8663 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8664 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8665
8666 let company_codes: Vec<String> = self
8667 .config
8668 .companies
8669 .iter()
8670 .map(|c| c.code.clone())
8671 .collect();
8672
8673 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8674 let events = gen.generate(start_date, end_date, &company_codes);
8675
8676 stats.disruption_event_count = events.len();
8677 info!("Disruption events generated: {} events", events.len());
8678 self.check_resources_with_log("post-disruption-events")?;
8679
8680 Ok(events)
8681 }
8682
8683 fn phase_counterfactuals(
8690 &self,
8691 journal_entries: &[JournalEntry],
8692 stats: &mut EnhancedGenerationStatistics,
8693 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8694 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8695 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8696 return Ok(Vec::new());
8697 }
8698 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8699
8700 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8701
8702 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8703
8704 let specs = [
8706 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8707 CounterfactualSpec::ShiftDate { days: -14 },
8708 CounterfactualSpec::SelfApprove,
8709 CounterfactualSpec::SplitTransaction { split_count: 3 },
8710 ];
8711
8712 let pairs: Vec<_> = journal_entries
8713 .iter()
8714 .enumerate()
8715 .map(|(i, je)| {
8716 let spec = &specs[i % specs.len()];
8717 gen.generate(je, spec)
8718 })
8719 .collect();
8720
8721 stats.counterfactual_pair_count = pairs.len();
8722 info!(
8723 "Counterfactual pairs generated: {} pairs from {} journal entries",
8724 pairs.len(),
8725 journal_entries.len()
8726 );
8727 self.check_resources_with_log("post-counterfactuals")?;
8728
8729 Ok(pairs)
8730 }
8731
8732 fn phase_red_flags(
8739 &self,
8740 anomaly_labels: &AnomalyLabels,
8741 document_flows: &DocumentFlowSnapshot,
8742 stats: &mut EnhancedGenerationStatistics,
8743 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8744 if !self.config.fraud.enabled {
8745 debug!("Phase 26: Skipped (fraud generation disabled)");
8746 return Ok(Vec::new());
8747 }
8748 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8749
8750 use datasynth_generators::fraud::RedFlagGenerator;
8751
8752 let generator = RedFlagGenerator::new();
8753 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8754
8755 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8757 .labels
8758 .iter()
8759 .filter(|label| label.anomaly_type.is_intentional())
8760 .map(|label| label.document_id.as_str())
8761 .collect();
8762
8763 let mut flags = Vec::new();
8764
8765 for chain in &document_flows.p2p_chains {
8767 let doc_id = &chain.purchase_order.header.document_id;
8768 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8769 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8770 }
8771
8772 for chain in &document_flows.o2c_chains {
8774 let doc_id = &chain.sales_order.header.document_id;
8775 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8776 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8777 }
8778
8779 stats.red_flag_count = flags.len();
8780 info!(
8781 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8782 flags.len(),
8783 document_flows.p2p_chains.len(),
8784 document_flows.o2c_chains.len(),
8785 fraud_doc_ids.len()
8786 );
8787 self.check_resources_with_log("post-red-flags")?;
8788
8789 Ok(flags)
8790 }
8791
8792 fn phase_collusion_rings(
8798 &mut self,
8799 stats: &mut EnhancedGenerationStatistics,
8800 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8801 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8802 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8803 return Ok(Vec::new());
8804 }
8805 info!("Phase 26b: Generating Collusion Rings");
8806
8807 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8808 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8809 let months = self.config.global.period_months;
8810
8811 let employee_ids: Vec<String> = self
8812 .master_data
8813 .employees
8814 .iter()
8815 .map(|e| e.employee_id.clone())
8816 .collect();
8817 let vendor_ids: Vec<String> = self
8818 .master_data
8819 .vendors
8820 .iter()
8821 .map(|v| v.vendor_id.clone())
8822 .collect();
8823
8824 let mut generator =
8825 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8826 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8827
8828 stats.collusion_ring_count = rings.len();
8829 info!(
8830 "Collusion rings generated: {} rings, total members: {}",
8831 rings.len(),
8832 rings
8833 .iter()
8834 .map(datasynth_generators::fraud::CollusionRing::size)
8835 .sum::<usize>()
8836 );
8837 self.check_resources_with_log("post-collusion-rings")?;
8838
8839 Ok(rings)
8840 }
8841
8842 fn phase_temporal_attributes(
8847 &mut self,
8848 stats: &mut EnhancedGenerationStatistics,
8849 ) -> SynthResult<
8850 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8851 > {
8852 if !self.config.temporal_attributes.enabled {
8853 debug!("Phase 27: Skipped (temporal attributes disabled)");
8854 return Ok(Vec::new());
8855 }
8856 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8857
8858 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8859 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8860
8861 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8865 || self.config.temporal_attributes.enabled;
8866 let temporal_config = {
8867 let ta = &self.config.temporal_attributes;
8868 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8869 .enabled(ta.enabled)
8870 .closed_probability(ta.valid_time.closed_probability)
8871 .avg_validity_days(ta.valid_time.avg_validity_days)
8872 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8873 .with_version_chains(if generate_version_chains {
8874 ta.avg_versions_per_entity
8875 } else {
8876 1.0
8877 })
8878 .build()
8879 };
8880 let temporal_config = if self
8882 .config
8883 .temporal_attributes
8884 .transaction_time
8885 .allow_backdating
8886 {
8887 let mut c = temporal_config;
8888 c.transaction_time.allow_backdating = true;
8889 c.transaction_time.backdating_probability = self
8890 .config
8891 .temporal_attributes
8892 .transaction_time
8893 .backdating_probability;
8894 c.transaction_time.max_backdate_days = self
8895 .config
8896 .temporal_attributes
8897 .transaction_time
8898 .max_backdate_days;
8899 c
8900 } else {
8901 temporal_config
8902 };
8903 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8904 temporal_config,
8905 self.seed + 130,
8906 start_date,
8907 );
8908
8909 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8910 self.seed + 130,
8911 datasynth_core::GeneratorType::Vendor,
8912 );
8913
8914 let chains: Vec<_> = self
8915 .master_data
8916 .vendors
8917 .iter()
8918 .map(|vendor| {
8919 let id = uuid_factory.next();
8920 gen.generate_version_chain(vendor.clone(), id)
8921 })
8922 .collect();
8923
8924 stats.temporal_version_chain_count = chains.len();
8925 info!("Temporal version chains generated: {} chains", chains.len());
8926 self.check_resources_with_log("post-temporal-attributes")?;
8927
8928 Ok(chains)
8929 }
8930
8931 fn phase_entity_relationships(
8941 &self,
8942 journal_entries: &[JournalEntry],
8943 document_flows: &DocumentFlowSnapshot,
8944 stats: &mut EnhancedGenerationStatistics,
8945 ) -> SynthResult<(
8946 Option<datasynth_core::models::EntityGraph>,
8947 Vec<datasynth_core::models::CrossProcessLink>,
8948 )> {
8949 use datasynth_generators::relationships::{
8950 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8951 TransactionSummary,
8952 };
8953
8954 let rs_enabled = self.config.relationship_strength.enabled;
8955 let cpl_enabled = self.config.cross_process_links.enabled
8956 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8957
8958 if !rs_enabled && !cpl_enabled {
8959 debug!(
8960 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8961 );
8962 return Ok((None, Vec::new()));
8963 }
8964
8965 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8966
8967 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8968 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8969
8970 let company_code = self
8971 .config
8972 .companies
8973 .first()
8974 .map(|c| c.code.as_str())
8975 .unwrap_or("1000");
8976
8977 let gen_config = EntityGraphConfig {
8979 enabled: rs_enabled,
8980 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8981 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8982 enable_return_flows: false,
8983 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8984 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8985 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8987 1.0
8988 } else {
8989 0.30
8990 },
8991 ..Default::default()
8992 },
8993 strength_config: datasynth_generators::relationships::StrengthConfig {
8994 transaction_volume_weight: self
8995 .config
8996 .relationship_strength
8997 .calculation
8998 .transaction_volume_weight,
8999 transaction_count_weight: self
9000 .config
9001 .relationship_strength
9002 .calculation
9003 .transaction_count_weight,
9004 duration_weight: self
9005 .config
9006 .relationship_strength
9007 .calculation
9008 .relationship_duration_weight,
9009 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9010 mutual_connections_weight: self
9011 .config
9012 .relationship_strength
9013 .calculation
9014 .mutual_connections_weight,
9015 recency_half_life_days: self
9016 .config
9017 .relationship_strength
9018 .calculation
9019 .recency_half_life_days,
9020 },
9021 ..Default::default()
9022 };
9023
9024 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9025
9026 let entity_graph = if rs_enabled {
9028 let vendor_summaries: Vec<EntitySummary> = self
9030 .master_data
9031 .vendors
9032 .iter()
9033 .map(|v| {
9034 EntitySummary::new(
9035 &v.vendor_id,
9036 &v.name,
9037 datasynth_core::models::GraphEntityType::Vendor,
9038 start_date,
9039 )
9040 })
9041 .collect();
9042
9043 let customer_summaries: Vec<EntitySummary> = self
9044 .master_data
9045 .customers
9046 .iter()
9047 .map(|c| {
9048 EntitySummary::new(
9049 &c.customer_id,
9050 &c.name,
9051 datasynth_core::models::GraphEntityType::Customer,
9052 start_date,
9053 )
9054 })
9055 .collect();
9056
9057 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9062 std::collections::HashMap::new();
9063
9064 for je in journal_entries {
9065 let cc = je.header.company_code.clone();
9066 let posting_date = je.header.posting_date;
9067 for line in &je.lines {
9068 if let Some(ref tp) = line.trading_partner {
9069 let amount = if line.debit_amount > line.credit_amount {
9070 line.debit_amount
9071 } else {
9072 line.credit_amount
9073 };
9074 let entry = txn_summaries
9075 .entry((cc.clone(), tp.clone()))
9076 .or_insert_with(|| TransactionSummary {
9077 total_volume: rust_decimal::Decimal::ZERO,
9078 transaction_count: 0,
9079 first_transaction_date: posting_date,
9080 last_transaction_date: posting_date,
9081 related_entities: std::collections::HashSet::new(),
9082 });
9083 entry.total_volume += amount;
9084 entry.transaction_count += 1;
9085 if posting_date < entry.first_transaction_date {
9086 entry.first_transaction_date = posting_date;
9087 }
9088 if posting_date > entry.last_transaction_date {
9089 entry.last_transaction_date = posting_date;
9090 }
9091 entry.related_entities.insert(cc.clone());
9092 }
9093 }
9094 }
9095
9096 for chain in &document_flows.p2p_chains {
9099 let cc = chain.purchase_order.header.company_code.clone();
9100 let vendor_id = chain.purchase_order.vendor_id.clone();
9101 let po_date = chain.purchase_order.header.document_date;
9102 let amount = chain.purchase_order.total_net_amount;
9103
9104 let entry = txn_summaries
9105 .entry((cc.clone(), vendor_id))
9106 .or_insert_with(|| TransactionSummary {
9107 total_volume: rust_decimal::Decimal::ZERO,
9108 transaction_count: 0,
9109 first_transaction_date: po_date,
9110 last_transaction_date: po_date,
9111 related_entities: std::collections::HashSet::new(),
9112 });
9113 entry.total_volume += amount;
9114 entry.transaction_count += 1;
9115 if po_date < entry.first_transaction_date {
9116 entry.first_transaction_date = po_date;
9117 }
9118 if po_date > entry.last_transaction_date {
9119 entry.last_transaction_date = po_date;
9120 }
9121 entry.related_entities.insert(cc);
9122 }
9123
9124 for chain in &document_flows.o2c_chains {
9126 let cc = chain.sales_order.header.company_code.clone();
9127 let customer_id = chain.sales_order.customer_id.clone();
9128 let so_date = chain.sales_order.header.document_date;
9129 let amount = chain.sales_order.total_net_amount;
9130
9131 let entry = txn_summaries
9132 .entry((cc.clone(), customer_id))
9133 .or_insert_with(|| TransactionSummary {
9134 total_volume: rust_decimal::Decimal::ZERO,
9135 transaction_count: 0,
9136 first_transaction_date: so_date,
9137 last_transaction_date: so_date,
9138 related_entities: std::collections::HashSet::new(),
9139 });
9140 entry.total_volume += amount;
9141 entry.transaction_count += 1;
9142 if so_date < entry.first_transaction_date {
9143 entry.first_transaction_date = so_date;
9144 }
9145 if so_date > entry.last_transaction_date {
9146 entry.last_transaction_date = so_date;
9147 }
9148 entry.related_entities.insert(cc);
9149 }
9150
9151 let as_of_date = journal_entries
9152 .last()
9153 .map(|je| je.header.posting_date)
9154 .unwrap_or(start_date);
9155
9156 let graph = gen.generate_entity_graph(
9157 company_code,
9158 as_of_date,
9159 &vendor_summaries,
9160 &customer_summaries,
9161 &txn_summaries,
9162 );
9163
9164 info!(
9165 "Entity relationship graph: {} nodes, {} edges",
9166 graph.nodes.len(),
9167 graph.edges.len()
9168 );
9169 stats.entity_relationship_node_count = graph.nodes.len();
9170 stats.entity_relationship_edge_count = graph.edges.len();
9171 Some(graph)
9172 } else {
9173 None
9174 };
9175
9176 let cross_process_links = if cpl_enabled {
9178 let gr_refs: Vec<GoodsReceiptRef> = document_flows
9180 .p2p_chains
9181 .iter()
9182 .flat_map(|chain| {
9183 let vendor_id = chain.purchase_order.vendor_id.clone();
9184 let cc = chain.purchase_order.header.company_code.clone();
9185 chain.goods_receipts.iter().flat_map(move |gr| {
9186 gr.items.iter().filter_map({
9187 let doc_id = gr.header.document_id.clone();
9188 let v_id = vendor_id.clone();
9189 let company = cc.clone();
9190 let receipt_date = gr.header.document_date;
9191 move |item| {
9192 item.base
9193 .material_id
9194 .as_ref()
9195 .map(|mat_id| GoodsReceiptRef {
9196 document_id: doc_id.clone(),
9197 material_id: mat_id.clone(),
9198 quantity: item.base.quantity,
9199 receipt_date,
9200 vendor_id: v_id.clone(),
9201 company_code: company.clone(),
9202 })
9203 }
9204 })
9205 })
9206 })
9207 .collect();
9208
9209 let del_refs: Vec<DeliveryRef> = document_flows
9211 .o2c_chains
9212 .iter()
9213 .flat_map(|chain| {
9214 let customer_id = chain.sales_order.customer_id.clone();
9215 let cc = chain.sales_order.header.company_code.clone();
9216 chain.deliveries.iter().flat_map(move |del| {
9217 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9218 del.items.iter().filter_map({
9219 let doc_id = del.header.document_id.clone();
9220 let c_id = customer_id.clone();
9221 let company = cc.clone();
9222 move |item| {
9223 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9224 document_id: doc_id.clone(),
9225 material_id: mat_id.clone(),
9226 quantity: item.base.quantity,
9227 delivery_date,
9228 customer_id: c_id.clone(),
9229 company_code: company.clone(),
9230 })
9231 }
9232 })
9233 })
9234 })
9235 .collect();
9236
9237 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9238 info!("Cross-process links generated: {} links", links.len());
9239 stats.cross_process_link_count = links.len();
9240 links
9241 } else {
9242 Vec::new()
9243 };
9244
9245 self.check_resources_with_log("post-entity-relationships")?;
9246 Ok((entity_graph, cross_process_links))
9247 }
9248
9249 fn phase_industry_data(
9251 &self,
9252 stats: &mut EnhancedGenerationStatistics,
9253 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9254 if !self.config.industry_specific.enabled {
9255 return None;
9256 }
9257 info!("Phase 29: Generating industry-specific data");
9258 let output = datasynth_generators::industry::factory::generate_industry_output(
9259 self.config.global.industry,
9260 );
9261 stats.industry_gl_account_count = output.gl_accounts.len();
9262 info!(
9263 "Industry data generated: {} GL accounts for {:?}",
9264 output.gl_accounts.len(),
9265 self.config.global.industry
9266 );
9267 Some(output)
9268 }
9269
9270 fn phase_opening_balances(
9272 &mut self,
9273 coa: &Arc<ChartOfAccounts>,
9274 stats: &mut EnhancedGenerationStatistics,
9275 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9276 if !self.config.balance.generate_opening_balances {
9277 debug!("Phase 3b: Skipped (opening balance generation disabled)");
9278 return Ok(Vec::new());
9279 }
9280 info!("Phase 3b: Generating Opening Balances");
9281
9282 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9283 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9284 let fiscal_year = start_date.year();
9285
9286 let industry = match self.config.global.industry {
9287 IndustrySector::Manufacturing => IndustryType::Manufacturing,
9288 IndustrySector::Retail => IndustryType::Retail,
9289 IndustrySector::FinancialServices => IndustryType::Financial,
9290 IndustrySector::Healthcare => IndustryType::Healthcare,
9291 IndustrySector::Technology => IndustryType::Technology,
9292 _ => IndustryType::Manufacturing,
9293 };
9294
9295 let config = datasynth_generators::OpeningBalanceConfig {
9296 industry,
9297 ..Default::default()
9298 };
9299 let mut gen =
9300 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9301
9302 let mut results = Vec::new();
9303 for company in &self.config.companies {
9304 let spec = OpeningBalanceSpec::new(
9305 company.code.clone(),
9306 start_date,
9307 fiscal_year,
9308 company.currency.clone(),
9309 rust_decimal::Decimal::new(10_000_000, 0),
9310 industry,
9311 );
9312 let ob = gen.generate(&spec, coa, start_date, &company.code);
9313 results.push(ob);
9314 }
9315
9316 stats.opening_balance_count = results.len();
9317 info!("Opening balances generated: {} companies", results.len());
9318 self.check_resources_with_log("post-opening-balances")?;
9319
9320 Ok(results)
9321 }
9322
9323 fn phase_subledger_reconciliation(
9325 &mut self,
9326 subledger: &SubledgerSnapshot,
9327 entries: &[JournalEntry],
9328 stats: &mut EnhancedGenerationStatistics,
9329 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9330 if !self.config.balance.reconcile_subledgers {
9331 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9332 return Ok(Vec::new());
9333 }
9334 info!("Phase 9b: Reconciling GL to subledger balances");
9335
9336 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9337 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9338 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9339
9340 let tracker_config = BalanceTrackerConfig {
9342 validate_on_each_entry: false,
9343 track_history: false,
9344 fail_on_validation_error: false,
9345 ..Default::default()
9346 };
9347 let recon_currency = self
9348 .config
9349 .companies
9350 .first()
9351 .map(|c| c.currency.clone())
9352 .unwrap_or_else(|| "USD".to_string());
9353 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9354 let validation_errors = tracker.apply_entries(entries);
9355 if !validation_errors.is_empty() {
9356 warn!(
9357 error_count = validation_errors.len(),
9358 "Balance tracker encountered validation errors during subledger reconciliation"
9359 );
9360 for err in &validation_errors {
9361 debug!("Balance validation error: {:?}", err);
9362 }
9363 }
9364
9365 let mut engine = datasynth_generators::ReconciliationEngine::new(
9366 datasynth_generators::ReconciliationConfig::default(),
9367 );
9368
9369 let mut results = Vec::new();
9370 let company_code = self
9371 .config
9372 .companies
9373 .first()
9374 .map(|c| c.code.as_str())
9375 .unwrap_or("1000");
9376
9377 if !subledger.ar_invoices.is_empty() {
9379 let gl_balance = tracker
9380 .get_account_balance(
9381 company_code,
9382 datasynth_core::accounts::control_accounts::AR_CONTROL,
9383 )
9384 .map(|b| b.closing_balance)
9385 .unwrap_or_default();
9386 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9387 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9388 }
9389
9390 if !subledger.ap_invoices.is_empty() {
9392 let gl_balance = tracker
9393 .get_account_balance(
9394 company_code,
9395 datasynth_core::accounts::control_accounts::AP_CONTROL,
9396 )
9397 .map(|b| b.closing_balance)
9398 .unwrap_or_default();
9399 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9400 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9401 }
9402
9403 if !subledger.fa_records.is_empty() {
9405 let gl_asset_balance = tracker
9406 .get_account_balance(
9407 company_code,
9408 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9409 )
9410 .map(|b| b.closing_balance)
9411 .unwrap_or_default();
9412 let gl_accum_depr_balance = tracker
9413 .get_account_balance(
9414 company_code,
9415 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9416 )
9417 .map(|b| b.closing_balance)
9418 .unwrap_or_default();
9419 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9420 subledger.fa_records.iter().collect();
9421 let (asset_recon, depr_recon) = engine.reconcile_fa(
9422 company_code,
9423 end_date,
9424 gl_asset_balance,
9425 gl_accum_depr_balance,
9426 &fa_refs,
9427 );
9428 results.push(asset_recon);
9429 results.push(depr_recon);
9430 }
9431
9432 if !subledger.inventory_positions.is_empty() {
9434 let gl_balance = tracker
9435 .get_account_balance(
9436 company_code,
9437 datasynth_core::accounts::control_accounts::INVENTORY,
9438 )
9439 .map(|b| b.closing_balance)
9440 .unwrap_or_default();
9441 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9442 subledger.inventory_positions.iter().collect();
9443 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9444 }
9445
9446 stats.subledger_reconciliation_count = results.len();
9447 let passed = results.iter().filter(|r| r.is_balanced()).count();
9448 let failed = results.len() - passed;
9449 info!(
9450 "Subledger reconciliation: {} checks, {} passed, {} failed",
9451 results.len(),
9452 passed,
9453 failed
9454 );
9455 self.check_resources_with_log("post-subledger-reconciliation")?;
9456
9457 Ok(results)
9458 }
9459
9460 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9462 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9463
9464 let coa_framework = self.resolve_coa_framework();
9465
9466 let mut gen = ChartOfAccountsGenerator::new(
9467 self.config.chart_of_accounts.complexity,
9468 self.config.global.industry,
9469 self.seed,
9470 )
9471 .with_coa_framework(coa_framework);
9472
9473 let coa = Arc::new(gen.generate());
9474 self.coa = Some(Arc::clone(&coa));
9475
9476 if let Some(pb) = pb {
9477 pb.finish_with_message("Chart of Accounts complete");
9478 }
9479
9480 Ok(coa)
9481 }
9482
9483 fn generate_master_data(&mut self) -> SynthResult<()> {
9485 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9486 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9487 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9488
9489 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9491
9492 let pack = self.primary_pack().clone();
9494
9495 let vendors_per_company = self.phase_config.vendors_per_company;
9497 let customers_per_company = self.phase_config.customers_per_company;
9498 let materials_per_company = self.phase_config.materials_per_company;
9499 let assets_per_company = self.phase_config.assets_per_company;
9500 let coa_framework = self.resolve_coa_framework();
9501
9502 let per_company_results: Vec<_> = self
9505 .config
9506 .companies
9507 .par_iter()
9508 .enumerate()
9509 .map(|(i, company)| {
9510 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9511 let pack = pack.clone();
9512
9513 let mut vendor_gen = VendorGenerator::new(company_seed);
9515 vendor_gen.set_country_pack(pack.clone());
9516 vendor_gen.set_coa_framework(coa_framework);
9517 vendor_gen.set_counter_offset(i * vendors_per_company);
9518 if self.config.vendor_network.enabled {
9520 let vn = &self.config.vendor_network;
9521 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9522 enabled: true,
9523 depth: vn.depth,
9524 tier1_count: datasynth_generators::TierCountConfig::new(
9525 vn.tier1.min,
9526 vn.tier1.max,
9527 ),
9528 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9529 vn.tier2_per_parent.min,
9530 vn.tier2_per_parent.max,
9531 ),
9532 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9533 vn.tier3_per_parent.min,
9534 vn.tier3_per_parent.max,
9535 ),
9536 cluster_distribution: datasynth_generators::ClusterDistribution {
9537 reliable_strategic: vn.clusters.reliable_strategic,
9538 standard_operational: vn.clusters.standard_operational,
9539 transactional: vn.clusters.transactional,
9540 problematic: vn.clusters.problematic,
9541 },
9542 concentration_limits: datasynth_generators::ConcentrationLimits {
9543 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9544 max_top5: vn.dependencies.top_5_concentration,
9545 },
9546 ..datasynth_generators::VendorNetworkConfig::default()
9547 });
9548 }
9549 let vendor_pool =
9550 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9551
9552 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9554 customer_gen.set_country_pack(pack.clone());
9555 customer_gen.set_coa_framework(coa_framework);
9556 customer_gen.set_counter_offset(i * customers_per_company);
9557 if self.config.customer_segmentation.enabled {
9559 let cs = &self.config.customer_segmentation;
9560 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9561 enabled: true,
9562 segment_distribution: datasynth_generators::SegmentDistribution {
9563 enterprise: cs.value_segments.enterprise.customer_share,
9564 mid_market: cs.value_segments.mid_market.customer_share,
9565 smb: cs.value_segments.smb.customer_share,
9566 consumer: cs.value_segments.consumer.customer_share,
9567 },
9568 referral_config: datasynth_generators::ReferralConfig {
9569 enabled: cs.networks.referrals.enabled,
9570 referral_rate: cs.networks.referrals.referral_rate,
9571 ..Default::default()
9572 },
9573 hierarchy_config: datasynth_generators::HierarchyConfig {
9574 enabled: cs.networks.corporate_hierarchies.enabled,
9575 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9576 ..Default::default()
9577 },
9578 ..Default::default()
9579 };
9580 customer_gen.set_segmentation_config(seg_cfg);
9581 }
9582 let customer_pool = customer_gen.generate_customer_pool(
9583 customers_per_company,
9584 &company.code,
9585 start_date,
9586 );
9587
9588 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9590 material_gen.set_country_pack(pack.clone());
9591 material_gen.set_counter_offset(i * materials_per_company);
9592 let material_pool = material_gen.generate_material_pool(
9593 materials_per_company,
9594 &company.code,
9595 start_date,
9596 );
9597
9598 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9600 let asset_pool = asset_gen.generate_asset_pool(
9601 assets_per_company,
9602 &company.code,
9603 (start_date, end_date),
9604 );
9605
9606 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9608 employee_gen.set_country_pack(pack);
9609 let employee_pool =
9610 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9611
9612 let employee_change_history =
9614 employee_gen.generate_all_change_history(&employee_pool, end_date);
9615
9616 let employee_ids: Vec<String> = employee_pool
9618 .employees
9619 .iter()
9620 .map(|e| e.employee_id.clone())
9621 .collect();
9622 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9623 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9624
9625 (
9626 vendor_pool.vendors,
9627 customer_pool.customers,
9628 material_pool.materials,
9629 asset_pool.assets,
9630 employee_pool.employees,
9631 employee_change_history,
9632 cost_centers,
9633 )
9634 })
9635 .collect();
9636
9637 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9639 per_company_results
9640 {
9641 self.master_data.vendors.extend(vendors);
9642 self.master_data.customers.extend(customers);
9643 self.master_data.materials.extend(materials);
9644 self.master_data.assets.extend(assets);
9645 self.master_data.employees.extend(employees);
9646 self.master_data.cost_centers.extend(cost_centers);
9647 self.master_data
9648 .employee_change_history
9649 .extend(change_history);
9650 }
9651
9652 if let Some(pb) = &pb {
9653 pb.inc(total);
9654 }
9655 if let Some(pb) = pb {
9656 pb.finish_with_message("Master data generation complete");
9657 }
9658
9659 Ok(())
9660 }
9661
9662 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9664 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9665 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9666
9667 let months = (self.config.global.period_months as usize).max(1);
9670 let p2p_count = self
9671 .phase_config
9672 .p2p_chains
9673 .min(self.master_data.vendors.len() * 2 * months);
9674 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9675
9676 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9678 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9679 p2p_gen.set_country_pack(self.primary_pack().clone());
9680
9681 for i in 0..p2p_count {
9682 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9683 let materials: Vec<&Material> = self
9684 .master_data
9685 .materials
9686 .iter()
9687 .skip(i % self.master_data.materials.len().max(1))
9688 .take(2.min(self.master_data.materials.len()))
9689 .collect();
9690
9691 if materials.is_empty() {
9692 continue;
9693 }
9694
9695 let company = &self.config.companies[i % self.config.companies.len()];
9696 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9697 let fiscal_period = po_date.month() as u8;
9698 let created_by = if self.master_data.employees.is_empty() {
9699 "SYSTEM"
9700 } else {
9701 self.master_data.employees[i % self.master_data.employees.len()]
9702 .user_id
9703 .as_str()
9704 };
9705
9706 let chain = p2p_gen.generate_chain(
9707 &company.code,
9708 vendor,
9709 &materials,
9710 po_date,
9711 start_date.year() as u16,
9712 fiscal_period,
9713 created_by,
9714 );
9715
9716 flows.purchase_orders.push(chain.purchase_order.clone());
9718 flows.goods_receipts.extend(chain.goods_receipts.clone());
9719 if let Some(vi) = &chain.vendor_invoice {
9720 flows.vendor_invoices.push(vi.clone());
9721 }
9722 if let Some(payment) = &chain.payment {
9723 flows.payments.push(payment.clone());
9724 }
9725 for remainder in &chain.remainder_payments {
9726 flows.payments.push(remainder.clone());
9727 }
9728 flows.p2p_chains.push(chain);
9729
9730 if let Some(pb) = &pb {
9731 pb.inc(1);
9732 }
9733 }
9734
9735 if let Some(pb) = pb {
9736 pb.finish_with_message("P2P document flows complete");
9737 }
9738
9739 let o2c_count = self
9742 .phase_config
9743 .o2c_chains
9744 .min(self.master_data.customers.len() * 2 * months);
9745 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9746
9747 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9749 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9750 o2c_gen.set_country_pack(self.primary_pack().clone());
9751
9752 for i in 0..o2c_count {
9753 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9754 let materials: Vec<&Material> = self
9755 .master_data
9756 .materials
9757 .iter()
9758 .skip(i % self.master_data.materials.len().max(1))
9759 .take(2.min(self.master_data.materials.len()))
9760 .collect();
9761
9762 if materials.is_empty() {
9763 continue;
9764 }
9765
9766 let company = &self.config.companies[i % self.config.companies.len()];
9767 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9768 let fiscal_period = so_date.month() as u8;
9769 let created_by = if self.master_data.employees.is_empty() {
9770 "SYSTEM"
9771 } else {
9772 self.master_data.employees[i % self.master_data.employees.len()]
9773 .user_id
9774 .as_str()
9775 };
9776
9777 let chain = o2c_gen.generate_chain(
9778 &company.code,
9779 customer,
9780 &materials,
9781 so_date,
9782 start_date.year() as u16,
9783 fiscal_period,
9784 created_by,
9785 );
9786
9787 flows.sales_orders.push(chain.sales_order.clone());
9789 flows.deliveries.extend(chain.deliveries.clone());
9790 if let Some(ci) = &chain.customer_invoice {
9791 flows.customer_invoices.push(ci.clone());
9792 }
9793 if let Some(receipt) = &chain.customer_receipt {
9794 flows.payments.push(receipt.clone());
9795 }
9796 for receipt in &chain.remainder_receipts {
9798 flows.payments.push(receipt.clone());
9799 }
9800 flows.o2c_chains.push(chain);
9801
9802 if let Some(pb) = &pb {
9803 pb.inc(1);
9804 }
9805 }
9806
9807 if let Some(pb) = pb {
9808 pb.finish_with_message("O2C document flows complete");
9809 }
9810
9811 {
9815 let mut refs = Vec::new();
9816 for doc in &flows.purchase_orders {
9817 refs.extend(doc.header.document_references.iter().cloned());
9818 }
9819 for doc in &flows.goods_receipts {
9820 refs.extend(doc.header.document_references.iter().cloned());
9821 }
9822 for doc in &flows.vendor_invoices {
9823 refs.extend(doc.header.document_references.iter().cloned());
9824 }
9825 for doc in &flows.sales_orders {
9826 refs.extend(doc.header.document_references.iter().cloned());
9827 }
9828 for doc in &flows.deliveries {
9829 refs.extend(doc.header.document_references.iter().cloned());
9830 }
9831 for doc in &flows.customer_invoices {
9832 refs.extend(doc.header.document_references.iter().cloned());
9833 }
9834 for doc in &flows.payments {
9835 refs.extend(doc.header.document_references.iter().cloned());
9836 }
9837 debug!(
9838 "Collected {} document cross-references from document headers",
9839 refs.len()
9840 );
9841 flows.document_references = refs;
9842 }
9843
9844 Ok(())
9845 }
9846
9847 fn generate_journal_entries(
9849 &mut self,
9850 coa: &Arc<ChartOfAccounts>,
9851 ) -> SynthResult<Vec<JournalEntry>> {
9852 use datasynth_core::traits::ParallelGenerator;
9853
9854 let total = self.calculate_total_transactions();
9855 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9856
9857 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9858 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9859 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9860
9861 let company_codes: Vec<String> = self
9862 .config
9863 .companies
9864 .iter()
9865 .map(|c| c.code.clone())
9866 .collect();
9867
9868 let mut generator = JournalEntryGenerator::new_with_params(
9869 self.config.transactions.clone(),
9870 Arc::clone(coa),
9871 company_codes,
9872 start_date,
9873 end_date,
9874 self.seed,
9875 );
9876 let bp = &self.config.business_processes;
9879 generator.set_business_process_weights(
9880 bp.o2c_weight,
9881 bp.p2p_weight,
9882 bp.r2r_weight,
9883 bp.h2r_weight,
9884 bp.a2r_weight,
9885 );
9886 let generator = generator;
9887
9888 let je_pack = self.primary_pack();
9892
9893 let mut generator = generator
9894 .with_master_data(
9895 &self.master_data.vendors,
9896 &self.master_data.customers,
9897 &self.master_data.materials,
9898 )
9899 .with_country_pack_names(je_pack)
9900 .with_country_pack_temporal(
9901 self.config.temporal_patterns.clone(),
9902 self.seed + 200,
9903 je_pack,
9904 )
9905 .with_persona_errors(true)
9906 .with_fraud_config(self.config.fraud.clone());
9907
9908 if self.config.temporal.enabled {
9910 let drift_config = self.config.temporal.to_core_config();
9911 generator = generator.with_drift_config(drift_config, self.seed + 100);
9912 }
9913
9914 self.check_memory_limit()?;
9916
9917 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9919
9920 let entries = if total >= 10_000 && num_threads > 1 {
9924 let sub_generators = generator.split(num_threads);
9927 let entries_per_thread = total as usize / num_threads;
9928 let remainder = total as usize % num_threads;
9929
9930 let batches: Vec<Vec<JournalEntry>> = sub_generators
9931 .into_par_iter()
9932 .enumerate()
9933 .map(|(i, mut gen)| {
9934 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9935 gen.generate_batch(count)
9936 })
9937 .collect();
9938
9939 let entries = JournalEntryGenerator::merge_results(batches);
9941
9942 if let Some(pb) = &pb {
9943 pb.inc(total);
9944 }
9945 entries
9946 } else {
9947 let mut entries = Vec::with_capacity(total as usize);
9949 for _ in 0..total {
9950 let entry = generator.generate();
9951 entries.push(entry);
9952 if let Some(pb) = &pb {
9953 pb.inc(1);
9954 }
9955 }
9956 entries
9957 };
9958
9959 if let Some(pb) = pb {
9960 pb.finish_with_message("Journal entries complete");
9961 }
9962
9963 Ok(entries)
9964 }
9965
9966 fn generate_jes_from_document_flows(
9971 &mut self,
9972 flows: &DocumentFlowSnapshot,
9973 ) -> SynthResult<Vec<JournalEntry>> {
9974 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9975 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9976
9977 let je_config = match self.resolve_coa_framework() {
9978 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9979 CoAFramework::GermanSkr04 => {
9980 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9981 DocumentFlowJeConfig::from(&fa)
9982 }
9983 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9984 };
9985
9986 let populate_fec = je_config.populate_fec_fields;
9987 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9988
9989 if populate_fec {
9993 let mut aux_lookup = std::collections::HashMap::new();
9994 for vendor in &self.master_data.vendors {
9995 if let Some(ref aux) = vendor.auxiliary_gl_account {
9996 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9997 }
9998 }
9999 for customer in &self.master_data.customers {
10000 if let Some(ref aux) = customer.auxiliary_gl_account {
10001 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10002 }
10003 }
10004 if !aux_lookup.is_empty() {
10005 generator.set_auxiliary_account_lookup(aux_lookup);
10006 }
10007 }
10008
10009 let mut entries = Vec::new();
10010
10011 for chain in &flows.p2p_chains {
10013 let chain_entries = generator.generate_from_p2p_chain(chain);
10014 entries.extend(chain_entries);
10015 if let Some(pb) = &pb {
10016 pb.inc(1);
10017 }
10018 }
10019
10020 for chain in &flows.o2c_chains {
10022 let chain_entries = generator.generate_from_o2c_chain(chain);
10023 entries.extend(chain_entries);
10024 if let Some(pb) = &pb {
10025 pb.inc(1);
10026 }
10027 }
10028
10029 if let Some(pb) = pb {
10030 pb.finish_with_message(format!(
10031 "Generated {} JEs from document flows",
10032 entries.len()
10033 ));
10034 }
10035
10036 Ok(entries)
10037 }
10038
10039 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10045 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10046
10047 let mut jes = Vec::with_capacity(payroll_runs.len());
10048
10049 for run in payroll_runs {
10050 let mut je = JournalEntry::new_simple(
10051 format!("JE-PAYROLL-{}", run.payroll_id),
10052 run.company_code.clone(),
10053 run.run_date,
10054 format!("Payroll {}", run.payroll_id),
10055 );
10056
10057 je.add_line(JournalEntryLine {
10059 line_number: 1,
10060 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10061 debit_amount: run.total_gross,
10062 reference: Some(run.payroll_id.clone()),
10063 text: Some(format!(
10064 "Payroll {} ({} employees)",
10065 run.payroll_id, run.employee_count
10066 )),
10067 ..Default::default()
10068 });
10069
10070 je.add_line(JournalEntryLine {
10072 line_number: 2,
10073 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10074 credit_amount: run.total_gross,
10075 reference: Some(run.payroll_id.clone()),
10076 ..Default::default()
10077 });
10078
10079 jes.push(je);
10080 }
10081
10082 jes
10083 }
10084
10085 fn link_document_flows_to_subledgers(
10090 &mut self,
10091 flows: &DocumentFlowSnapshot,
10092 ) -> SynthResult<SubledgerSnapshot> {
10093 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10094 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10095
10096 let vendor_names: std::collections::HashMap<String, String> = self
10098 .master_data
10099 .vendors
10100 .iter()
10101 .map(|v| (v.vendor_id.clone(), v.name.clone()))
10102 .collect();
10103 let customer_names: std::collections::HashMap<String, String> = self
10104 .master_data
10105 .customers
10106 .iter()
10107 .map(|c| (c.customer_id.clone(), c.name.clone()))
10108 .collect();
10109
10110 let mut linker = DocumentFlowLinker::new()
10111 .with_vendor_names(vendor_names)
10112 .with_customer_names(customer_names);
10113
10114 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10116 if let Some(pb) = &pb {
10117 pb.inc(flows.vendor_invoices.len() as u64);
10118 }
10119
10120 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10122 if let Some(pb) = &pb {
10123 pb.inc(flows.customer_invoices.len() as u64);
10124 }
10125
10126 if let Some(pb) = pb {
10127 pb.finish_with_message(format!(
10128 "Linked {} AP and {} AR invoices",
10129 ap_invoices.len(),
10130 ar_invoices.len()
10131 ));
10132 }
10133
10134 Ok(SubledgerSnapshot {
10135 ap_invoices,
10136 ar_invoices,
10137 fa_records: Vec::new(),
10138 inventory_positions: Vec::new(),
10139 inventory_movements: Vec::new(),
10140 ar_aging_reports: Vec::new(),
10142 ap_aging_reports: Vec::new(),
10143 depreciation_runs: Vec::new(),
10145 inventory_valuations: Vec::new(),
10146 dunning_runs: Vec::new(),
10148 dunning_letters: Vec::new(),
10149 })
10150 }
10151
10152 #[allow(clippy::too_many_arguments)]
10157 fn generate_ocpm_events(
10158 &mut self,
10159 flows: &DocumentFlowSnapshot,
10160 sourcing: &SourcingSnapshot,
10161 hr: &HrSnapshot,
10162 manufacturing: &ManufacturingSnapshot,
10163 banking: &BankingSnapshot,
10164 audit: &AuditSnapshot,
10165 financial_reporting: &FinancialReportingSnapshot,
10166 ) -> SynthResult<OcpmSnapshot> {
10167 let total_chains = flows.p2p_chains.len()
10168 + flows.o2c_chains.len()
10169 + sourcing.sourcing_projects.len()
10170 + hr.payroll_runs.len()
10171 + manufacturing.production_orders.len()
10172 + banking.customers.len()
10173 + audit.engagements.len()
10174 + financial_reporting.bank_reconciliations.len();
10175 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10176
10177 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10179 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10180
10181 let ocpm_config = OcpmGeneratorConfig {
10183 generate_p2p: true,
10184 generate_o2c: true,
10185 generate_s2c: !sourcing.sourcing_projects.is_empty(),
10186 generate_h2r: !hr.payroll_runs.is_empty(),
10187 generate_mfg: !manufacturing.production_orders.is_empty(),
10188 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10189 generate_bank: !banking.customers.is_empty(),
10190 generate_audit: !audit.engagements.is_empty(),
10191 happy_path_rate: 0.75,
10192 exception_path_rate: 0.20,
10193 error_path_rate: 0.05,
10194 add_duration_variability: true,
10195 duration_std_dev_factor: 0.3,
10196 };
10197 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10198 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10199
10200 let available_users: Vec<String> = self
10202 .master_data
10203 .employees
10204 .iter()
10205 .take(20)
10206 .map(|e| e.user_id.clone())
10207 .collect();
10208
10209 let fallback_date =
10211 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10212 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10213 .unwrap_or(fallback_date);
10214 let base_midnight = base_date
10215 .and_hms_opt(0, 0, 0)
10216 .expect("midnight is always valid");
10217 let base_datetime =
10218 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10219
10220 let add_result = |event_log: &mut OcpmEventLog,
10222 result: datasynth_ocpm::CaseGenerationResult| {
10223 for event in result.events {
10224 event_log.add_event(event);
10225 }
10226 for object in result.objects {
10227 event_log.add_object(object);
10228 }
10229 for relationship in result.relationships {
10230 event_log.add_relationship(relationship);
10231 }
10232 for corr in result.correlation_events {
10233 event_log.add_correlation_event(corr);
10234 }
10235 event_log.add_case(result.case_trace);
10236 };
10237
10238 for chain in &flows.p2p_chains {
10240 let po = &chain.purchase_order;
10241 let documents = P2pDocuments::new(
10242 &po.header.document_id,
10243 &po.vendor_id,
10244 &po.header.company_code,
10245 po.total_net_amount,
10246 &po.header.currency,
10247 &ocpm_uuid_factory,
10248 )
10249 .with_goods_receipt(
10250 chain
10251 .goods_receipts
10252 .first()
10253 .map(|gr| gr.header.document_id.as_str())
10254 .unwrap_or(""),
10255 &ocpm_uuid_factory,
10256 )
10257 .with_invoice(
10258 chain
10259 .vendor_invoice
10260 .as_ref()
10261 .map(|vi| vi.header.document_id.as_str())
10262 .unwrap_or(""),
10263 &ocpm_uuid_factory,
10264 )
10265 .with_payment(
10266 chain
10267 .payment
10268 .as_ref()
10269 .map(|p| p.header.document_id.as_str())
10270 .unwrap_or(""),
10271 &ocpm_uuid_factory,
10272 );
10273
10274 let start_time =
10275 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10276 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10277 add_result(&mut event_log, result);
10278
10279 if let Some(pb) = &pb {
10280 pb.inc(1);
10281 }
10282 }
10283
10284 for chain in &flows.o2c_chains {
10286 let so = &chain.sales_order;
10287 let documents = O2cDocuments::new(
10288 &so.header.document_id,
10289 &so.customer_id,
10290 &so.header.company_code,
10291 so.total_net_amount,
10292 &so.header.currency,
10293 &ocpm_uuid_factory,
10294 )
10295 .with_delivery(
10296 chain
10297 .deliveries
10298 .first()
10299 .map(|d| d.header.document_id.as_str())
10300 .unwrap_or(""),
10301 &ocpm_uuid_factory,
10302 )
10303 .with_invoice(
10304 chain
10305 .customer_invoice
10306 .as_ref()
10307 .map(|ci| ci.header.document_id.as_str())
10308 .unwrap_or(""),
10309 &ocpm_uuid_factory,
10310 )
10311 .with_receipt(
10312 chain
10313 .customer_receipt
10314 .as_ref()
10315 .map(|r| r.header.document_id.as_str())
10316 .unwrap_or(""),
10317 &ocpm_uuid_factory,
10318 );
10319
10320 let start_time =
10321 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10322 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10323 add_result(&mut event_log, result);
10324
10325 if let Some(pb) = &pb {
10326 pb.inc(1);
10327 }
10328 }
10329
10330 for project in &sourcing.sourcing_projects {
10332 let vendor_id = sourcing
10334 .contracts
10335 .iter()
10336 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10337 .map(|c| c.vendor_id.clone())
10338 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10339 .or_else(|| {
10340 self.master_data
10341 .vendors
10342 .first()
10343 .map(|v| v.vendor_id.clone())
10344 })
10345 .unwrap_or_else(|| "V000".to_string());
10346 let mut docs = S2cDocuments::new(
10347 &project.project_id,
10348 &vendor_id,
10349 &project.company_code,
10350 project.estimated_annual_spend,
10351 &ocpm_uuid_factory,
10352 );
10353 if let Some(rfx) = sourcing
10355 .rfx_events
10356 .iter()
10357 .find(|r| r.sourcing_project_id == project.project_id)
10358 {
10359 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10360 if let Some(bid) = sourcing.bids.iter().find(|b| {
10362 b.rfx_id == rfx.rfx_id
10363 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10364 }) {
10365 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10366 }
10367 }
10368 if let Some(contract) = sourcing
10370 .contracts
10371 .iter()
10372 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10373 {
10374 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10375 }
10376 let start_time = base_datetime - chrono::Duration::days(90);
10377 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10378 add_result(&mut event_log, result);
10379
10380 if let Some(pb) = &pb {
10381 pb.inc(1);
10382 }
10383 }
10384
10385 for run in &hr.payroll_runs {
10387 let employee_id = hr
10389 .payroll_line_items
10390 .iter()
10391 .find(|li| li.payroll_id == run.payroll_id)
10392 .map(|li| li.employee_id.as_str())
10393 .unwrap_or("EMP000");
10394 let docs = H2rDocuments::new(
10395 &run.payroll_id,
10396 employee_id,
10397 &run.company_code,
10398 run.total_gross,
10399 &ocpm_uuid_factory,
10400 )
10401 .with_time_entries(
10402 hr.time_entries
10403 .iter()
10404 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10405 .take(5)
10406 .map(|t| t.entry_id.as_str())
10407 .collect(),
10408 );
10409 let start_time = base_datetime - chrono::Duration::days(30);
10410 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10411 add_result(&mut event_log, result);
10412
10413 if let Some(pb) = &pb {
10414 pb.inc(1);
10415 }
10416 }
10417
10418 for order in &manufacturing.production_orders {
10420 let mut docs = MfgDocuments::new(
10421 &order.order_id,
10422 &order.material_id,
10423 &order.company_code,
10424 order.planned_quantity,
10425 &ocpm_uuid_factory,
10426 )
10427 .with_operations(
10428 order
10429 .operations
10430 .iter()
10431 .map(|o| format!("OP-{:04}", o.operation_number))
10432 .collect::<Vec<_>>()
10433 .iter()
10434 .map(std::string::String::as_str)
10435 .collect(),
10436 );
10437 if let Some(insp) = manufacturing
10439 .quality_inspections
10440 .iter()
10441 .find(|i| i.reference_id == order.order_id)
10442 {
10443 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10444 }
10445 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10447 cc.items
10448 .iter()
10449 .any(|item| item.material_id == order.material_id)
10450 }) {
10451 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10452 }
10453 let start_time = base_datetime - chrono::Duration::days(60);
10454 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10455 add_result(&mut event_log, result);
10456
10457 if let Some(pb) = &pb {
10458 pb.inc(1);
10459 }
10460 }
10461
10462 for customer in &banking.customers {
10464 let customer_id_str = customer.customer_id.to_string();
10465 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10466 if let Some(account) = banking
10468 .accounts
10469 .iter()
10470 .find(|a| a.primary_owner_id == customer.customer_id)
10471 {
10472 let account_id_str = account.account_id.to_string();
10473 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10474 let txn_strs: Vec<String> = banking
10476 .transactions
10477 .iter()
10478 .filter(|t| t.account_id == account.account_id)
10479 .take(10)
10480 .map(|t| t.transaction_id.to_string())
10481 .collect();
10482 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10483 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10484 .transactions
10485 .iter()
10486 .filter(|t| t.account_id == account.account_id)
10487 .take(10)
10488 .map(|t| t.amount)
10489 .collect();
10490 if !txn_ids.is_empty() {
10491 docs = docs.with_transactions(txn_ids, txn_amounts);
10492 }
10493 }
10494 let start_time = base_datetime - chrono::Duration::days(180);
10495 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10496 add_result(&mut event_log, result);
10497
10498 if let Some(pb) = &pb {
10499 pb.inc(1);
10500 }
10501 }
10502
10503 for engagement in &audit.engagements {
10505 let engagement_id_str = engagement.engagement_id.to_string();
10506 let docs = AuditDocuments::new(
10507 &engagement_id_str,
10508 &engagement.client_entity_id,
10509 &ocpm_uuid_factory,
10510 )
10511 .with_workpapers(
10512 audit
10513 .workpapers
10514 .iter()
10515 .filter(|w| w.engagement_id == engagement.engagement_id)
10516 .take(10)
10517 .map(|w| w.workpaper_id.to_string())
10518 .collect::<Vec<_>>()
10519 .iter()
10520 .map(std::string::String::as_str)
10521 .collect(),
10522 )
10523 .with_evidence(
10524 audit
10525 .evidence
10526 .iter()
10527 .filter(|e| e.engagement_id == engagement.engagement_id)
10528 .take(10)
10529 .map(|e| e.evidence_id.to_string())
10530 .collect::<Vec<_>>()
10531 .iter()
10532 .map(std::string::String::as_str)
10533 .collect(),
10534 )
10535 .with_risks(
10536 audit
10537 .risk_assessments
10538 .iter()
10539 .filter(|r| r.engagement_id == engagement.engagement_id)
10540 .take(5)
10541 .map(|r| r.risk_id.to_string())
10542 .collect::<Vec<_>>()
10543 .iter()
10544 .map(std::string::String::as_str)
10545 .collect(),
10546 )
10547 .with_findings(
10548 audit
10549 .findings
10550 .iter()
10551 .filter(|f| f.engagement_id == engagement.engagement_id)
10552 .take(5)
10553 .map(|f| f.finding_id.to_string())
10554 .collect::<Vec<_>>()
10555 .iter()
10556 .map(std::string::String::as_str)
10557 .collect(),
10558 )
10559 .with_judgments(
10560 audit
10561 .judgments
10562 .iter()
10563 .filter(|j| j.engagement_id == engagement.engagement_id)
10564 .take(5)
10565 .map(|j| j.judgment_id.to_string())
10566 .collect::<Vec<_>>()
10567 .iter()
10568 .map(std::string::String::as_str)
10569 .collect(),
10570 );
10571 let start_time = base_datetime - chrono::Duration::days(120);
10572 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10573 add_result(&mut event_log, result);
10574
10575 if let Some(pb) = &pb {
10576 pb.inc(1);
10577 }
10578 }
10579
10580 for recon in &financial_reporting.bank_reconciliations {
10582 let docs = BankReconDocuments::new(
10583 &recon.reconciliation_id,
10584 &recon.bank_account_id,
10585 &recon.company_code,
10586 recon.bank_ending_balance,
10587 &ocpm_uuid_factory,
10588 )
10589 .with_statement_lines(
10590 recon
10591 .statement_lines
10592 .iter()
10593 .take(20)
10594 .map(|l| l.line_id.as_str())
10595 .collect(),
10596 )
10597 .with_reconciling_items(
10598 recon
10599 .reconciling_items
10600 .iter()
10601 .take(10)
10602 .map(|i| i.item_id.as_str())
10603 .collect(),
10604 );
10605 let start_time = base_datetime - chrono::Duration::days(30);
10606 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10607 add_result(&mut event_log, result);
10608
10609 if let Some(pb) = &pb {
10610 pb.inc(1);
10611 }
10612 }
10613
10614 event_log.compute_variants();
10616
10617 let summary = event_log.summary();
10618
10619 if let Some(pb) = pb {
10620 pb.finish_with_message(format!(
10621 "Generated {} OCPM events, {} objects",
10622 summary.event_count, summary.object_count
10623 ));
10624 }
10625
10626 Ok(OcpmSnapshot {
10627 event_count: summary.event_count,
10628 object_count: summary.object_count,
10629 case_count: summary.case_count,
10630 event_log: Some(event_log),
10631 })
10632 }
10633
10634 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10636 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10637
10638 let total_rate = if self.config.anomaly_injection.enabled {
10641 self.config.anomaly_injection.rates.total_rate
10642 } else if self.config.fraud.enabled {
10643 self.config.fraud.fraud_rate
10644 } else {
10645 0.02
10646 };
10647
10648 let fraud_rate = if self.config.anomaly_injection.enabled {
10649 self.config.anomaly_injection.rates.fraud_rate
10650 } else {
10651 AnomalyRateConfig::default().fraud_rate
10652 };
10653
10654 let error_rate = if self.config.anomaly_injection.enabled {
10655 self.config.anomaly_injection.rates.error_rate
10656 } else {
10657 AnomalyRateConfig::default().error_rate
10658 };
10659
10660 let process_issue_rate = if self.config.anomaly_injection.enabled {
10661 self.config.anomaly_injection.rates.process_rate
10662 } else {
10663 AnomalyRateConfig::default().process_issue_rate
10664 };
10665
10666 let anomaly_config = AnomalyInjectorConfig {
10667 rates: AnomalyRateConfig {
10668 total_rate,
10669 fraud_rate,
10670 error_rate,
10671 process_issue_rate,
10672 ..Default::default()
10673 },
10674 seed: self.seed + 5000,
10675 ..Default::default()
10676 };
10677
10678 let mut injector = AnomalyInjector::new(anomaly_config);
10679 let result = injector.process_entries(entries);
10680
10681 if let Some(pb) = &pb {
10682 pb.inc(entries.len() as u64);
10683 pb.finish_with_message("Anomaly injection complete");
10684 }
10685
10686 let mut by_type = HashMap::new();
10687 for label in &result.labels {
10688 *by_type
10689 .entry(format!("{:?}", label.anomaly_type))
10690 .or_insert(0) += 1;
10691 }
10692
10693 Ok(AnomalyLabels {
10694 labels: result.labels,
10695 summary: Some(result.summary),
10696 by_type,
10697 })
10698 }
10699
10700 fn validate_journal_entries(
10709 &mut self,
10710 entries: &[JournalEntry],
10711 ) -> SynthResult<BalanceValidationResult> {
10712 let clean_entries: Vec<&JournalEntry> = entries
10714 .iter()
10715 .filter(|e| {
10716 e.header
10717 .header_text
10718 .as_ref()
10719 .map(|t| !t.contains("[HUMAN_ERROR:"))
10720 .unwrap_or(true)
10721 })
10722 .collect();
10723
10724 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10725
10726 let config = BalanceTrackerConfig {
10728 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10732 };
10733 let validation_currency = self
10734 .config
10735 .companies
10736 .first()
10737 .map(|c| c.currency.clone())
10738 .unwrap_or_else(|| "USD".to_string());
10739
10740 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10741
10742 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10744 let errors = tracker.apply_entries(&clean_refs);
10745
10746 if let Some(pb) = &pb {
10747 pb.inc(entries.len() as u64);
10748 }
10749
10750 let has_unbalanced = tracker
10753 .get_validation_errors()
10754 .iter()
10755 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10756
10757 let mut all_errors = errors;
10760 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10761 let company_codes: Vec<String> = self
10762 .config
10763 .companies
10764 .iter()
10765 .map(|c| c.code.clone())
10766 .collect();
10767
10768 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10769 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10770 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10771
10772 for company_code in &company_codes {
10773 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10774 all_errors.push(e);
10775 }
10776 }
10777
10778 let stats = tracker.get_statistics();
10780
10781 let is_balanced = all_errors.is_empty();
10783
10784 if let Some(pb) = pb {
10785 let msg = if is_balanced {
10786 "Balance validation passed"
10787 } else {
10788 "Balance validation completed with errors"
10789 };
10790 pb.finish_with_message(msg);
10791 }
10792
10793 Ok(BalanceValidationResult {
10794 validated: true,
10795 is_balanced,
10796 entries_processed: stats.entries_processed,
10797 total_debits: stats.total_debits,
10798 total_credits: stats.total_credits,
10799 accounts_tracked: stats.accounts_tracked,
10800 companies_tracked: stats.companies_tracked,
10801 validation_errors: all_errors,
10802 has_unbalanced_entries: has_unbalanced,
10803 })
10804 }
10805
10806 fn inject_data_quality(
10811 &mut self,
10812 entries: &mut [JournalEntry],
10813 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10814 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10815
10816 let config = if self.config.data_quality.enabled {
10819 let dq = &self.config.data_quality;
10820 DataQualityConfig {
10821 enable_missing_values: dq.missing_values.enabled,
10822 missing_values: datasynth_generators::MissingValueConfig {
10823 global_rate: dq.effective_missing_rate(),
10824 ..Default::default()
10825 },
10826 enable_format_variations: dq.format_variations.enabled,
10827 format_variations: datasynth_generators::FormatVariationConfig {
10828 date_variation_rate: dq.format_variations.dates.rate,
10829 amount_variation_rate: dq.format_variations.amounts.rate,
10830 identifier_variation_rate: dq.format_variations.identifiers.rate,
10831 ..Default::default()
10832 },
10833 enable_duplicates: dq.duplicates.enabled,
10834 duplicates: datasynth_generators::DuplicateConfig {
10835 duplicate_rate: dq.effective_duplicate_rate(),
10836 ..Default::default()
10837 },
10838 enable_typos: dq.typos.enabled,
10839 typos: datasynth_generators::TypoConfig {
10840 char_error_rate: dq.effective_typo_rate(),
10841 ..Default::default()
10842 },
10843 enable_encoding_issues: dq.encoding_issues.enabled,
10844 encoding_issue_rate: dq.encoding_issues.rate,
10845 seed: self.seed.wrapping_add(77), track_statistics: true,
10847 }
10848 } else {
10849 DataQualityConfig::minimal()
10850 };
10851 let mut injector = DataQualityInjector::new(config);
10852
10853 injector.set_country_pack(self.primary_pack().clone());
10855
10856 let context = HashMap::new();
10858
10859 for entry in entries.iter_mut() {
10860 if let Some(text) = &entry.header.header_text {
10862 let processed = injector.process_text_field(
10863 "header_text",
10864 text,
10865 &entry.header.document_id.to_string(),
10866 &context,
10867 );
10868 match processed {
10869 Some(new_text) if new_text != *text => {
10870 entry.header.header_text = Some(new_text);
10871 }
10872 None => {
10873 entry.header.header_text = None; }
10875 _ => {}
10876 }
10877 }
10878
10879 if let Some(ref_text) = &entry.header.reference {
10881 let processed = injector.process_text_field(
10882 "reference",
10883 ref_text,
10884 &entry.header.document_id.to_string(),
10885 &context,
10886 );
10887 match processed {
10888 Some(new_text) if new_text != *ref_text => {
10889 entry.header.reference = Some(new_text);
10890 }
10891 None => {
10892 entry.header.reference = None;
10893 }
10894 _ => {}
10895 }
10896 }
10897
10898 let user_persona = entry.header.user_persona.clone();
10900 if let Some(processed) = injector.process_text_field(
10901 "user_persona",
10902 &user_persona,
10903 &entry.header.document_id.to_string(),
10904 &context,
10905 ) {
10906 if processed != user_persona {
10907 entry.header.user_persona = processed;
10908 }
10909 }
10910
10911 for line in &mut entry.lines {
10913 if let Some(ref text) = line.line_text {
10915 let processed = injector.process_text_field(
10916 "line_text",
10917 text,
10918 &entry.header.document_id.to_string(),
10919 &context,
10920 );
10921 match processed {
10922 Some(new_text) if new_text != *text => {
10923 line.line_text = Some(new_text);
10924 }
10925 None => {
10926 line.line_text = None;
10927 }
10928 _ => {}
10929 }
10930 }
10931
10932 if let Some(cc) = &line.cost_center {
10934 let processed = injector.process_text_field(
10935 "cost_center",
10936 cc,
10937 &entry.header.document_id.to_string(),
10938 &context,
10939 );
10940 match processed {
10941 Some(new_cc) if new_cc != *cc => {
10942 line.cost_center = Some(new_cc);
10943 }
10944 None => {
10945 line.cost_center = None;
10946 }
10947 _ => {}
10948 }
10949 }
10950 }
10951
10952 if let Some(pb) = &pb {
10953 pb.inc(1);
10954 }
10955 }
10956
10957 if let Some(pb) = pb {
10958 pb.finish_with_message("Data quality injection complete");
10959 }
10960
10961 let quality_issues = injector.issues().to_vec();
10962 Ok((injector.stats().clone(), quality_issues))
10963 }
10964
10965 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10976 let use_fsm = self
10978 .config
10979 .audit
10980 .fsm
10981 .as_ref()
10982 .map(|f| f.enabled)
10983 .unwrap_or(false);
10984
10985 if use_fsm {
10986 return self.generate_audit_data_with_fsm(entries);
10987 }
10988
10989 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10991 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10992 let fiscal_year = start_date.year() as u16;
10993 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10994
10995 let total_revenue: rust_decimal::Decimal = entries
10997 .iter()
10998 .flat_map(|e| e.lines.iter())
10999 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11000 .map(|l| l.credit_amount)
11001 .sum();
11002
11003 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11005
11006 let mut snapshot = AuditSnapshot::default();
11007
11008 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11010 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11011 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11012 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11013 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11014 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11015 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11016 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11017 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11018 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11019 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11020 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11021
11022 let accounts: Vec<String> = self
11024 .coa
11025 .as_ref()
11026 .map(|coa| {
11027 coa.get_postable_accounts()
11028 .iter()
11029 .map(|acc| acc.account_code().to_string())
11030 .collect()
11031 })
11032 .unwrap_or_default();
11033
11034 for (i, company) in self.config.companies.iter().enumerate() {
11036 let company_revenue = total_revenue
11038 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11039
11040 let engagements_for_company =
11042 self.phase_config.audit_engagements / self.config.companies.len().max(1);
11043 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11044 1
11045 } else {
11046 0
11047 };
11048
11049 for _eng_idx in 0..(engagements_for_company + extra) {
11050 let mut engagement = engagement_gen.generate_engagement(
11052 &company.code,
11053 &company.name,
11054 fiscal_year,
11055 period_end,
11056 company_revenue,
11057 None, );
11059
11060 if !self.master_data.employees.is_empty() {
11062 let emp_count = self.master_data.employees.len();
11063 let base = (i * 10 + _eng_idx) % emp_count;
11065 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11066 .employee_id
11067 .clone();
11068 engagement.engagement_manager_id = self.master_data.employees
11069 [(base + 1) % emp_count]
11070 .employee_id
11071 .clone();
11072 let real_team: Vec<String> = engagement
11073 .team_member_ids
11074 .iter()
11075 .enumerate()
11076 .map(|(j, _)| {
11077 self.master_data.employees[(base + 2 + j) % emp_count]
11078 .employee_id
11079 .clone()
11080 })
11081 .collect();
11082 engagement.team_member_ids = real_team;
11083 }
11084
11085 if let Some(pb) = &pb {
11086 pb.inc(1);
11087 }
11088
11089 let team_members: Vec<String> = engagement.team_member_ids.clone();
11091
11092 let workpapers =
11094 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
11095
11096 for wp in &workpapers {
11097 if let Some(pb) = &pb {
11098 pb.inc(1);
11099 }
11100
11101 let evidence = evidence_gen.generate_evidence_for_workpaper(
11103 wp,
11104 &team_members,
11105 wp.preparer_date,
11106 );
11107
11108 for _ in &evidence {
11109 if let Some(pb) = &pb {
11110 pb.inc(1);
11111 }
11112 }
11113
11114 snapshot.evidence.extend(evidence);
11115 }
11116
11117 let risks =
11119 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11120
11121 for _ in &risks {
11122 if let Some(pb) = &pb {
11123 pb.inc(1);
11124 }
11125 }
11126 snapshot.risk_assessments.extend(risks);
11127
11128 let findings = finding_gen.generate_findings_for_engagement(
11130 &engagement,
11131 &workpapers,
11132 &team_members,
11133 );
11134
11135 for _ in &findings {
11136 if let Some(pb) = &pb {
11137 pb.inc(1);
11138 }
11139 }
11140 snapshot.findings.extend(findings);
11141
11142 let judgments =
11144 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11145
11146 for _ in &judgments {
11147 if let Some(pb) = &pb {
11148 pb.inc(1);
11149 }
11150 }
11151 snapshot.judgments.extend(judgments);
11152
11153 let (confs, resps) =
11155 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11156 snapshot.confirmations.extend(confs);
11157 snapshot.confirmation_responses.extend(resps);
11158
11159 let team_pairs: Vec<(String, String)> = team_members
11161 .iter()
11162 .map(|id| {
11163 let name = self
11164 .master_data
11165 .employees
11166 .iter()
11167 .find(|e| e.employee_id == *id)
11168 .map(|e| e.display_name.clone())
11169 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11170 (id.clone(), name)
11171 })
11172 .collect();
11173 for wp in &workpapers {
11174 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11175 snapshot.procedure_steps.extend(steps);
11176 }
11177
11178 for wp in &workpapers {
11180 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11181 snapshot.samples.push(sample);
11182 }
11183 }
11184
11185 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11187 snapshot.analytical_results.extend(analytical);
11188
11189 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11191 snapshot.ia_functions.push(ia_func);
11192 snapshot.ia_reports.extend(ia_reports);
11193
11194 let vendor_names: Vec<String> = self
11196 .master_data
11197 .vendors
11198 .iter()
11199 .map(|v| v.name.clone())
11200 .collect();
11201 let customer_names: Vec<String> = self
11202 .master_data
11203 .customers
11204 .iter()
11205 .map(|c| c.name.clone())
11206 .collect();
11207 let (parties, rp_txns) =
11208 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11209 snapshot.related_parties.extend(parties);
11210 snapshot.related_party_transactions.extend(rp_txns);
11211
11212 snapshot.workpapers.extend(workpapers);
11214
11215 {
11217 let scope_id = format!(
11218 "SCOPE-{}-{}",
11219 engagement.engagement_id.simple(),
11220 &engagement.client_entity_id
11221 );
11222 let scope = datasynth_core::models::audit::AuditScope::new(
11223 scope_id.clone(),
11224 engagement.engagement_id.to_string(),
11225 engagement.client_entity_id.clone(),
11226 engagement.materiality,
11227 );
11228 let mut eng = engagement;
11230 eng.scope_id = Some(scope_id);
11231 snapshot.audit_scopes.push(scope);
11232 snapshot.engagements.push(eng);
11233 }
11234 }
11235 }
11236
11237 if self.config.companies.len() > 1 {
11241 let group_materiality = snapshot
11244 .engagements
11245 .first()
11246 .map(|e| e.materiality)
11247 .unwrap_or_else(|| {
11248 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11249 total_revenue * pct
11250 });
11251
11252 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11253 let group_engagement_id = snapshot
11254 .engagements
11255 .first()
11256 .map(|e| e.engagement_id.to_string())
11257 .unwrap_or_else(|| "GROUP-ENG".to_string());
11258
11259 let component_snapshot = component_gen.generate(
11260 &self.config.companies,
11261 group_materiality,
11262 &group_engagement_id,
11263 period_end,
11264 );
11265
11266 snapshot.component_auditors = component_snapshot.component_auditors;
11267 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11268 snapshot.component_instructions = component_snapshot.component_instructions;
11269 snapshot.component_reports = component_snapshot.component_reports;
11270
11271 info!(
11272 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11273 snapshot.component_auditors.len(),
11274 snapshot.component_instructions.len(),
11275 snapshot.component_reports.len(),
11276 );
11277 }
11278
11279 {
11283 let applicable_framework = self
11284 .config
11285 .accounting_standards
11286 .framework
11287 .as_ref()
11288 .map(|f| format!("{f:?}"))
11289 .unwrap_or_else(|| "IFRS".to_string());
11290
11291 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11292 let entity_count = self.config.companies.len();
11293
11294 for engagement in &snapshot.engagements {
11295 let company = self
11296 .config
11297 .companies
11298 .iter()
11299 .find(|c| c.code == engagement.client_entity_id);
11300 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11301 let letter_date = engagement.planning_start;
11302 let letter = letter_gen.generate(
11303 &engagement.engagement_id.to_string(),
11304 &engagement.client_name,
11305 entity_count,
11306 engagement.period_end_date,
11307 currency,
11308 &applicable_framework,
11309 letter_date,
11310 );
11311 snapshot.engagement_letters.push(letter);
11312 }
11313
11314 info!(
11315 "ISA 210 engagement letters: {} generated",
11316 snapshot.engagement_letters.len()
11317 );
11318 }
11319
11320 {
11324 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11325 let entity_codes: Vec<String> = self
11326 .config
11327 .companies
11328 .iter()
11329 .map(|c| c.code.clone())
11330 .collect();
11331 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11332 info!(
11333 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11334 subsequent.len(),
11335 subsequent
11336 .iter()
11337 .filter(|e| matches!(
11338 e.classification,
11339 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11340 ))
11341 .count(),
11342 subsequent
11343 .iter()
11344 .filter(|e| matches!(
11345 e.classification,
11346 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11347 ))
11348 .count(),
11349 );
11350 snapshot.subsequent_events = subsequent;
11351 }
11352
11353 {
11357 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11358 let entity_codes: Vec<String> = self
11359 .config
11360 .companies
11361 .iter()
11362 .map(|c| c.code.clone())
11363 .collect();
11364 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11365 info!(
11366 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11367 soc_snapshot.service_organizations.len(),
11368 soc_snapshot.soc_reports.len(),
11369 soc_snapshot.user_entity_controls.len(),
11370 );
11371 snapshot.service_organizations = soc_snapshot.service_organizations;
11372 snapshot.soc_reports = soc_snapshot.soc_reports;
11373 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11374 }
11375
11376 {
11380 use datasynth_generators::audit::going_concern_generator::{
11381 GoingConcernGenerator, GoingConcernInput,
11382 };
11383 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11384 let entity_codes: Vec<String> = self
11385 .config
11386 .companies
11387 .iter()
11388 .map(|c| c.code.clone())
11389 .collect();
11390 let assessment_date = period_end + chrono::Duration::days(75);
11392 let period_label = format!("FY{}", period_end.year());
11393
11394 let gc_inputs: Vec<GoingConcernInput> = self
11405 .config
11406 .companies
11407 .iter()
11408 .map(|company| {
11409 let code = &company.code;
11410 let mut revenue = rust_decimal::Decimal::ZERO;
11411 let mut expenses = rust_decimal::Decimal::ZERO;
11412 let mut current_assets = rust_decimal::Decimal::ZERO;
11413 let mut current_liabs = rust_decimal::Decimal::ZERO;
11414 let mut total_debt = rust_decimal::Decimal::ZERO;
11415
11416 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11417 for line in &je.lines {
11418 let acct = line.gl_account.as_str();
11419 let net = line.debit_amount - line.credit_amount;
11420 if acct.starts_with('4') {
11421 revenue -= net;
11423 } else if acct.starts_with('6') {
11424 expenses += net;
11426 }
11427 if acct.starts_with('1') {
11429 if let Ok(n) = acct.parse::<u32>() {
11431 if (1000..=1499).contains(&n) {
11432 current_assets += net;
11433 }
11434 }
11435 } else if acct.starts_with('2') {
11436 if let Ok(n) = acct.parse::<u32>() {
11437 if (2000..=2499).contains(&n) {
11438 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11441 total_debt -= net;
11443 }
11444 }
11445 }
11446 }
11447 }
11448
11449 let net_income = revenue - expenses;
11450 let working_capital = current_assets - current_liabs;
11451 let operating_cash_flow = net_income;
11454
11455 GoingConcernInput {
11456 entity_code: code.clone(),
11457 net_income,
11458 working_capital,
11459 operating_cash_flow,
11460 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11461 assessment_date,
11462 }
11463 })
11464 .collect();
11465
11466 let assessments = if gc_inputs.is_empty() {
11467 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11468 } else {
11469 gc_gen.generate_for_entities_with_inputs(
11470 &entity_codes,
11471 &gc_inputs,
11472 assessment_date,
11473 &period_label,
11474 )
11475 };
11476 info!(
11477 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11478 assessments.len(),
11479 assessments.iter().filter(|a| matches!(
11480 a.auditor_conclusion,
11481 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11482 )).count(),
11483 assessments.iter().filter(|a| matches!(
11484 a.auditor_conclusion,
11485 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11486 )).count(),
11487 assessments.iter().filter(|a| matches!(
11488 a.auditor_conclusion,
11489 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11490 )).count(),
11491 );
11492 snapshot.going_concern_assessments = assessments;
11493 }
11494
11495 {
11499 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11500 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11501 let entity_codes: Vec<String> = self
11502 .config
11503 .companies
11504 .iter()
11505 .map(|c| c.code.clone())
11506 .collect();
11507 let estimates = est_gen.generate_for_entities(&entity_codes);
11508 info!(
11509 "ISA 540 accounting estimates: {} estimates across {} entities \
11510 ({} with retrospective reviews, {} with auditor point estimates)",
11511 estimates.len(),
11512 entity_codes.len(),
11513 estimates
11514 .iter()
11515 .filter(|e| e.retrospective_review.is_some())
11516 .count(),
11517 estimates
11518 .iter()
11519 .filter(|e| e.auditor_point_estimate.is_some())
11520 .count(),
11521 );
11522 snapshot.accounting_estimates = estimates;
11523 }
11524
11525 {
11529 use datasynth_generators::audit::audit_opinion_generator::{
11530 AuditOpinionGenerator, AuditOpinionInput,
11531 };
11532
11533 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11534
11535 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11537 .engagements
11538 .iter()
11539 .map(|eng| {
11540 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11542 .findings
11543 .iter()
11544 .filter(|f| f.engagement_id == eng.engagement_id)
11545 .cloned()
11546 .collect();
11547
11548 let gc = snapshot
11550 .going_concern_assessments
11551 .iter()
11552 .find(|g| g.entity_code == eng.client_entity_id)
11553 .cloned();
11554
11555 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11557 snapshot.component_reports.clone();
11558
11559 let auditor = self
11560 .master_data
11561 .employees
11562 .first()
11563 .map(|e| e.display_name.clone())
11564 .unwrap_or_else(|| "Global Audit LLP".into());
11565
11566 let partner = self
11567 .master_data
11568 .employees
11569 .get(1)
11570 .map(|e| e.display_name.clone())
11571 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11572
11573 AuditOpinionInput {
11574 entity_code: eng.client_entity_id.clone(),
11575 entity_name: eng.client_name.clone(),
11576 engagement_id: eng.engagement_id,
11577 period_end: eng.period_end_date,
11578 findings: eng_findings,
11579 going_concern: gc,
11580 component_reports: comp_reports,
11581 is_us_listed: {
11583 let fw = &self.config.audit_standards.isa_compliance.framework;
11584 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11585 },
11586 auditor_name: auditor,
11587 engagement_partner: partner,
11588 }
11589 })
11590 .collect();
11591
11592 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11593
11594 for go in &generated_opinions {
11595 snapshot
11596 .key_audit_matters
11597 .extend(go.key_audit_matters.clone());
11598 }
11599 snapshot.audit_opinions = generated_opinions
11600 .into_iter()
11601 .map(|go| go.opinion)
11602 .collect();
11603
11604 info!(
11605 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11606 snapshot.audit_opinions.len(),
11607 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11608 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11609 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11610 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11611 );
11612 }
11613
11614 {
11618 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11619
11620 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11621
11622 for (i, company) in self.config.companies.iter().enumerate() {
11623 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11625 .engagements
11626 .iter()
11627 .filter(|e| e.client_entity_id == company.code)
11628 .map(|e| e.engagement_id)
11629 .collect();
11630
11631 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11632 .findings
11633 .iter()
11634 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11635 .cloned()
11636 .collect();
11637
11638 let emp_count = self.master_data.employees.len();
11640 let ceo_name = if emp_count > 0 {
11641 self.master_data.employees[i % emp_count]
11642 .display_name
11643 .clone()
11644 } else {
11645 format!("CEO of {}", company.name)
11646 };
11647 let cfo_name = if emp_count > 1 {
11648 self.master_data.employees[(i + 1) % emp_count]
11649 .display_name
11650 .clone()
11651 } else {
11652 format!("CFO of {}", company.name)
11653 };
11654
11655 let materiality = snapshot
11657 .engagements
11658 .iter()
11659 .find(|e| e.client_entity_id == company.code)
11660 .map(|e| e.materiality)
11661 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11662
11663 let input = SoxGeneratorInput {
11664 company_code: company.code.clone(),
11665 company_name: company.name.clone(),
11666 fiscal_year,
11667 period_end,
11668 findings: company_findings,
11669 ceo_name,
11670 cfo_name,
11671 materiality_threshold: materiality,
11672 revenue_percent: rust_decimal::Decimal::from(100),
11673 assets_percent: rust_decimal::Decimal::from(100),
11674 significant_accounts: vec![
11675 "Revenue".into(),
11676 "Accounts Receivable".into(),
11677 "Inventory".into(),
11678 "Fixed Assets".into(),
11679 "Accounts Payable".into(),
11680 ],
11681 };
11682
11683 let (certs, assessment) = sox_gen.generate(&input);
11684 snapshot.sox_302_certifications.extend(certs);
11685 snapshot.sox_404_assessments.push(assessment);
11686 }
11687
11688 info!(
11689 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11690 snapshot.sox_302_certifications.len(),
11691 snapshot.sox_404_assessments.len(),
11692 snapshot
11693 .sox_404_assessments
11694 .iter()
11695 .filter(|a| a.icfr_effective)
11696 .count(),
11697 snapshot
11698 .sox_404_assessments
11699 .iter()
11700 .filter(|a| !a.icfr_effective)
11701 .count(),
11702 );
11703 }
11704
11705 {
11709 use datasynth_generators::audit::materiality_generator::{
11710 MaterialityGenerator, MaterialityInput,
11711 };
11712
11713 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11714
11715 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11719
11720 for company in &self.config.companies {
11721 let company_code = company.code.clone();
11722
11723 let company_revenue: rust_decimal::Decimal = entries
11725 .iter()
11726 .filter(|e| e.company_code() == company_code)
11727 .flat_map(|e| e.lines.iter())
11728 .filter(|l| l.account_code.starts_with('4'))
11729 .map(|l| l.credit_amount)
11730 .sum();
11731
11732 let total_assets: rust_decimal::Decimal = entries
11734 .iter()
11735 .filter(|e| e.company_code() == company_code)
11736 .flat_map(|e| e.lines.iter())
11737 .filter(|l| l.account_code.starts_with('1'))
11738 .map(|l| l.debit_amount)
11739 .sum();
11740
11741 let total_expenses: rust_decimal::Decimal = entries
11743 .iter()
11744 .filter(|e| e.company_code() == company_code)
11745 .flat_map(|e| e.lines.iter())
11746 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11747 .map(|l| l.debit_amount)
11748 .sum();
11749
11750 let equity: rust_decimal::Decimal = entries
11752 .iter()
11753 .filter(|e| e.company_code() == company_code)
11754 .flat_map(|e| e.lines.iter())
11755 .filter(|l| l.account_code.starts_with('3'))
11756 .map(|l| l.credit_amount)
11757 .sum();
11758
11759 let pretax_income = company_revenue - total_expenses;
11760
11761 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11763 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11764 .unwrap_or(rust_decimal::Decimal::ONE);
11765 (
11766 total_revenue * w,
11767 total_revenue * w * rust_decimal::Decimal::from(3),
11768 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11769 total_revenue * w * rust_decimal::Decimal::from(2),
11770 )
11771 } else {
11772 (company_revenue, total_assets, pretax_income, equity)
11773 };
11774
11775 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11778 entity_code: company_code,
11779 period: format!("FY{}", fiscal_year),
11780 revenue: rev,
11781 pretax_income: pti,
11782 total_assets: assets,
11783 equity: eq,
11784 gross_profit,
11785 });
11786 }
11787
11788 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11789
11790 info!(
11791 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11792 {} total assets, {} equity benchmarks)",
11793 snapshot.materiality_calculations.len(),
11794 snapshot
11795 .materiality_calculations
11796 .iter()
11797 .filter(|m| matches!(
11798 m.benchmark,
11799 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11800 ))
11801 .count(),
11802 snapshot
11803 .materiality_calculations
11804 .iter()
11805 .filter(|m| matches!(
11806 m.benchmark,
11807 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11808 ))
11809 .count(),
11810 snapshot
11811 .materiality_calculations
11812 .iter()
11813 .filter(|m| matches!(
11814 m.benchmark,
11815 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11816 ))
11817 .count(),
11818 snapshot
11819 .materiality_calculations
11820 .iter()
11821 .filter(|m| matches!(
11822 m.benchmark,
11823 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11824 ))
11825 .count(),
11826 );
11827 }
11828
11829 {
11833 use datasynth_generators::audit::cra_generator::CraGenerator;
11834
11835 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11836
11837 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11839 .audit_scopes
11840 .iter()
11841 .map(|s| (s.entity_code.clone(), s.id.clone()))
11842 .collect();
11843
11844 for company in &self.config.companies {
11845 let cras = cra_gen.generate_for_entity(&company.code, None);
11846 let scope_id = entity_scope_map.get(&company.code).cloned();
11847 let cras_with_scope: Vec<_> = cras
11848 .into_iter()
11849 .map(|mut cra| {
11850 cra.scope_id = scope_id.clone();
11851 cra
11852 })
11853 .collect();
11854 snapshot.combined_risk_assessments.extend(cras_with_scope);
11855 }
11856
11857 let significant_count = snapshot
11858 .combined_risk_assessments
11859 .iter()
11860 .filter(|c| c.significant_risk)
11861 .count();
11862 let high_cra_count = snapshot
11863 .combined_risk_assessments
11864 .iter()
11865 .filter(|c| {
11866 matches!(
11867 c.combined_risk,
11868 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11869 )
11870 })
11871 .count();
11872
11873 info!(
11874 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11875 snapshot.combined_risk_assessments.len(),
11876 significant_count,
11877 high_cra_count,
11878 );
11879 }
11880
11881 {
11885 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11886
11887 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11888
11889 for company in &self.config.companies {
11891 let entity_code = company.code.clone();
11892
11893 let tolerable_error = snapshot
11895 .materiality_calculations
11896 .iter()
11897 .find(|m| m.entity_code == entity_code)
11898 .map(|m| m.tolerable_error);
11899
11900 let entity_cras: Vec<_> = snapshot
11902 .combined_risk_assessments
11903 .iter()
11904 .filter(|c| c.entity_code == entity_code)
11905 .cloned()
11906 .collect();
11907
11908 if !entity_cras.is_empty() {
11909 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11910 snapshot.sampling_plans.extend(plans);
11911 snapshot.sampled_items.extend(items);
11912 }
11913 }
11914
11915 let misstatement_count = snapshot
11916 .sampled_items
11917 .iter()
11918 .filter(|i| i.misstatement_found)
11919 .count();
11920
11921 info!(
11922 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11923 snapshot.sampling_plans.len(),
11924 snapshot.sampled_items.len(),
11925 misstatement_count,
11926 );
11927 }
11928
11929 {
11933 use datasynth_generators::audit::scots_generator::{
11934 ScotsGenerator, ScotsGeneratorConfig,
11935 };
11936
11937 let ic_enabled = self.config.intercompany.enabled;
11938
11939 let config = ScotsGeneratorConfig {
11940 intercompany_enabled: ic_enabled,
11941 ..ScotsGeneratorConfig::default()
11942 };
11943 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11944
11945 for company in &self.config.companies {
11946 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11947 snapshot
11948 .significant_transaction_classes
11949 .extend(entity_scots);
11950 }
11951
11952 let estimation_count = snapshot
11953 .significant_transaction_classes
11954 .iter()
11955 .filter(|s| {
11956 matches!(
11957 s.transaction_type,
11958 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11959 )
11960 })
11961 .count();
11962
11963 info!(
11964 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11965 snapshot.significant_transaction_classes.len(),
11966 estimation_count,
11967 );
11968 }
11969
11970 {
11974 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11975
11976 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11977 let entity_codes: Vec<String> = self
11978 .config
11979 .companies
11980 .iter()
11981 .map(|c| c.code.clone())
11982 .collect();
11983 let unusual_flags =
11984 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11985 info!(
11986 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11987 unusual_flags.len(),
11988 unusual_flags
11989 .iter()
11990 .filter(|f| matches!(
11991 f.severity,
11992 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11993 ))
11994 .count(),
11995 unusual_flags
11996 .iter()
11997 .filter(|f| matches!(
11998 f.severity,
11999 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12000 ))
12001 .count(),
12002 unusual_flags
12003 .iter()
12004 .filter(|f| matches!(
12005 f.severity,
12006 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12007 ))
12008 .count(),
12009 );
12010 snapshot.unusual_items = unusual_flags;
12011 }
12012
12013 {
12017 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12018
12019 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12020 let entity_codes: Vec<String> = self
12021 .config
12022 .companies
12023 .iter()
12024 .map(|c| c.code.clone())
12025 .collect();
12026 let current_period_label = format!("FY{fiscal_year}");
12027 let prior_period_label = format!("FY{}", fiscal_year - 1);
12028 let analytical_rels = ar_gen.generate_for_entities(
12029 &entity_codes,
12030 entries,
12031 ¤t_period_label,
12032 &prior_period_label,
12033 );
12034 let out_of_range = analytical_rels
12035 .iter()
12036 .filter(|r| !r.within_expected_range)
12037 .count();
12038 info!(
12039 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
12040 analytical_rels.len(),
12041 out_of_range,
12042 );
12043 snapshot.analytical_relationships = analytical_rels;
12044 }
12045
12046 if let Some(pb) = pb {
12047 pb.finish_with_message(format!(
12048 "Audit data: {} engagements, {} workpapers, {} evidence, \
12049 {} confirmations, {} procedure steps, {} samples, \
12050 {} analytical, {} IA funcs, {} related parties, \
12051 {} component auditors, {} letters, {} subsequent events, \
12052 {} service orgs, {} going concern, {} accounting estimates, \
12053 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12054 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12055 {} unusual items, {} analytical relationships",
12056 snapshot.engagements.len(),
12057 snapshot.workpapers.len(),
12058 snapshot.evidence.len(),
12059 snapshot.confirmations.len(),
12060 snapshot.procedure_steps.len(),
12061 snapshot.samples.len(),
12062 snapshot.analytical_results.len(),
12063 snapshot.ia_functions.len(),
12064 snapshot.related_parties.len(),
12065 snapshot.component_auditors.len(),
12066 snapshot.engagement_letters.len(),
12067 snapshot.subsequent_events.len(),
12068 snapshot.service_organizations.len(),
12069 snapshot.going_concern_assessments.len(),
12070 snapshot.accounting_estimates.len(),
12071 snapshot.audit_opinions.len(),
12072 snapshot.key_audit_matters.len(),
12073 snapshot.sox_302_certifications.len(),
12074 snapshot.sox_404_assessments.len(),
12075 snapshot.materiality_calculations.len(),
12076 snapshot.combined_risk_assessments.len(),
12077 snapshot.sampling_plans.len(),
12078 snapshot.significant_transaction_classes.len(),
12079 snapshot.unusual_items.len(),
12080 snapshot.analytical_relationships.len(),
12081 ));
12082 }
12083
12084 {
12091 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12092 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12093 debug!(
12094 "PCAOB-ISA mappings generated: {} mappings",
12095 snapshot.isa_pcaob_mappings.len()
12096 );
12097 }
12098
12099 {
12106 use datasynth_standards::audit::isa_reference::IsaStandard;
12107 snapshot.isa_mappings = IsaStandard::standard_entries();
12108 debug!(
12109 "ISA standard entries generated: {} standards",
12110 snapshot.isa_mappings.len()
12111 );
12112 }
12113
12114 {
12117 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12118 .engagements
12119 .iter()
12120 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12121 .collect();
12122
12123 for rpt in &mut snapshot.related_party_transactions {
12124 if rpt.journal_entry_id.is_some() {
12125 continue; }
12127 let entity = engagement_by_id
12128 .get(&rpt.engagement_id.to_string())
12129 .copied()
12130 .unwrap_or("");
12131
12132 let best_je = entries
12134 .iter()
12135 .filter(|je| je.header.company_code == entity)
12136 .min_by_key(|je| {
12137 (je.header.posting_date - rpt.transaction_date)
12138 .num_days()
12139 .abs()
12140 });
12141
12142 if let Some(je) = best_je {
12143 rpt.journal_entry_id = Some(je.header.document_id.to_string());
12144 }
12145 }
12146
12147 let linked = snapshot
12148 .related_party_transactions
12149 .iter()
12150 .filter(|t| t.journal_entry_id.is_some())
12151 .count();
12152 debug!(
12153 "Linked {}/{} related party transactions to journal entries",
12154 linked,
12155 snapshot.related_party_transactions.len()
12156 );
12157 }
12158
12159 if !snapshot.engagements.is_empty() {
12165 use datasynth_generators::audit_opinion_generator::{
12166 AuditOpinionGenerator, AuditOpinionInput,
12167 };
12168
12169 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12170 let inputs: Vec<AuditOpinionInput> = snapshot
12171 .engagements
12172 .iter()
12173 .map(|eng| {
12174 let findings = snapshot
12175 .findings
12176 .iter()
12177 .filter(|f| f.engagement_id == eng.engagement_id)
12178 .cloned()
12179 .collect();
12180 let going_concern = snapshot
12181 .going_concern_assessments
12182 .iter()
12183 .find(|gc| gc.entity_code == eng.client_entity_id)
12184 .cloned();
12185 let component_reports = snapshot
12188 .component_reports
12189 .iter()
12190 .filter(|r| r.entity_code == eng.client_entity_id)
12191 .cloned()
12192 .collect();
12193
12194 AuditOpinionInput {
12195 entity_code: eng.client_entity_id.clone(),
12196 entity_name: eng.client_name.clone(),
12197 engagement_id: eng.engagement_id,
12198 period_end: eng.period_end_date,
12199 findings,
12200 going_concern,
12201 component_reports,
12202 is_us_listed: matches!(
12203 eng.engagement_type,
12204 datasynth_core::audit::EngagementType::IntegratedAudit
12205 | datasynth_core::audit::EngagementType::Sox404
12206 ),
12207 auditor_name: "DataSynth Audit LLP".to_string(),
12208 engagement_partner: "Engagement Partner".to_string(),
12209 }
12210 })
12211 .collect();
12212
12213 let generated = opinion_gen.generate_batch(&inputs);
12214 for g in generated {
12215 snapshot.key_audit_matters.extend(g.key_audit_matters);
12216 snapshot.audit_opinions.push(g.opinion);
12217 }
12218 debug!(
12219 "Generated {} audit opinions with {} key audit matters",
12220 snapshot.audit_opinions.len(),
12221 snapshot.key_audit_matters.len()
12222 );
12223 }
12224
12225 Ok(snapshot)
12226 }
12227
12228 fn generate_audit_data_with_fsm(
12235 &mut self,
12236 entries: &[JournalEntry],
12237 ) -> SynthResult<AuditSnapshot> {
12238 use datasynth_audit_fsm::{
12239 context::EngagementContext,
12240 engine::AuditFsmEngine,
12241 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
12242 };
12243 use rand::SeedableRng;
12244 use rand_chacha::ChaCha8Rng;
12245
12246 info!("Audit FSM: generating audit data via FSM engine");
12247
12248 let fsm_config = self
12249 .config
12250 .audit
12251 .fsm
12252 .as_ref()
12253 .expect("FSM config must be present when FSM is enabled");
12254
12255 let bwp = match fsm_config.blueprint.as_str() {
12257 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
12258 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
12259 _ => {
12260 warn!(
12261 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
12262 fsm_config.blueprint
12263 );
12264 BlueprintWithPreconditions::load_builtin_fsa()
12265 }
12266 }
12267 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
12268
12269 let overlay = match fsm_config.overlay.as_str() {
12271 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
12272 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
12273 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
12274 _ => {
12275 warn!(
12276 "Unknown FSM overlay '{}', falling back to builtin:default",
12277 fsm_config.overlay
12278 );
12279 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
12280 }
12281 }
12282 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
12283
12284 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12286 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12287 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12288
12289 let company = self.config.companies.first();
12291 let company_code = company
12292 .map(|c| c.code.clone())
12293 .unwrap_or_else(|| "UNKNOWN".to_string());
12294 let company_name = company
12295 .map(|c| c.name.clone())
12296 .unwrap_or_else(|| "Unknown Company".to_string());
12297 let currency = company
12298 .map(|c| c.currency.clone())
12299 .unwrap_or_else(|| "USD".to_string());
12300
12301 let entity_entries: Vec<_> = entries
12303 .iter()
12304 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
12305 .cloned()
12306 .collect();
12307 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
12311 .iter()
12312 .flat_map(|e| e.lines.iter())
12313 .filter(|l| l.account_code.starts_with('4'))
12314 .map(|l| l.credit_amount - l.debit_amount)
12315 .sum();
12316
12317 let total_assets: rust_decimal::Decimal = entries
12318 .iter()
12319 .flat_map(|e| e.lines.iter())
12320 .filter(|l| l.account_code.starts_with('1'))
12321 .map(|l| l.debit_amount - l.credit_amount)
12322 .sum();
12323
12324 let total_expenses: rust_decimal::Decimal = entries
12325 .iter()
12326 .flat_map(|e| e.lines.iter())
12327 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12328 .map(|l| l.debit_amount)
12329 .sum();
12330
12331 let equity: rust_decimal::Decimal = entries
12332 .iter()
12333 .flat_map(|e| e.lines.iter())
12334 .filter(|l| l.account_code.starts_with('3'))
12335 .map(|l| l.credit_amount - l.debit_amount)
12336 .sum();
12337
12338 let total_debt: rust_decimal::Decimal = entries
12339 .iter()
12340 .flat_map(|e| e.lines.iter())
12341 .filter(|l| l.account_code.starts_with('2'))
12342 .map(|l| l.credit_amount - l.debit_amount)
12343 .sum();
12344
12345 let pretax_income = total_revenue - total_expenses;
12346
12347 let cogs: rust_decimal::Decimal = entries
12348 .iter()
12349 .flat_map(|e| e.lines.iter())
12350 .filter(|l| l.account_code.starts_with('5'))
12351 .map(|l| l.debit_amount)
12352 .sum();
12353 let gross_profit = total_revenue - cogs;
12354
12355 let current_assets: rust_decimal::Decimal = entries
12356 .iter()
12357 .flat_map(|e| e.lines.iter())
12358 .filter(|l| {
12359 l.account_code.starts_with("10")
12360 || l.account_code.starts_with("11")
12361 || l.account_code.starts_with("12")
12362 || l.account_code.starts_with("13")
12363 })
12364 .map(|l| l.debit_amount - l.credit_amount)
12365 .sum();
12366 let current_liabilities: rust_decimal::Decimal = entries
12367 .iter()
12368 .flat_map(|e| e.lines.iter())
12369 .filter(|l| {
12370 l.account_code.starts_with("20")
12371 || l.account_code.starts_with("21")
12372 || l.account_code.starts_with("22")
12373 })
12374 .map(|l| l.credit_amount - l.debit_amount)
12375 .sum();
12376 let working_capital = current_assets - current_liabilities;
12377
12378 let depreciation: rust_decimal::Decimal = entries
12379 .iter()
12380 .flat_map(|e| e.lines.iter())
12381 .filter(|l| l.account_code.starts_with("60"))
12382 .map(|l| l.debit_amount)
12383 .sum();
12384 let operating_cash_flow = pretax_income + depreciation;
12385
12386 let accounts: Vec<String> = self
12388 .coa
12389 .as_ref()
12390 .map(|coa| {
12391 coa.get_postable_accounts()
12392 .iter()
12393 .map(|acc| acc.account_code().to_string())
12394 .collect()
12395 })
12396 .unwrap_or_default();
12397
12398 let team_member_ids: Vec<String> = self
12400 .master_data
12401 .employees
12402 .iter()
12403 .take(8) .map(|e| e.employee_id.clone())
12405 .collect();
12406 let team_member_pairs: Vec<(String, String)> = self
12407 .master_data
12408 .employees
12409 .iter()
12410 .take(8)
12411 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12412 .collect();
12413
12414 let vendor_names: Vec<String> = self
12415 .master_data
12416 .vendors
12417 .iter()
12418 .map(|v| v.name.clone())
12419 .collect();
12420 let customer_names: Vec<String> = self
12421 .master_data
12422 .customers
12423 .iter()
12424 .map(|c| c.name.clone())
12425 .collect();
12426
12427 let entity_codes: Vec<String> = self
12428 .config
12429 .companies
12430 .iter()
12431 .map(|c| c.code.clone())
12432 .collect();
12433
12434 let journal_entry_ids: Vec<String> = entries
12436 .iter()
12437 .take(50)
12438 .map(|e| e.header.document_id.to_string())
12439 .collect();
12440
12441 let mut account_balances = std::collections::HashMap::<String, f64>::new();
12443 for entry in entries {
12444 for line in &entry.lines {
12445 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12446 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12447 *account_balances
12448 .entry(line.account_code.clone())
12449 .or_insert(0.0) += debit_f64 - credit_f64;
12450 }
12451 }
12452
12453 let control_ids: Vec<String> = Vec::new();
12458 let anomaly_refs: Vec<String> = Vec::new();
12459
12460 let mut context = EngagementContext {
12461 company_code,
12462 company_name,
12463 fiscal_year: start_date.year(),
12464 currency,
12465 total_revenue,
12466 total_assets,
12467 engagement_start: start_date,
12468 report_date: period_end,
12469 pretax_income,
12470 equity,
12471 gross_profit,
12472 working_capital,
12473 operating_cash_flow,
12474 total_debt,
12475 team_member_ids,
12476 team_member_pairs,
12477 accounts,
12478 vendor_names,
12479 customer_names,
12480 journal_entry_ids,
12481 account_balances,
12482 control_ids,
12483 anomaly_refs,
12484 journal_entries: entries.to_vec(),
12485 is_us_listed: false,
12486 entity_codes,
12487 auditor_firm_name: "DataSynth Audit LLP".into(),
12488 accounting_framework: self
12489 .config
12490 .accounting_standards
12491 .framework
12492 .map(|f| match f {
12493 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12494 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12495 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12496 "French GAAP"
12497 }
12498 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12499 "German GAAP"
12500 }
12501 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12502 "Dual Reporting"
12503 }
12504 })
12505 .unwrap_or("IFRS")
12506 .into(),
12507 };
12508
12509 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12511 let rng = ChaCha8Rng::seed_from_u64(seed);
12512 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12513
12514 let mut result = engine
12515 .run_engagement(&context)
12516 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12517
12518 info!(
12519 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12520 {} phases completed, duration {:.1}h",
12521 result.event_log.len(),
12522 result.artifacts.total_artifacts(),
12523 result.anomalies.len(),
12524 result.phases_completed.len(),
12525 result.total_duration_hours,
12526 );
12527
12528 let tb_entity = context.company_code.clone();
12530 let tb_fy = context.fiscal_year;
12531 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12532 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12533 entries,
12534 &tb_entity,
12535 tb_fy,
12536 self.coa.as_ref().map(|c| c.as_ref()),
12537 );
12538
12539 let bag = result.artifacts;
12541 let mut snapshot = AuditSnapshot {
12542 engagements: bag.engagements,
12543 engagement_letters: bag.engagement_letters,
12544 materiality_calculations: bag.materiality_calculations,
12545 risk_assessments: bag.risk_assessments,
12546 combined_risk_assessments: bag.combined_risk_assessments,
12547 workpapers: bag.workpapers,
12548 evidence: bag.evidence,
12549 findings: bag.findings,
12550 judgments: bag.judgments,
12551 sampling_plans: bag.sampling_plans,
12552 sampled_items: bag.sampled_items,
12553 analytical_results: bag.analytical_results,
12554 going_concern_assessments: bag.going_concern_assessments,
12555 subsequent_events: bag.subsequent_events,
12556 audit_opinions: bag.audit_opinions,
12557 key_audit_matters: bag.key_audit_matters,
12558 procedure_steps: bag.procedure_steps,
12559 samples: bag.samples,
12560 confirmations: bag.confirmations,
12561 confirmation_responses: bag.confirmation_responses,
12562 fsm_event_trail: Some(result.event_log),
12564 ..Default::default()
12566 };
12567
12568 {
12570 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12571 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12572 }
12573 {
12574 use datasynth_standards::audit::isa_reference::IsaStandard;
12575 snapshot.isa_mappings = IsaStandard::standard_entries();
12576 }
12577
12578 info!(
12579 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12580 {} risk assessments, {} findings, {} materiality calcs",
12581 snapshot.engagements.len(),
12582 snapshot.workpapers.len(),
12583 snapshot.evidence.len(),
12584 snapshot.risk_assessments.len(),
12585 snapshot.findings.len(),
12586 snapshot.materiality_calculations.len(),
12587 );
12588
12589 Ok(snapshot)
12590 }
12591
12592 fn export_graphs(
12599 &mut self,
12600 entries: &[JournalEntry],
12601 _coa: &Arc<ChartOfAccounts>,
12602 stats: &mut EnhancedGenerationStatistics,
12603 ) -> SynthResult<GraphExportSnapshot> {
12604 let pb = self.create_progress_bar(100, "Exporting Graphs");
12605
12606 let mut snapshot = GraphExportSnapshot::default();
12607
12608 let output_dir = self
12610 .output_path
12611 .clone()
12612 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12613 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12614
12615 for graph_type in &self.config.graph_export.graph_types {
12617 if let Some(pb) = &pb {
12618 pb.inc(10);
12619 }
12620
12621 let graph_config = TransactionGraphConfig {
12623 include_vendors: false,
12624 include_customers: false,
12625 create_debit_credit_edges: true,
12626 include_document_nodes: graph_type.include_document_nodes,
12627 min_edge_weight: graph_type.min_edge_weight,
12628 aggregate_parallel_edges: graph_type.aggregate_edges,
12629 framework: None,
12630 };
12631
12632 let mut builder = TransactionGraphBuilder::new(graph_config);
12633 builder.add_journal_entries(entries);
12634 let graph = builder.build();
12635
12636 stats.graph_node_count += graph.node_count();
12638 stats.graph_edge_count += graph.edge_count();
12639
12640 if let Some(pb) = &pb {
12641 pb.inc(40);
12642 }
12643
12644 for format in &self.config.graph_export.formats {
12646 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12647
12648 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12650 warn!("Failed to create graph output directory: {}", e);
12651 continue;
12652 }
12653
12654 match format {
12655 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12656 let pyg_config = PyGExportConfig {
12657 common: datasynth_graph::CommonExportConfig {
12658 export_node_features: true,
12659 export_edge_features: true,
12660 export_node_labels: true,
12661 export_edge_labels: true,
12662 export_masks: true,
12663 train_ratio: self.config.graph_export.train_ratio,
12664 val_ratio: self.config.graph_export.validation_ratio,
12665 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12666 },
12667 one_hot_categoricals: false,
12668 };
12669
12670 let exporter = PyGExporter::new(pyg_config);
12671 match exporter.export(&graph, &format_dir) {
12672 Ok(metadata) => {
12673 snapshot.exports.insert(
12674 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12675 GraphExportInfo {
12676 name: graph_type.name.clone(),
12677 format: "pytorch_geometric".to_string(),
12678 output_path: format_dir.clone(),
12679 node_count: metadata.num_nodes,
12680 edge_count: metadata.num_edges,
12681 },
12682 );
12683 snapshot.graph_count += 1;
12684 }
12685 Err(e) => {
12686 warn!("Failed to export PyTorch Geometric graph: {}", e);
12687 }
12688 }
12689 }
12690 datasynth_config::schema::GraphExportFormat::Neo4j => {
12691 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12692
12693 let neo4j_config = Neo4jExportConfig {
12694 export_node_properties: true,
12695 export_edge_properties: true,
12696 export_features: true,
12697 generate_cypher: true,
12698 generate_admin_import: true,
12699 database_name: "synth".to_string(),
12700 cypher_batch_size: 1000,
12701 };
12702
12703 let exporter = Neo4jExporter::new(neo4j_config);
12704 match exporter.export(&graph, &format_dir) {
12705 Ok(metadata) => {
12706 snapshot.exports.insert(
12707 format!("{}_{}", graph_type.name, "neo4j"),
12708 GraphExportInfo {
12709 name: graph_type.name.clone(),
12710 format: "neo4j".to_string(),
12711 output_path: format_dir.clone(),
12712 node_count: metadata.num_nodes,
12713 edge_count: metadata.num_edges,
12714 },
12715 );
12716 snapshot.graph_count += 1;
12717 }
12718 Err(e) => {
12719 warn!("Failed to export Neo4j graph: {}", e);
12720 }
12721 }
12722 }
12723 datasynth_config::schema::GraphExportFormat::Dgl => {
12724 use datasynth_graph::{DGLExportConfig, DGLExporter};
12725
12726 let dgl_config = DGLExportConfig {
12727 common: datasynth_graph::CommonExportConfig {
12728 export_node_features: true,
12729 export_edge_features: true,
12730 export_node_labels: true,
12731 export_edge_labels: true,
12732 export_masks: true,
12733 train_ratio: self.config.graph_export.train_ratio,
12734 val_ratio: self.config.graph_export.validation_ratio,
12735 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12736 },
12737 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12738 include_pickle_script: true, };
12740
12741 let exporter = DGLExporter::new(dgl_config);
12742 match exporter.export(&graph, &format_dir) {
12743 Ok(metadata) => {
12744 snapshot.exports.insert(
12745 format!("{}_{}", graph_type.name, "dgl"),
12746 GraphExportInfo {
12747 name: graph_type.name.clone(),
12748 format: "dgl".to_string(),
12749 output_path: format_dir.clone(),
12750 node_count: metadata.common.num_nodes,
12751 edge_count: metadata.common.num_edges,
12752 },
12753 );
12754 snapshot.graph_count += 1;
12755 }
12756 Err(e) => {
12757 warn!("Failed to export DGL graph: {}", e);
12758 }
12759 }
12760 }
12761 datasynth_config::schema::GraphExportFormat::RustGraph => {
12762 use datasynth_graph::{
12763 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12764 };
12765
12766 let rustgraph_config = RustGraphExportConfig {
12767 include_features: true,
12768 include_temporal: true,
12769 include_labels: true,
12770 source_name: "datasynth".to_string(),
12771 batch_id: None,
12772 output_format: RustGraphOutputFormat::JsonLines,
12773 export_node_properties: true,
12774 export_edge_properties: true,
12775 pretty_print: false,
12776 };
12777
12778 let exporter = RustGraphExporter::new(rustgraph_config);
12779 match exporter.export(&graph, &format_dir) {
12780 Ok(metadata) => {
12781 snapshot.exports.insert(
12782 format!("{}_{}", graph_type.name, "rustgraph"),
12783 GraphExportInfo {
12784 name: graph_type.name.clone(),
12785 format: "rustgraph".to_string(),
12786 output_path: format_dir.clone(),
12787 node_count: metadata.num_nodes,
12788 edge_count: metadata.num_edges,
12789 },
12790 );
12791 snapshot.graph_count += 1;
12792 }
12793 Err(e) => {
12794 warn!("Failed to export RustGraph: {}", e);
12795 }
12796 }
12797 }
12798 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12799 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12801 }
12802 }
12803 }
12804
12805 if let Some(pb) = &pb {
12806 pb.inc(40);
12807 }
12808 }
12809
12810 stats.graph_export_count = snapshot.graph_count;
12811 snapshot.exported = snapshot.graph_count > 0;
12812
12813 if let Some(pb) = pb {
12814 pb.finish_with_message(format!(
12815 "Graphs exported: {} graphs ({} nodes, {} edges)",
12816 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12817 ));
12818 }
12819
12820 Ok(snapshot)
12821 }
12822
12823 fn build_additional_graphs(
12828 &self,
12829 banking: &BankingSnapshot,
12830 intercompany: &IntercompanySnapshot,
12831 entries: &[JournalEntry],
12832 stats: &mut EnhancedGenerationStatistics,
12833 ) {
12834 let output_dir = self
12835 .output_path
12836 .clone()
12837 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12838 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12839
12840 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12842 info!("Phase 10c: Building banking network graph");
12843 let config = BankingGraphConfig::default();
12844 let mut builder = BankingGraphBuilder::new(config);
12845 builder.add_customers(&banking.customers);
12846 builder.add_accounts(&banking.accounts, &banking.customers);
12847 builder.add_transactions(&banking.transactions);
12848 let graph = builder.build();
12849
12850 let node_count = graph.node_count();
12851 let edge_count = graph.edge_count();
12852 stats.graph_node_count += node_count;
12853 stats.graph_edge_count += edge_count;
12854
12855 for format in &self.config.graph_export.formats {
12857 if matches!(
12858 format,
12859 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12860 ) {
12861 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12862 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12863 warn!("Failed to create banking graph output dir: {}", e);
12864 continue;
12865 }
12866 let pyg_config = PyGExportConfig::default();
12867 let exporter = PyGExporter::new(pyg_config);
12868 if let Err(e) = exporter.export(&graph, &format_dir) {
12869 warn!("Failed to export banking graph as PyG: {}", e);
12870 } else {
12871 info!(
12872 "Banking network graph exported: {} nodes, {} edges",
12873 node_count, edge_count
12874 );
12875 }
12876 }
12877 }
12878 }
12879
12880 let approval_entries: Vec<_> = entries
12882 .iter()
12883 .filter(|je| je.header.approval_workflow.is_some())
12884 .collect();
12885
12886 if !approval_entries.is_empty() {
12887 info!(
12888 "Phase 10c: Building approval network graph ({} entries with approvals)",
12889 approval_entries.len()
12890 );
12891 let config = ApprovalGraphConfig::default();
12892 let mut builder = ApprovalGraphBuilder::new(config);
12893
12894 for je in &approval_entries {
12895 if let Some(ref wf) = je.header.approval_workflow {
12896 for action in &wf.actions {
12897 let record = datasynth_core::models::ApprovalRecord {
12898 approval_id: format!(
12899 "APR-{}-{}",
12900 je.header.document_id, action.approval_level
12901 ),
12902 document_number: je.header.document_id.to_string(),
12903 document_type: "JE".to_string(),
12904 company_code: je.company_code().to_string(),
12905 requester_id: wf.preparer_id.clone(),
12906 requester_name: Some(wf.preparer_name.clone()),
12907 approver_id: action.actor_id.clone(),
12908 approver_name: action.actor_name.clone(),
12909 approval_date: je.posting_date(),
12910 action: format!("{:?}", action.action),
12911 amount: wf.amount,
12912 approval_limit: None,
12913 comments: action.comments.clone(),
12914 delegation_from: None,
12915 is_auto_approved: false,
12916 };
12917 builder.add_approval(&record);
12918 }
12919 }
12920 }
12921
12922 let graph = builder.build();
12923 let node_count = graph.node_count();
12924 let edge_count = graph.edge_count();
12925 stats.graph_node_count += node_count;
12926 stats.graph_edge_count += edge_count;
12927
12928 for format in &self.config.graph_export.formats {
12930 if matches!(
12931 format,
12932 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12933 ) {
12934 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12935 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12936 warn!("Failed to create approval graph output dir: {}", e);
12937 continue;
12938 }
12939 let pyg_config = PyGExportConfig::default();
12940 let exporter = PyGExporter::new(pyg_config);
12941 if let Err(e) = exporter.export(&graph, &format_dir) {
12942 warn!("Failed to export approval graph as PyG: {}", e);
12943 } else {
12944 info!(
12945 "Approval network graph exported: {} nodes, {} edges",
12946 node_count, edge_count
12947 );
12948 }
12949 }
12950 }
12951 }
12952
12953 if self.config.companies.len() >= 2 {
12955 info!(
12956 "Phase 10c: Building entity relationship graph ({} companies)",
12957 self.config.companies.len()
12958 );
12959
12960 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12961 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12962
12963 let parent_code = &self.config.companies[0].code;
12965 let mut companies: Vec<datasynth_core::models::Company> =
12966 Vec::with_capacity(self.config.companies.len());
12967
12968 let first = &self.config.companies[0];
12970 companies.push(datasynth_core::models::Company::parent(
12971 &first.code,
12972 &first.name,
12973 &first.country,
12974 &first.currency,
12975 ));
12976
12977 for cc in self.config.companies.iter().skip(1) {
12979 companies.push(datasynth_core::models::Company::subsidiary(
12980 &cc.code,
12981 &cc.name,
12982 &cc.country,
12983 &cc.currency,
12984 parent_code,
12985 rust_decimal::Decimal::from(100),
12986 ));
12987 }
12988
12989 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12991 self.config
12992 .companies
12993 .iter()
12994 .skip(1)
12995 .enumerate()
12996 .map(|(i, cc)| {
12997 let mut rel =
12998 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12999 format!("REL{:03}", i + 1),
13000 parent_code.clone(),
13001 cc.code.clone(),
13002 rust_decimal::Decimal::from(100),
13003 start_date,
13004 );
13005 rel.functional_currency = cc.currency.clone();
13006 rel
13007 })
13008 .collect();
13009
13010 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13011 builder.add_companies(&companies);
13012 builder.add_ownership_relationships(&relationships);
13013
13014 for pair in &intercompany.matched_pairs {
13016 builder.add_intercompany_edge(
13017 &pair.seller_company,
13018 &pair.buyer_company,
13019 pair.amount,
13020 &format!("{:?}", pair.transaction_type),
13021 );
13022 }
13023
13024 let graph = builder.build();
13025 let node_count = graph.node_count();
13026 let edge_count = graph.edge_count();
13027 stats.graph_node_count += node_count;
13028 stats.graph_edge_count += edge_count;
13029
13030 for format in &self.config.graph_export.formats {
13032 if matches!(
13033 format,
13034 datasynth_config::schema::GraphExportFormat::PytorchGeometric
13035 ) {
13036 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
13037 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13038 warn!("Failed to create entity graph output dir: {}", e);
13039 continue;
13040 }
13041 let pyg_config = PyGExportConfig::default();
13042 let exporter = PyGExporter::new(pyg_config);
13043 if let Err(e) = exporter.export(&graph, &format_dir) {
13044 warn!("Failed to export entity graph as PyG: {}", e);
13045 } else {
13046 info!(
13047 "Entity relationship graph exported: {} nodes, {} edges",
13048 node_count, edge_count
13049 );
13050 }
13051 }
13052 }
13053 } else {
13054 debug!(
13055 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13056 self.config.companies.len()
13057 );
13058 }
13059 }
13060
13061 #[allow(clippy::too_many_arguments)]
13068 fn export_hypergraph(
13069 &self,
13070 coa: &Arc<ChartOfAccounts>,
13071 entries: &[JournalEntry],
13072 document_flows: &DocumentFlowSnapshot,
13073 sourcing: &SourcingSnapshot,
13074 hr: &HrSnapshot,
13075 manufacturing: &ManufacturingSnapshot,
13076 banking: &BankingSnapshot,
13077 audit: &AuditSnapshot,
13078 financial_reporting: &FinancialReportingSnapshot,
13079 ocpm: &OcpmSnapshot,
13080 compliance: &ComplianceRegulationsSnapshot,
13081 stats: &mut EnhancedGenerationStatistics,
13082 ) -> SynthResult<HypergraphExportInfo> {
13083 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13084 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13085 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13086 use datasynth_graph::models::hypergraph::AggregationStrategy;
13087
13088 let hg_settings = &self.config.graph_export.hypergraph;
13089
13090 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13092 "truncate" => AggregationStrategy::Truncate,
13093 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13094 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13095 "importance_sample" => AggregationStrategy::ImportanceSample,
13096 _ => AggregationStrategy::PoolByCounterparty,
13097 };
13098
13099 let builder_config = HypergraphConfig {
13100 max_nodes: hg_settings.max_nodes,
13101 aggregation_strategy,
13102 include_coso: hg_settings.governance_layer.include_coso,
13103 include_controls: hg_settings.governance_layer.include_controls,
13104 include_sox: hg_settings.governance_layer.include_sox,
13105 include_vendors: hg_settings.governance_layer.include_vendors,
13106 include_customers: hg_settings.governance_layer.include_customers,
13107 include_employees: hg_settings.governance_layer.include_employees,
13108 include_p2p: hg_settings.process_layer.include_p2p,
13109 include_o2c: hg_settings.process_layer.include_o2c,
13110 include_s2c: hg_settings.process_layer.include_s2c,
13111 include_h2r: hg_settings.process_layer.include_h2r,
13112 include_mfg: hg_settings.process_layer.include_mfg,
13113 include_bank: hg_settings.process_layer.include_bank,
13114 include_audit: hg_settings.process_layer.include_audit,
13115 include_r2r: hg_settings.process_layer.include_r2r,
13116 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13117 docs_per_counterparty_threshold: hg_settings
13118 .process_layer
13119 .docs_per_counterparty_threshold,
13120 include_accounts: hg_settings.accounting_layer.include_accounts,
13121 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13122 include_cross_layer_edges: hg_settings.cross_layer.enabled,
13123 include_compliance: self.config.compliance_regulations.enabled,
13124 include_tax: true,
13125 include_treasury: true,
13126 include_esg: true,
13127 include_project: true,
13128 include_intercompany: true,
13129 include_temporal_events: true,
13130 };
13131
13132 let mut builder = HypergraphBuilder::new(builder_config);
13133
13134 builder.add_coso_framework();
13136
13137 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13140 let controls = InternalControl::standard_controls();
13141 builder.add_controls(&controls);
13142 }
13143
13144 builder.add_vendors(&self.master_data.vendors);
13146 builder.add_customers(&self.master_data.customers);
13147 builder.add_employees(&self.master_data.employees);
13148
13149 builder.add_p2p_documents(
13151 &document_flows.purchase_orders,
13152 &document_flows.goods_receipts,
13153 &document_flows.vendor_invoices,
13154 &document_flows.payments,
13155 );
13156 builder.add_o2c_documents(
13157 &document_flows.sales_orders,
13158 &document_flows.deliveries,
13159 &document_flows.customer_invoices,
13160 );
13161 builder.add_s2c_documents(
13162 &sourcing.sourcing_projects,
13163 &sourcing.qualifications,
13164 &sourcing.rfx_events,
13165 &sourcing.bids,
13166 &sourcing.bid_evaluations,
13167 &sourcing.contracts,
13168 );
13169 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13170 builder.add_mfg_documents(
13171 &manufacturing.production_orders,
13172 &manufacturing.quality_inspections,
13173 &manufacturing.cycle_counts,
13174 );
13175 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13176 builder.add_audit_documents(
13177 &audit.engagements,
13178 &audit.workpapers,
13179 &audit.findings,
13180 &audit.evidence,
13181 &audit.risk_assessments,
13182 &audit.judgments,
13183 &audit.materiality_calculations,
13184 &audit.audit_opinions,
13185 &audit.going_concern_assessments,
13186 );
13187 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13188
13189 if let Some(ref event_log) = ocpm.event_log {
13191 builder.add_ocpm_events(event_log);
13192 }
13193
13194 if self.config.compliance_regulations.enabled
13196 && hg_settings.governance_layer.include_controls
13197 {
13198 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13200 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13201 .standard_records
13202 .iter()
13203 .filter_map(|r| {
13204 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
13205 registry.get(&sid).cloned()
13206 })
13207 .collect();
13208
13209 builder.add_compliance_regulations(
13210 &standards,
13211 &compliance.findings,
13212 &compliance.filings,
13213 );
13214 }
13215
13216 builder.add_accounts(coa);
13218 builder.add_journal_entries_as_hyperedges(entries);
13219
13220 let hypergraph = builder.build();
13222
13223 let output_dir = self
13225 .output_path
13226 .clone()
13227 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13228 let hg_dir = output_dir
13229 .join(&self.config.graph_export.output_subdirectory)
13230 .join(&hg_settings.output_subdirectory);
13231
13232 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
13234 "unified" => {
13235 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13236 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13237 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
13238 })?;
13239 (
13240 metadata.num_nodes,
13241 metadata.num_edges,
13242 metadata.num_hyperedges,
13243 )
13244 }
13245 _ => {
13246 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
13248 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13249 SynthError::generation(format!("Hypergraph export failed: {e}"))
13250 })?;
13251 (
13252 metadata.num_nodes,
13253 metadata.num_edges,
13254 metadata.num_hyperedges,
13255 )
13256 }
13257 };
13258
13259 #[cfg(feature = "streaming")]
13261 if let Some(ref target_url) = hg_settings.stream_target {
13262 use crate::stream_client::{StreamClient, StreamConfig};
13263 use std::io::Write as _;
13264
13265 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
13266 let stream_config = StreamConfig {
13267 target_url: target_url.clone(),
13268 batch_size: hg_settings.stream_batch_size,
13269 api_key,
13270 ..StreamConfig::default()
13271 };
13272
13273 match StreamClient::new(stream_config) {
13274 Ok(mut client) => {
13275 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13276 match exporter.export_to_writer(&hypergraph, &mut client) {
13277 Ok(_) => {
13278 if let Err(e) = client.flush() {
13279 warn!("Failed to flush stream client: {}", e);
13280 } else {
13281 info!("Streamed {} records to {}", client.total_sent(), target_url);
13282 }
13283 }
13284 Err(e) => {
13285 warn!("Streaming export failed: {}", e);
13286 }
13287 }
13288 }
13289 Err(e) => {
13290 warn!("Failed to create stream client: {}", e);
13291 }
13292 }
13293 }
13294
13295 stats.graph_node_count += num_nodes;
13297 stats.graph_edge_count += num_edges;
13298 stats.graph_export_count += 1;
13299
13300 Ok(HypergraphExportInfo {
13301 node_count: num_nodes,
13302 edge_count: num_edges,
13303 hyperedge_count: num_hyperedges,
13304 output_path: hg_dir,
13305 })
13306 }
13307
13308 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13313 let pb = self.create_progress_bar(100, "Generating Banking Data");
13314
13315 let orchestrator = BankingOrchestratorBuilder::new()
13317 .config(self.config.banking.clone())
13318 .seed(self.seed + 9000)
13319 .country_pack(self.primary_pack().clone())
13320 .build();
13321
13322 if let Some(pb) = &pb {
13323 pb.inc(10);
13324 }
13325
13326 let result = orchestrator.generate();
13328
13329 if let Some(pb) = &pb {
13330 pb.inc(90);
13331 pb.finish_with_message(format!(
13332 "Banking: {} customers, {} transactions",
13333 result.customers.len(),
13334 result.transactions.len()
13335 ));
13336 }
13337
13338 let mut banking_customers = result.customers;
13343 let core_customers = &self.master_data.customers;
13344 if !core_customers.is_empty() {
13345 for (i, bc) in banking_customers.iter_mut().enumerate() {
13346 let core = &core_customers[i % core_customers.len()];
13347 bc.name = CustomerName::business(&core.name);
13348 bc.residence_country = core.country.clone();
13349 bc.enterprise_customer_id = Some(core.customer_id.clone());
13350 }
13351 debug!(
13352 "Cross-referenced {} banking customers with {} core customers",
13353 banking_customers.len(),
13354 core_customers.len()
13355 );
13356 }
13357
13358 Ok(BankingSnapshot {
13359 customers: banking_customers,
13360 accounts: result.accounts,
13361 transactions: result.transactions,
13362 transaction_labels: result.transaction_labels,
13363 customer_labels: result.customer_labels,
13364 account_labels: result.account_labels,
13365 relationship_labels: result.relationship_labels,
13366 narratives: result.narratives,
13367 suspicious_count: result.stats.suspicious_count,
13368 scenario_count: result.scenarios.len(),
13369 })
13370 }
13371
13372 fn calculate_total_transactions(&self) -> u64 {
13374 let months = self.config.global.period_months as f64;
13375 self.config
13376 .companies
13377 .iter()
13378 .map(|c| {
13379 let annual = c.annual_transaction_volume.count() as f64;
13380 let weighted = annual * c.volume_weight;
13381 (weighted * months / 12.0) as u64
13382 })
13383 .sum()
13384 }
13385
13386 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13388 if !self.phase_config.show_progress {
13389 return None;
13390 }
13391
13392 let pb = if let Some(mp) = &self.multi_progress {
13393 mp.add(ProgressBar::new(total))
13394 } else {
13395 ProgressBar::new(total)
13396 };
13397
13398 pb.set_style(
13399 ProgressStyle::default_bar()
13400 .template(&format!(
13401 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13402 ))
13403 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13404 .progress_chars("#>-"),
13405 );
13406
13407 Some(pb)
13408 }
13409
13410 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13412 self.coa.clone()
13413 }
13414
13415 pub fn get_master_data(&self) -> &MasterDataSnapshot {
13417 &self.master_data
13418 }
13419
13420 fn phase_compliance_regulations(
13422 &mut self,
13423 _stats: &mut EnhancedGenerationStatistics,
13424 ) -> SynthResult<ComplianceRegulationsSnapshot> {
13425 if !self.phase_config.generate_compliance_regulations {
13426 return Ok(ComplianceRegulationsSnapshot::default());
13427 }
13428
13429 info!("Phase: Generating Compliance Regulations Data");
13430
13431 let cr_config = &self.config.compliance_regulations;
13432
13433 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13435 self.config
13436 .companies
13437 .iter()
13438 .map(|c| c.country.clone())
13439 .collect::<std::collections::HashSet<_>>()
13440 .into_iter()
13441 .collect()
13442 } else {
13443 cr_config.jurisdictions.clone()
13444 };
13445
13446 let fallback_date =
13448 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13449 let reference_date = cr_config
13450 .reference_date
13451 .as_ref()
13452 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13453 .unwrap_or_else(|| {
13454 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13455 .unwrap_or(fallback_date)
13456 });
13457
13458 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13460 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13461 let cross_reference_records = reg_gen.generate_cross_reference_records();
13462 let jurisdiction_records =
13463 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13464
13465 info!(
13466 " Standards: {} records, {} cross-references, {} jurisdictions",
13467 standard_records.len(),
13468 cross_reference_records.len(),
13469 jurisdiction_records.len()
13470 );
13471
13472 let audit_procedures = if cr_config.audit_procedures.enabled {
13474 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13475 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13476 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13477 confidence_level: cr_config.audit_procedures.confidence_level,
13478 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13479 };
13480 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13481 self.seed + 9000,
13482 proc_config,
13483 );
13484 let registry = reg_gen.registry();
13485 let mut all_procs = Vec::new();
13486 for jurisdiction in &jurisdictions {
13487 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13488 all_procs.extend(procs);
13489 }
13490 info!(" Audit procedures: {}", all_procs.len());
13491 all_procs
13492 } else {
13493 Vec::new()
13494 };
13495
13496 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13498 let finding_config =
13499 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13500 finding_rate: cr_config.findings.finding_rate,
13501 material_weakness_rate: cr_config.findings.material_weakness_rate,
13502 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13503 generate_remediation: cr_config.findings.generate_remediation,
13504 };
13505 let mut finding_gen =
13506 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13507 self.seed + 9100,
13508 finding_config,
13509 );
13510 let mut all_findings = Vec::new();
13511 for company in &self.config.companies {
13512 let company_findings =
13513 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13514 all_findings.extend(company_findings);
13515 }
13516 info!(" Compliance findings: {}", all_findings.len());
13517 all_findings
13518 } else {
13519 Vec::new()
13520 };
13521
13522 let filings = if cr_config.filings.enabled {
13524 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13525 filing_types: cr_config.filings.filing_types.clone(),
13526 generate_status_progression: cr_config.filings.generate_status_progression,
13527 };
13528 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13529 self.seed + 9200,
13530 filing_config,
13531 );
13532 let company_codes: Vec<String> = self
13533 .config
13534 .companies
13535 .iter()
13536 .map(|c| c.code.clone())
13537 .collect();
13538 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13539 .unwrap_or(fallback_date);
13540 let filings = filing_gen.generate_filings(
13541 &company_codes,
13542 &jurisdictions,
13543 start_date,
13544 self.config.global.period_months,
13545 );
13546 info!(" Regulatory filings: {}", filings.len());
13547 filings
13548 } else {
13549 Vec::new()
13550 };
13551
13552 let compliance_graph = if cr_config.graph.enabled {
13554 let graph_config = datasynth_graph::ComplianceGraphConfig {
13555 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13556 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13557 include_cross_references: cr_config.graph.include_cross_references,
13558 include_supersession_edges: cr_config.graph.include_supersession_edges,
13559 include_account_links: cr_config.graph.include_account_links,
13560 include_control_links: cr_config.graph.include_control_links,
13561 include_company_links: cr_config.graph.include_company_links,
13562 };
13563 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13564
13565 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13567 .iter()
13568 .map(|r| datasynth_graph::StandardNodeInput {
13569 standard_id: r.standard_id.clone(),
13570 title: r.title.clone(),
13571 category: r.category.clone(),
13572 domain: r.domain.clone(),
13573 is_active: r.is_active,
13574 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13575 applicable_account_types: r.applicable_account_types.clone(),
13576 applicable_processes: r.applicable_processes.clone(),
13577 })
13578 .collect();
13579 builder.add_standards(&standard_inputs);
13580
13581 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13583 jurisdiction_records
13584 .iter()
13585 .map(|r| datasynth_graph::JurisdictionNodeInput {
13586 country_code: r.country_code.clone(),
13587 country_name: r.country_name.clone(),
13588 framework: r.accounting_framework.clone(),
13589 standard_count: r.standard_count,
13590 tax_rate: r.statutory_tax_rate,
13591 })
13592 .collect();
13593 builder.add_jurisdictions(&jurisdiction_inputs);
13594
13595 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13597 cross_reference_records
13598 .iter()
13599 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13600 from_standard: r.from_standard.clone(),
13601 to_standard: r.to_standard.clone(),
13602 relationship: r.relationship.clone(),
13603 convergence_level: r.convergence_level,
13604 })
13605 .collect();
13606 builder.add_cross_references(&xref_inputs);
13607
13608 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13610 .iter()
13611 .map(|r| datasynth_graph::JurisdictionMappingInput {
13612 country_code: r.jurisdiction.clone(),
13613 standard_id: r.standard_id.clone(),
13614 })
13615 .collect();
13616 builder.add_jurisdiction_mappings(&mapping_inputs);
13617
13618 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13620 .iter()
13621 .map(|p| datasynth_graph::ProcedureNodeInput {
13622 procedure_id: p.procedure_id.clone(),
13623 standard_id: p.standard_id.clone(),
13624 procedure_type: p.procedure_type.clone(),
13625 sample_size: p.sample_size,
13626 confidence_level: p.confidence_level,
13627 })
13628 .collect();
13629 builder.add_procedures(&proc_inputs);
13630
13631 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13633 .iter()
13634 .map(|f| datasynth_graph::FindingNodeInput {
13635 finding_id: f.finding_id.to_string(),
13636 standard_id: f
13637 .related_standards
13638 .first()
13639 .map(|s| s.as_str().to_string())
13640 .unwrap_or_default(),
13641 severity: f.severity.to_string(),
13642 deficiency_level: f.deficiency_level.to_string(),
13643 severity_score: f.deficiency_level.severity_score(),
13644 control_id: f.control_id.clone(),
13645 affected_accounts: f.affected_accounts.clone(),
13646 })
13647 .collect();
13648 builder.add_findings(&finding_inputs);
13649
13650 if cr_config.graph.include_account_links {
13652 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13653 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13654 for std_record in &standard_records {
13655 if let Some(std_obj) =
13656 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13657 &std_record.standard_id,
13658 ))
13659 {
13660 for acct_type in &std_obj.applicable_account_types {
13661 account_links.push(datasynth_graph::AccountLinkInput {
13662 standard_id: std_record.standard_id.clone(),
13663 account_code: acct_type.clone(),
13664 account_name: acct_type.clone(),
13665 });
13666 }
13667 }
13668 }
13669 builder.add_account_links(&account_links);
13670 }
13671
13672 if cr_config.graph.include_control_links {
13674 let mut control_links = Vec::new();
13675 let sox_like_ids: Vec<String> = standard_records
13677 .iter()
13678 .filter(|r| {
13679 r.standard_id.starts_with("SOX")
13680 || r.standard_id.starts_with("PCAOB-AS-2201")
13681 })
13682 .map(|r| r.standard_id.clone())
13683 .collect();
13684 let control_ids = [
13686 ("C001", "Cash Controls"),
13687 ("C002", "Large Transaction Approval"),
13688 ("C010", "PO Approval"),
13689 ("C011", "Three-Way Match"),
13690 ("C020", "Revenue Recognition"),
13691 ("C021", "Credit Check"),
13692 ("C030", "Manual JE Approval"),
13693 ("C031", "Period Close Review"),
13694 ("C032", "Account Reconciliation"),
13695 ("C040", "Payroll Processing"),
13696 ("C050", "Fixed Asset Capitalization"),
13697 ("C060", "Intercompany Elimination"),
13698 ];
13699 for sox_id in &sox_like_ids {
13700 for (ctrl_id, ctrl_name) in &control_ids {
13701 control_links.push(datasynth_graph::ControlLinkInput {
13702 standard_id: sox_id.clone(),
13703 control_id: ctrl_id.to_string(),
13704 control_name: ctrl_name.to_string(),
13705 });
13706 }
13707 }
13708 builder.add_control_links(&control_links);
13709 }
13710
13711 if cr_config.graph.include_company_links {
13713 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13714 .iter()
13715 .enumerate()
13716 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13717 filing_id: format!("F{:04}", i + 1),
13718 filing_type: f.filing_type.to_string(),
13719 company_code: f.company_code.clone(),
13720 jurisdiction: f.jurisdiction.clone(),
13721 status: format!("{:?}", f.status),
13722 })
13723 .collect();
13724 builder.add_filings(&filing_inputs);
13725 }
13726
13727 let graph = builder.build();
13728 info!(
13729 " Compliance graph: {} nodes, {} edges",
13730 graph.nodes.len(),
13731 graph.edges.len()
13732 );
13733 Some(graph)
13734 } else {
13735 None
13736 };
13737
13738 self.check_resources_with_log("post-compliance-regulations")?;
13739
13740 Ok(ComplianceRegulationsSnapshot {
13741 standard_records,
13742 cross_reference_records,
13743 jurisdiction_records,
13744 audit_procedures,
13745 findings,
13746 filings,
13747 compliance_graph,
13748 })
13749 }
13750
13751 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13753 use super::lineage::LineageGraphBuilder;
13754
13755 let mut builder = LineageGraphBuilder::new();
13756
13757 builder.add_config_section("config:global", "Global Config");
13759 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13760 builder.add_config_section("config:transactions", "Transaction Config");
13761
13762 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13764 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13765
13766 builder.configured_by("phase:coa", "config:chart_of_accounts");
13768 builder.configured_by("phase:je", "config:transactions");
13769
13770 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13772 builder.produced_by("output:je", "phase:je");
13773
13774 if self.phase_config.generate_master_data {
13776 builder.add_config_section("config:master_data", "Master Data Config");
13777 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13778 builder.configured_by("phase:master_data", "config:master_data");
13779 builder.input_to("phase:master_data", "phase:je");
13780 }
13781
13782 if self.phase_config.generate_document_flows {
13783 builder.add_config_section("config:document_flows", "Document Flow Config");
13784 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13785 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13786 builder.configured_by("phase:p2p", "config:document_flows");
13787 builder.configured_by("phase:o2c", "config:document_flows");
13788
13789 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13790 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13791 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13792 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13793 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13794
13795 builder.produced_by("output:po", "phase:p2p");
13796 builder.produced_by("output:gr", "phase:p2p");
13797 builder.produced_by("output:vi", "phase:p2p");
13798 builder.produced_by("output:so", "phase:o2c");
13799 builder.produced_by("output:ci", "phase:o2c");
13800 }
13801
13802 if self.phase_config.inject_anomalies {
13803 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13804 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13805 builder.configured_by("phase:anomaly", "config:fraud");
13806 builder.add_output_file(
13807 "output:labels",
13808 "Anomaly Labels",
13809 "labels/anomaly_labels.csv",
13810 );
13811 builder.produced_by("output:labels", "phase:anomaly");
13812 }
13813
13814 if self.phase_config.generate_audit {
13815 builder.add_config_section("config:audit", "Audit Config");
13816 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13817 builder.configured_by("phase:audit", "config:audit");
13818 }
13819
13820 if self.phase_config.generate_banking {
13821 builder.add_config_section("config:banking", "Banking Config");
13822 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13823 builder.configured_by("phase:banking", "config:banking");
13824 }
13825
13826 if self.config.llm.enabled {
13827 builder.add_config_section("config:llm", "LLM Enrichment Config");
13828 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13829 builder.configured_by("phase:llm_enrichment", "config:llm");
13830 }
13831
13832 if self.config.diffusion.enabled {
13833 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13834 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13835 builder.configured_by("phase:diffusion", "config:diffusion");
13836 }
13837
13838 if self.config.causal.enabled {
13839 builder.add_config_section("config:causal", "Causal Generation Config");
13840 builder.add_generator_phase("phase:causal", "Causal Overlay");
13841 builder.configured_by("phase:causal", "config:causal");
13842 }
13843
13844 builder.build()
13845 }
13846
13847 fn compute_company_revenue(
13856 entries: &[JournalEntry],
13857 company_code: &str,
13858 ) -> rust_decimal::Decimal {
13859 use rust_decimal::Decimal;
13860 let mut revenue = Decimal::ZERO;
13861 for je in entries {
13862 if je.header.company_code != company_code {
13863 continue;
13864 }
13865 for line in &je.lines {
13866 if line.gl_account.starts_with('4') {
13867 revenue += line.credit_amount - line.debit_amount;
13869 }
13870 }
13871 }
13872 revenue.max(Decimal::ZERO)
13873 }
13874
13875 fn compute_entity_net_assets(
13879 entries: &[JournalEntry],
13880 entity_code: &str,
13881 ) -> rust_decimal::Decimal {
13882 use rust_decimal::Decimal;
13883 let mut asset_net = Decimal::ZERO;
13884 let mut liability_net = Decimal::ZERO;
13885 for je in entries {
13886 if je.header.company_code != entity_code {
13887 continue;
13888 }
13889 for line in &je.lines {
13890 if line.gl_account.starts_with('1') {
13891 asset_net += line.debit_amount - line.credit_amount;
13892 } else if line.gl_account.starts_with('2') {
13893 liability_net += line.credit_amount - line.debit_amount;
13894 }
13895 }
13896 }
13897 asset_net - liability_net
13898 }
13899}
13900
13901fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13903 match format {
13904 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13905 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13906 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13907 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13908 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13909 }
13910}
13911
13912fn compute_trial_balance_entries(
13917 entries: &[JournalEntry],
13918 entity_code: &str,
13919 fiscal_year: i32,
13920 coa: Option<&ChartOfAccounts>,
13921) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13922 use std::collections::BTreeMap;
13923
13924 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13925 BTreeMap::new();
13926
13927 for je in entries {
13928 for line in &je.lines {
13929 let entry = balances.entry(line.account_code.clone()).or_default();
13930 entry.0 += line.debit_amount;
13931 entry.1 += line.credit_amount;
13932 }
13933 }
13934
13935 balances
13936 .into_iter()
13937 .map(
13938 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13939 account_description: coa
13940 .and_then(|c| c.get_account(&account_code))
13941 .map(|a| a.description().to_string())
13942 .unwrap_or_else(|| account_code.clone()),
13943 account_code,
13944 debit_balance: debit,
13945 credit_balance: credit,
13946 net_balance: debit - credit,
13947 entity_code: entity_code.to_string(),
13948 period: format!("FY{}", fiscal_year),
13949 },
13950 )
13951 .collect()
13952}
13953
13954#[cfg(test)]
13955#[allow(clippy::unwrap_used)]
13956mod tests {
13957 use super::*;
13958 use datasynth_config::schema::*;
13959
13960 fn create_test_config() -> GeneratorConfig {
13961 GeneratorConfig {
13962 global: GlobalConfig {
13963 industry: IndustrySector::Manufacturing,
13964 start_date: "2024-01-01".to_string(),
13965 period_months: 1,
13966 seed: Some(42),
13967 parallel: false,
13968 group_currency: "USD".to_string(),
13969 presentation_currency: None,
13970 worker_threads: 0,
13971 memory_limit_mb: 0,
13972 fiscal_year_months: None,
13973 },
13974 companies: vec![CompanyConfig {
13975 code: "1000".to_string(),
13976 name: "Test Company".to_string(),
13977 currency: "USD".to_string(),
13978 functional_currency: None,
13979 country: "US".to_string(),
13980 annual_transaction_volume: TransactionVolume::TenK,
13981 volume_weight: 1.0,
13982 fiscal_year_variant: "K4".to_string(),
13983 }],
13984 chart_of_accounts: ChartOfAccountsConfig {
13985 complexity: CoAComplexity::Small,
13986 industry_specific: true,
13987 custom_accounts: None,
13988 min_hierarchy_depth: 2,
13989 max_hierarchy_depth: 4,
13990 },
13991 transactions: TransactionConfig::default(),
13992 output: OutputConfig::default(),
13993 fraud: FraudConfig::default(),
13994 internal_controls: InternalControlsConfig::default(),
13995 business_processes: BusinessProcessConfig::default(),
13996 user_personas: UserPersonaConfig::default(),
13997 templates: TemplateConfig::default(),
13998 approval: ApprovalConfig::default(),
13999 departments: DepartmentConfig::default(),
14000 master_data: MasterDataConfig::default(),
14001 document_flows: DocumentFlowConfig::default(),
14002 intercompany: IntercompanyConfig::default(),
14003 balance: BalanceConfig::default(),
14004 ocpm: OcpmConfig::default(),
14005 audit: AuditGenerationConfig::default(),
14006 banking: datasynth_banking::BankingConfig::default(),
14007 data_quality: DataQualitySchemaConfig::default(),
14008 scenario: ScenarioConfig::default(),
14009 temporal: TemporalDriftConfig::default(),
14010 graph_export: GraphExportConfig::default(),
14011 streaming: StreamingSchemaConfig::default(),
14012 rate_limit: RateLimitSchemaConfig::default(),
14013 temporal_attributes: TemporalAttributeSchemaConfig::default(),
14014 relationships: RelationshipSchemaConfig::default(),
14015 accounting_standards: AccountingStandardsConfig::default(),
14016 audit_standards: AuditStandardsConfig::default(),
14017 distributions: Default::default(),
14018 temporal_patterns: Default::default(),
14019 vendor_network: VendorNetworkSchemaConfig::default(),
14020 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
14021 relationship_strength: RelationshipStrengthSchemaConfig::default(),
14022 cross_process_links: CrossProcessLinksSchemaConfig::default(),
14023 organizational_events: OrganizationalEventsSchemaConfig::default(),
14024 behavioral_drift: BehavioralDriftSchemaConfig::default(),
14025 market_drift: MarketDriftSchemaConfig::default(),
14026 drift_labeling: DriftLabelingSchemaConfig::default(),
14027 anomaly_injection: Default::default(),
14028 industry_specific: Default::default(),
14029 fingerprint_privacy: Default::default(),
14030 quality_gates: Default::default(),
14031 compliance: Default::default(),
14032 webhooks: Default::default(),
14033 llm: Default::default(),
14034 diffusion: Default::default(),
14035 causal: Default::default(),
14036 source_to_pay: Default::default(),
14037 financial_reporting: Default::default(),
14038 hr: Default::default(),
14039 manufacturing: Default::default(),
14040 sales_quotes: Default::default(),
14041 tax: Default::default(),
14042 treasury: Default::default(),
14043 project_accounting: Default::default(),
14044 esg: Default::default(),
14045 country_packs: None,
14046 scenarios: Default::default(),
14047 session: Default::default(),
14048 compliance_regulations: Default::default(),
14049 }
14050 }
14051
/// Constructing an orchestrator with default phases from the baseline config must succeed.
#[test]
fn test_enhanced_orchestrator_creation() {
    let config = create_test_config();
    let orchestrator = EnhancedOrchestrator::with_defaults(config);

    // Include the construction error in the failure output; a bare `is_ok()` assertion
    // would only report that the check failed, not why.
    assert!(
        orchestrator.is_ok(),
        "orchestrator construction failed: {:?}",
        orchestrator.as_ref().err()
    );
}
14058
/// A journal-entries-only run must succeed and produce at least one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    // Unwrap directly (as the other tests here do): on failure the panic message then
    // carries the generation error, which a preceding `assert!(result.is_ok())` would hide.
    let result = orchestrator.generate().unwrap();

    assert!(!result.journal_entries.is_empty());
}
14078
/// A master-data-only run must populate vendors, customers, and materials.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let master = &output.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
14103
/// With master data and document flows enabled, both P2P and O2C chains and their leading
/// documents (purchase orders, sales orders) must be produced.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let flows = &output.document_flows;

    // Document chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document collections are populated too.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
14137
/// With anomaly injection enabled, journal entries are still generated and an anomaly label
/// summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    assert!(output.anomaly_labels.summary.is_some());
}
14160
/// End-to-end run with master data, document flows, journal entries, and balance validation:
/// every stage must leave non-empty output and the balance validation must have run.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    // Every enabled phase produced data.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices were derived from the document flows.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
14202
/// Subledger invoices must mirror the document-flow invoices one-to-one, and the reported
/// statistics must match the subledger sizes.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
14258
/// Balance validation over generated journal entries must run, find no unbalanced entries,
/// and report equal total debits and credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation actually ran.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debits and credits net out overall.
    assert_eq!(validation.total_debits, validation.total_credits);
}
14288
/// The counts reported in `statistics` must equal the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
14327
/// Pins the defaults of `PhaseConfig`: core generation phases, balance validation, and
/// progress display are on, anomaly injection is off, and per-company counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Phases that are enabled out of the box.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is opt-in.
    assert!(!defaults.inject_anomalies);

    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Master data volumes default to something non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
14340
/// A freshly constructed orchestrator has no chart of accounts until `generate` has run.
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(orch.get_coa().is_none());
}
14349
/// After a successful generation run the chart of accounts is available via `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters here; the result itself is discarded.
    let _ = orch.generate().unwrap();

    assert!(orch.get_coa().is_some());
}
14368
/// A master-data-only run exposes generated vendors on the result.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    assert!(!output.master_data.vendors.is_empty());
}
14392
/// `with_progress(false)` must turn off `show_progress` on the orchestrator's phase config.
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = built.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
14403
/// With a second company added, master data is generated for both companies and the
/// statistics report exactly two companies.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Two companies at 5 vendors / 5 customers each give a floor of 10.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(x == 2)`.
    assert_eq!(result.statistics.companies_count, 2);
}
14440
/// Requesting document flows without master data must yield no P2P or O2C chains rather
/// than failing.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // No master data is generated, yet document flows are requested.
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let flows = &output.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
14460
/// `statistics.total_line_items` must equal the sum of `line_count()` over all generated
/// journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    // Recount the line items independently of the statistics collector.
    let mut expected_line_items: u64 = 0;
    for entry in output.journal_entries.iter() {
        expected_line_items += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, expected_line_items);
}
14484
/// Runs journal-entry generation with the audit phase enabled for two
/// engagements and checks that
///
/// * the core audit artifacts and several ISA-specific collections are
///   populated, and
/// * every audit counter in the statistics equals the length of its
///   corresponding collection.
#[test]
fn test_audit_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let audit = &output.audit;
    let stats = &output.statistics;

    // Core artifacts: the engagement count matches the request exactly; the
    // remaining collections only need to be populated.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific collections.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Each statistics counter must agree with the collection it summarises.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
14604
/// With the test configuration left untouched, the LLM, diffusion and causal
/// sections are disabled, and a journal-entry-only run must report zeroed
/// counters for those phases and produce no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let base_config = create_test_config();

    // Precondition: none of the optional phases is switched on.
    assert!(!base_config.llm.enabled);
    assert!(!base_config.diffusion.enabled);
    assert!(!base_config.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Diffusion enhancement
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Causal overlay
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    // Counterfactuals
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
14636
/// With `generate_counterfactuals` enabled, a run that produces journal
/// entries must yield exactly one counterfactual pair per journal entry,
/// report the same number in the statistics, and give every pair a distinct
/// `pair_id`.
///
/// The assertions are guarded: if the run produces no journal entries the
/// test passes without checking anything.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    if !result.journal_entries.is_empty() {
        let entry_count = result.journal_entries.len();

        assert_eq!(result.counterfactual_pairs.len(), entry_count);
        assert_eq!(result.statistics.counterfactual_pair_count, entry_count);

        // Uniqueness check: borrow the ids rather than cloning each one; a
        // set of references has the same cardinality as a set of owned ids.
        let ids: std::collections::HashSet<_> = result
            .counterfactual_pairs
            .iter()
            .map(|p| &p.pair_id)
            .collect();
        assert_eq!(ids.len(), result.counterfactual_pairs.len());
    }
}
14673
/// Enabling LLM enrichment with `max_vendor_enrichments = 3` on a
/// master-data-only run must enrich at least one vendor and never more than
/// three.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Five vendors per company, i.e. more than the enrichment cap of 3.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
14701
/// Enabling diffusion enhancement with `sample_size = 20` must report
/// exactly 20 generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
14724
/// Enabling the causal overlay with the `fraud_detection` template,
/// `sample_size = 100` and validation turned on must report exactly 100
/// causal samples and record a validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
14750
/// Enabling the causal overlay with the `revenue_cycle` template,
/// `sample_size = 50` and validation turned off must report exactly 50
/// causal samples and leave the validation outcome unset.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
14776
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in
/// a single run (master data plus journal entries) and checks that each
/// phase reports the expected output: vendor enrichment within its
/// configured cap, the configured diffusion and causal sample counts, and a
/// recorded causal validation outcome.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    // Same upper-bound check that `test_llm_enrichment_enabled` applies to
    // its own cap: enrichment must respect `max_vendor_enrichments` (2 here)
    // even when the other phases run in the same pass.
    assert!(stats.llm_vendors_enriched <= 2);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14812
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and
/// checks that the LLM, diffusion and causal fields come back unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let round_tripped = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(round_tripped.llm_enrichment_ms, 42);
    assert_eq!(round_tripped.llm_vendors_enriched, 10);
    assert_eq!(round_tripped.diffusion_enhancement_ms, 100);
    assert_eq!(round_tripped.diffusion_samples_generated, 50);
    assert_eq!(round_tripped.causal_generation_ms, 200);
    assert_eq!(round_tripped.causal_samples_generated, 100);
    assert_eq!(round_tripped.causal_validation_passed, Some(true));
}
14839
/// Deserializes a statistics JSON document that contains none of the LLM,
/// diffusion or causal keys and checks that those fields come back as zero
/// (or `None` for the validation outcome) instead of failing to parse.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any of the llm_* / diffusion_* / causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Every field absent from the payload must come back as zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
14889}