1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186 let payment_behavior = &schema_config.payment_behavior;
187 let late_dist = &payment_behavior.late_payment_days_distribution;
188
189 P2PGeneratorConfig {
190 three_way_match_rate: schema_config.three_way_match_rate,
191 partial_delivery_rate: schema_config.partial_delivery_rate,
192 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193 price_variance_rate: schema_config.price_variance_rate,
194 max_price_variance_percent: schema_config.max_price_variance_percent,
195 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198 payment_method_distribution: vec![
199 (PaymentMethod::BankTransfer, 0.60),
200 (PaymentMethod::Check, 0.25),
201 (PaymentMethod::Wire, 0.10),
202 (PaymentMethod::CreditCard, 0.05),
203 ],
204 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205 payment_behavior: P2PPaymentBehavior {
206 late_payment_rate: payment_behavior.late_payment_rate,
207 late_payment_distribution: LatePaymentDistribution {
208 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209 late_8_to_14: late_dist.late_8_to_14,
210 very_late_15_to_30: late_dist.very_late_15_to_30,
211 severely_late_31_to_60: late_dist.severely_late_31_to_60,
212 extremely_late_over_60: late_dist.extremely_late_over_60,
213 },
214 partial_payment_rate: payment_behavior.partial_payment_rate,
215 payment_correction_rate: payment_behavior.payment_correction_rate,
216 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217 },
218 }
219}
220
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223 let payment_behavior = &schema_config.payment_behavior;
224
225 O2CGeneratorConfig {
226 credit_check_failure_rate: schema_config.credit_check_failure_rate,
227 partial_shipment_rate: schema_config.partial_shipment_rate,
228 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232 bad_debt_rate: schema_config.bad_debt_rate,
233 returns_rate: schema_config.return_rate,
234 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235 payment_method_distribution: vec![
236 (PaymentMethod::BankTransfer, 0.50),
237 (PaymentMethod::Check, 0.30),
238 (PaymentMethod::Wire, 0.15),
239 (PaymentMethod::CreditCard, 0.05),
240 ],
241 payment_behavior: O2CPaymentBehavior {
242 partial_payment_rate: payment_behavior.partial_payments.rate,
243 short_payment_rate: payment_behavior.short_payments.rate,
244 max_short_percent: payment_behavior.short_payments.max_short_percent,
245 on_account_rate: payment_behavior.on_account_payments.rate,
246 payment_correction_rate: payment_behavior.payment_corrections.rate,
247 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248 },
249 }
250}
251
/// Phase switches and volume settings.
///
/// The boolean fields are named after generation / injection / validation phases and the
/// `usize` fields after record volumes. How each value is consumed is decided by code outside
/// this section; the notes below only record what `Default` and `PhaseConfig::from_config`
/// assign.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master data phase. `true` in both `Default` and `from_config`.
    pub generate_master_data: bool,
    /// Document flow phase. `true` in both `Default` and `from_config`.
    pub generate_document_flows: bool,
    /// OCPM event phase. Default `false`; `from_config` reads `cfg.ocpm.enabled`.
    pub generate_ocpm_events: bool,
    /// Journal entry phase. `true` in both `Default` and `from_config`.
    pub generate_journal_entries: bool,
    /// Anomaly injection. Default `false`; `from_config` sets it when either
    /// `cfg.fraud.enabled` or `cfg.anomaly_injection.enabled` is `true`.
    pub inject_anomalies: bool,
    /// Data quality injection. Default `false`; `from_config` reads `cfg.data_quality.enabled`.
    pub inject_data_quality: bool,
    /// Balance validation. `true` in both `Default` and `from_config`.
    pub validate_balances: bool,
    /// Progress display switch. `true` in both `Default` and `from_config`.
    pub show_progress: bool,
    /// Vendor volume per company. `50` in both `Default` and `from_config`.
    pub vendors_per_company: usize,
    /// Customer volume per company. `100` in both `Default` and `from_config`.
    pub customers_per_company: usize,
    /// Material volume per company. `200` in both `Default` and `from_config`.
    pub materials_per_company: usize,
    /// Asset volume per company. `50` in both `Default` and `from_config`.
    pub assets_per_company: usize,
    /// Employee volume per company. `100` in both `Default` and `from_config`.
    pub employees_per_company: usize,
    /// Number of P2P chains. `100` in both `Default` and `from_config`.
    pub p2p_chains: usize,
    /// Number of O2C chains. `100` in both `Default` and `from_config`.
    pub o2c_chains: usize,
    /// Audit phase. Default `false`; `from_config` reads `cfg.audit.enabled`.
    pub generate_audit: bool,
    /// Number of audit engagements. `5` in both `Default` and `from_config`.
    pub audit_engagements: usize,
    /// Workpapers per engagement. `20` in both `Default` and `from_config`.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper. `5` in both `Default` and `from_config`.
    pub evidence_per_workpaper: usize,
    /// Risks per engagement. `15` in both `Default` and `from_config`.
    pub risks_per_engagement: usize,
    /// Findings per engagement. `8` in both `Default` and `from_config`.
    pub findings_per_engagement: usize,
    /// Judgments per engagement. `10` in both `Default` and `from_config`.
    pub judgments_per_engagement: usize,
    /// Banking phase. Default `false`; `from_config` reads `cfg.banking.enabled`.
    pub generate_banking: bool,
    /// Graph export phase. Default `false`; `from_config` reads `cfg.graph_export.enabled`.
    pub generate_graph_export: bool,
    /// Sourcing phase. Default `false`; `from_config` reads `cfg.source_to_pay.enabled`.
    pub generate_sourcing: bool,
    /// Bank reconciliation phase. Default `false`; `from_config` reads
    /// `cfg.financial_reporting.enabled` (the same flag as `generate_financial_statements`).
    pub generate_bank_reconciliation: bool,
    /// Financial statement phase. Default `false`; `from_config` reads
    /// `cfg.financial_reporting.enabled`.
    pub generate_financial_statements: bool,
    /// Accounting standards phase. Default `false`; `from_config` reads
    /// `cfg.accounting_standards.enabled`.
    pub generate_accounting_standards: bool,
    /// Manufacturing phase. Default `false`; `from_config` reads `cfg.manufacturing.enabled`.
    pub generate_manufacturing: bool,
    /// Sales quotes / KPI / budget phase. Default `false`; `from_config` reads
    /// `cfg.sales_quotes.enabled`.
    pub generate_sales_kpi_budgets: bool,
    /// Tax phase. Default `false`; `from_config` reads `cfg.tax.enabled`.
    pub generate_tax: bool,
    /// ESG phase. Default `false`; `from_config` reads `cfg.esg.enabled`.
    pub generate_esg: bool,
    /// Intercompany phase. Default `false`; `from_config` reads `cfg.intercompany.enabled`.
    pub generate_intercompany: bool,
    /// Evolution events phase. `true` in both `Default` and `from_config`.
    pub generate_evolution_events: bool,
    /// Counterfactual generation. Default `false`; `from_config` reads
    /// `cfg.scenarios.generate_counterfactuals`.
    pub generate_counterfactuals: bool,
    /// Compliance regulations phase. Default `false`; `from_config` reads
    /// `cfg.compliance_regulations.enabled`.
    pub generate_compliance_regulations: bool,
    /// Period close phase. `true` in both `Default` and `from_config`.
    pub generate_period_close: bool,
    /// HR phase. Default `false`; `from_config` reads `cfg.hr.enabled`.
    pub generate_hr: bool,
    /// Treasury phase. Default `false`; `from_config` reads `cfg.treasury.enabled`.
    pub generate_treasury: bool,
    /// Project accounting phase. Default `false`; `from_config` reads
    /// `cfg.project_accounting.enabled`.
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338 fn default() -> Self {
339 Self {
340 generate_master_data: true,
341 generate_document_flows: true,
342 generate_ocpm_events: false, generate_journal_entries: true,
344 inject_anomalies: false,
345 inject_data_quality: false, validate_balances: true,
347 show_progress: true,
348 vendors_per_company: 50,
349 customers_per_company: 100,
350 materials_per_company: 200,
351 assets_per_company: 50,
352 employees_per_company: 100,
353 p2p_chains: 100,
354 o2c_chains: 100,
355 generate_audit: false, audit_engagements: 5,
357 workpapers_per_engagement: 20,
358 evidence_per_workpaper: 5,
359 risks_per_engagement: 15,
360 findings_per_engagement: 8,
361 judgments_per_engagement: 10,
362 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, }
381 }
382}
383
384impl PhaseConfig {
385 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390 Self {
391 generate_master_data: true,
393 generate_document_flows: true,
394 generate_journal_entries: true,
395 validate_balances: true,
396 generate_period_close: true,
397 generate_evolution_events: true,
398 show_progress: true,
399
400 generate_audit: cfg.audit.enabled,
402 generate_banking: cfg.banking.enabled,
403 generate_graph_export: cfg.graph_export.enabled,
404 generate_sourcing: cfg.source_to_pay.enabled,
405 generate_intercompany: cfg.intercompany.enabled,
406 generate_financial_statements: cfg.financial_reporting.enabled,
407 generate_bank_reconciliation: cfg.financial_reporting.enabled,
408 generate_accounting_standards: cfg.accounting_standards.enabled,
409 generate_manufacturing: cfg.manufacturing.enabled,
410 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411 generate_tax: cfg.tax.enabled,
412 generate_esg: cfg.esg.enabled,
413 generate_ocpm_events: cfg.ocpm.enabled,
414 generate_compliance_regulations: cfg.compliance_regulations.enabled,
415 generate_hr: cfg.hr.enabled,
416 generate_treasury: cfg.treasury.enabled,
417 generate_project_accounting: cfg.project_accounting.enabled,
418
419 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423 inject_data_quality: cfg.data_quality.enabled,
424
425 vendors_per_company: 50,
427 customers_per_company: 100,
428 materials_per_company: 200,
429 assets_per_company: 50,
430 employees_per_company: 100,
431 p2p_chains: 100,
432 o2c_chains: 100,
433 audit_engagements: 5,
434 workpapers_per_engagement: 20,
435 evidence_per_workpaper: 5,
436 risks_per_engagement: 15,
437 findings_per_engagement: 8,
438 judgments_per_engagement: 10,
439 }
440 }
441}
442
/// Collections of master data records, one vector per entity kind.
///
/// This is the type of `EnhancedGenerationResult::master_data`. Because it derives
/// `Default`, a fresh value starts with every vector empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee change events.
    /// NOTE(review): presumably the change history for `employees` — confirm with the producer.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
}
461
/// Size figures and output location describing a hypergraph export.
///
/// Unlike most snapshot types in this file it does not derive `Default`, so every field
/// must be supplied when constructing it.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of (pairwise) edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Output location.
    /// NOTE(review): whether this is a file or a directory is not visible here — confirm.
    pub output_path: PathBuf,
}
474
/// Document-flow data: P2P and O2C document chains plus per-type document vectors.
///
/// This is the type of `EnhancedGenerationResult::document_flows`. Defaults to all-empty.
/// NOTE(review): the per-type vectors presumably hold the same documents the chains refer
/// to, flattened by type — confirm with the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document reference records.
    pub document_references: Vec<documents::DocumentReference>,
}
500
/// Subledger data: AP, AR, fixed-asset and inventory records with their derived reports.
///
/// This is the type of `EnhancedGenerationResult::subledger`. Defaults to all-empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements (the subledger type, spelled with its full path).
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
527
/// OCPM output: an optional event log plus three size counters.
///
/// This is the type of `EnhancedGenerationResult::ocpm`. Defaults to no log and zero counts.
/// NOTE(review): the counters presumably describe `event_log`; nothing in this section
/// keeps them in sync — confirm with the producer.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
540
/// Audit data: one collection per audit artifact type.
///
/// This is the type of `EnhancedGenerationResult::audit`. Defaults to empty vectors and
/// `None` for the two optional fields.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions (type from `datasynth_standards`).
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters (type from `datasynth_standards`).
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    /// NOTE(review): presumably the items drawn under `sampling_plans` — confirm.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mapping entries.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard reference entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail, if one is present.
    /// NOTE(review): the conditions under which this is `Some` are not visible here.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
}
653
/// Banking data: customers, accounts, transactions, their label sets, and two counters.
///
/// This is the type of `EnhancedGenerationResult::banking`. Defaults to empty vectors and
/// zero counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item counter.
    /// NOTE(review): presumably counts suspicious transactions — confirm with the producer.
    pub suspicious_count: usize,
    /// Scenario counter.
    pub scenario_count: usize,
}
678
/// Graph export summary: whether an export happened, how many graphs, and per-export details.
///
/// This is the type of `EnhancedGenerationResult::graph_export`. It derives `Serialize` and
/// defaults to `exported == false`, zero graphs and an empty map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by string.
    /// NOTE(review): what the key represents (name, format, ...) is not visible here.
    pub exports: HashMap<String, GraphExportInfo>,
}
689
/// Details of a single graph export; the value type of `GraphExportSnapshot::exports`.
///
/// Derives `Serialize` but not `Default`, so every field must be supplied on construction.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Export format, as a free-form string.
    pub format: String,
    /// Output location.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
704
/// Sourcing data: one vector per sourcing artifact type.
///
/// This is the type of `EnhancedGenerationResult::sourcing`. Defaults to all-empty.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
727
/// Trial balance entries for one fiscal period, with the period's identification and dates.
///
/// Element type of `FinancialReportingSnapshot::trial_balances`. Derives both `Serialize`
/// and `Deserialize`; it has no `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    /// NOTE(review): whether start/end are inclusive is not visible here — confirm.
    pub period_start: NaiveDate,
    /// Last date of the period.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
742
743#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746 pub financial_statements: Vec<FinancialStatement>,
749 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752 pub consolidated_statements: Vec<FinancialStatement>,
754 pub consolidation_schedules: Vec<ConsolidationSchedule>,
756 pub bank_reconciliations: Vec<BankReconciliation>,
758 pub trial_balances: Vec<PeriodTrialBalance>,
760 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
/// HR data: payroll, time, expenses, benefits, pensions and stock compensation, followed by
/// a set of counters.
///
/// This is the type of `EnhancedGenerationResult::hr`. Defaults to empty vectors and zero
/// counts.
/// NOTE(review): each `*_count` field presumably mirrors the length of the matching vector;
/// nothing in this section enforces that — confirm with the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries belonging to the pension data.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries belonging to the stock compensation data.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run counter.
    pub payroll_run_count: usize,
    /// Payroll line item counter.
    pub payroll_line_item_count: usize,
    /// Time entry counter.
    pub time_entry_count: usize,
    /// Expense report counter.
    pub expense_report_count: usize,
    /// Benefit enrollment counter.
    pub benefit_enrollment_count: usize,
    /// Pension plan counter.
    pub pension_plan_count: usize,
    /// Stock grant counter.
    pub stock_grant_count: usize,
}
812
/// Accounting standards data: revenue contracts, impairment tests, business combinations,
/// expected credit loss, provisions and currency translation, followed by counters.
///
/// This is the type of `EnhancedGenerationResult::accounting_standards`. Defaults to empty
/// vectors and zero counts.
/// NOTE(review): each `*_count` field presumably mirrors the length of the matching vector;
/// nothing in this section enforces that — confirm with the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue module of `datasynth_standards`).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries belonging to the business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected credit loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// Expected credit loss provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries belonging to the expected credit loss data.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries belonging to the provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract counter.
    pub revenue_contract_count: usize,
    /// Impairment test counter.
    pub impairment_test_count: usize,
    /// Business combination counter.
    pub business_combination_count: usize,
    /// ECL model counter.
    pub ecl_model_count: usize,
    /// Provision counter.
    pub provision_count: usize,
    /// Currency translation counter.
    pub currency_translation_count: usize,
}
856
/// Compliance and regulation data: reference records, audit procedures, findings, filings
/// and an optional graph.
///
/// This is the type of `EnhancedGenerationResult::compliance_regulations`. Defaults to empty
/// vectors and no graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
875
/// Manufacturing data: production orders, quality inspections, cycle counts, BOM components
/// and inventory movements, followed by one counter per collection.
///
/// This is the type of `EnhancedGenerationResult::manufacturing`. Defaults to empty vectors
/// and zero counts.
/// NOTE(review): each `*_count` field presumably mirrors the length of the matching vector;
/// nothing in this section enforces that — confirm with the code that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements (the unqualified `InventoryMovement` in scope for this file, as
    /// opposed to the fully qualified subledger type used by `SubledgerSnapshot`).
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order counter.
    pub production_order_count: usize,
    /// Quality inspection counter.
    pub quality_inspection_count: usize,
    /// Cycle count counter.
    pub cycle_count_count: usize,
    /// BOM component counter.
    pub bom_component_count: usize,
    /// Inventory movement counter.
    pub inventory_movement_count: usize,
}
900
/// Sales quotes, management KPIs and budgets, followed by three counters.
///
/// This is the type of `EnhancedGenerationResult::sales_kpi_budgets`. Defaults to empty
/// vectors and zero counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote counter.
    pub sales_quote_count: usize,
    /// KPI counter.
    pub kpi_count: usize,
    /// Budget line counter.
    /// NOTE(review): named after budget *lines*, so it presumably is not `budgets.len()` —
    /// confirm how the producer computes it.
    pub budget_line_count: usize,
}
917
/// Anomaly labels with an optional summary and a per-type tally.
///
/// This is the type of `EnhancedGenerationResult::anomaly_labels`. Defaults to no labels,
/// no summary and an empty map.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// The labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, if one is present.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string.
    /// NOTE(review): the key is presumably an anomaly type name — confirm with the producer.
    pub by_type: HashMap<String, usize>,
}
928
/// Outcome of balance validation: status flags, totals, tracking counts and collected errors.
///
/// This is the type of `EnhancedGenerationResult::balance_validation`. The derived default
/// has both `validated` and `is_balanced` set to `false`, so a default value should not be
/// read as "validation ran and failed" without checking `validated` first.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors that were collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
951
/// Tax data: jurisdictions, codes, lines, returns, provisions, withholding records, anomaly
/// labels, deferred tax data and tax posting journal entries.
///
/// This is the type of `EnhancedGenerationResult::tax`. Defaults to empty vectors, zero
/// counts and the default `DeferredTaxSnapshot`.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction counter.
    /// NOTE(review): presumably `jurisdictions.len()` — confirm with the producer.
    pub jurisdiction_count: usize,
    /// Tax code counter.
    /// NOTE(review): presumably `codes.len()` — confirm with the producer.
    pub code_count: usize,
    /// Deferred tax data.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
978
/// Intercompany data: group structure, matched pairs, both sides' journal entries,
/// eliminations, NCI measurements and summary figures.
///
/// This is the type of `EnhancedGenerationResult::intercompany`. It derives `Serialize` and
/// `Deserialize`; `ic_document_chains` is excluded from both via `#[serde(skip)]`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is present.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains. Never serialized; after deserialization this field
    /// holds its default value (`None`).
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair counter.
    /// NOTE(review): presumably `matched_pairs.len()` — confirm with the producer.
    pub matched_pair_count: usize,
    /// Elimination entry counter.
    /// NOTE(review): presumably `elimination_entries.len()` — confirm with the producer.
    pub elimination_entry_count: usize,
    /// Match rate.
    /// NOTE(review): scale (fraction vs. percent) is not visible here — confirm.
    pub match_rate: f64,
}
1004
/// ESG data: environmental, workforce, safety, governance and disclosure records, plus
/// anomaly labels and two counters.
///
/// This is the type of `EnhancedGenerationResult::esg`. Defaults to empty vectors and zero
/// counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission counter.
    /// NOTE(review): presumably `emissions.len()` — confirm with the producer.
    pub emission_count: usize,
    /// Disclosure counter.
    /// NOTE(review): presumably `disclosures.len()` — confirm with the producer.
    pub disclosure_count: usize,
}
1041
/// Treasury data: cash, hedging, debt, guarantees, netting, anomaly labels and journal
/// entries.
///
/// This is the type of `EnhancedGenerationResult::treasury`. Defaults to all-empty.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries belonging to the treasury data.
    pub journal_entries: Vec<JournalEntry>,
}
1069
/// Project accounting data: projects with their cost, revenue, earned-value, change-order
/// and milestone records.
///
/// This is the type of `EnhancedGenerationResult::project_accounting`. Defaults to all-empty.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1086
/// Aggregate result: the chart of accounts, every domain snapshot, the journal entries and
/// the accompanying labels, validation results and statistics.
///
/// Derives `Debug` and `Default` only — it is not `Clone`, so it has to be moved or borrowed.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data.
    pub master_data: MasterDataSnapshot,
    /// Document flows.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger data.
    pub subledger: SubledgerSnapshot,
    /// OCPM data.
    pub ocpm: OcpmSnapshot,
    /// Audit data.
    pub audit: AuditSnapshot,
    /// Banking data.
    pub banking: BankingSnapshot,
    /// Graph export summary.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing data.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting data.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data.
    pub hr: HrSnapshot,
    /// Accounting standards data.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing data.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data.
    pub tax: TaxSnapshot,
    /// ESG data.
    pub esg: EsgSnapshot,
    /// Treasury data.
    pub treasury: TreasurySnapshot,
    /// Project accounting data.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany data.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    /// NOTE(review): whether this already includes the per-domain journal entry vectors held
    /// in the snapshots above is not visible here — confirm with the producer.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Individual quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Run statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, if one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Fraud collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains of vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, if one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if one is present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance and regulation data.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
1178
/// Counters and timings describing a single enhanced generation run.
///
/// The struct derives `Default`, so a run starts from an all-zero value. The
/// orchestrator hands a mutable reference to each phase method, which records
/// its own counts.
///
/// Fields annotated with `#[serde(default)]` may be missing from deserialized
/// input; they then take their type's `Default` value. Fields without the
/// attribute must be present when deserializing.
///
/// The `//` group headings below simply follow the field-name prefixes.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // ---- Journal entries and run shape ----
    /// Number of journal entries; `generate` sets this to the length of the
    /// combined entry list once the JE-producing phases have been merged.
    pub total_entries: u64,
    /// Sum of `line_count()` over those journal entries.
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Number of companies in the configuration (`config.companies.len()`).
    pub companies_count: usize,
    /// Copied from `config.global.period_months` at the start of `generate`.
    pub period_months: u32,

    // ---- Master data ----
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // ---- Document flows and subledger invoices ----
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // ---- OCPM ----
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // ---- Audit ----
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // ---- Anomalies and data quality ----
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // ---- Banking ----
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // ---- Graph export ----
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // ---- LLM / diffusion / causal enhancement ----
    // NOTE(review): the `_ms` suffix presumably means milliseconds; confirm
    // against the code that records these timings.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // ---- Sourcing ----
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // ---- Financial reporting ----
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // ---- HR ----
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // ---- Accounting standards ----
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // ---- Manufacturing ----
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // ---- Sales, KPIs and budgets ----
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // ---- Tax master data ----
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // ---- ESG ----
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // ---- Intercompany ----
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // ---- Additional subledgers ----
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // ---- Treasury instruments ----
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // ---- Projects ----
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // ---- Tax provisions, opening balances, reconciliation ----
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // ---- Cash management ----
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // ---- Evolution events ----
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // ---- Counterfactuals and fraud artefacts ----
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // ---- Temporal versions, entity graph, cross-process links ----
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // ---- Disruption, industry, period close ----
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1429
/// Drives the enhanced generation workflow.
///
/// Instances are created with [`EnhancedOrchestrator::new`] (or one of the
/// `from_fingerprint*` constructors) and then customised through the
/// builder-style `with_*` methods before calling `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated by `new` before it is stored.
    config: GeneratorConfig,
    /// Per-phase switches (for example `show_progress`, set by `with_progress`).
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when unset.
    seed: u64,
    /// Progress container; only created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`; rebuilt by
    /// `with_output_path` so that it is given the output path.
    resource_guard: ResourceGuard,
    /// Output location recorded by `with_output_path`, if any.
    output_path: Option<PathBuf>,
    /// Copula generator specs; filled in by `from_fingerprint_data`, empty otherwise.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built in `new` from `config.country_packs`, or
    /// from the built-in packs when that section is absent.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that `emit_phase_items` forwards serialized items to.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453 datasynth_config::validate_config(&config)?;
1454
1455 let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457 let resource_guard = Self::build_resource_guard(&config, None);
1459
1460 let country_pack_registry = match &config.country_packs {
1462 Some(cp) => {
1463 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464 .map_err(|e| SynthError::config(e.to_string()))?
1465 }
1466 None => datasynth_core::CountryPackRegistry::builtin_only()
1467 .map_err(|e| SynthError::config(e.to_string()))?,
1468 };
1469
1470 Ok(Self {
1471 config,
1472 phase_config,
1473 coa: None,
1474 master_data: MasterDataSnapshot::default(),
1475 seed,
1476 multi_progress: None,
1477 resource_guard,
1478 output_path: None,
1479 copula_generators: Vec::new(),
1480 country_pack_registry,
1481 phase_sink: None,
1482 })
1483 }
1484
1485 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487 Self::new(config, PhaseConfig::default())
1488 }
1489
1490 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492 self.phase_sink = Some(sink);
1493 self
1494 }
1495
1496 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498 self.phase_sink = Some(sink);
1499 }
1500
1501 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503 if let Some(ref sink) = self.phase_sink {
1504 for item in items {
1505 if let Ok(value) = serde_json::to_value(item) {
1506 if let Err(e) = sink.emit(phase, type_name, &value) {
1507 warn!(
1508 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509 );
1510 }
1511 }
1512 }
1513 if let Err(e) = sink.phase_complete(phase) {
1514 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515 }
1516 }
1517 }
1518
1519 pub fn with_progress(mut self, show: bool) -> Self {
1521 self.phase_config.show_progress = show;
1522 if show {
1523 self.multi_progress = Some(MultiProgress::new());
1524 }
1525 self
1526 }
1527
1528 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530 let path = path.into();
1531 self.output_path = Some(path.clone());
1532 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534 self
1535 }
1536
1537 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539 &self.country_pack_registry
1540 }
1541
1542 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544 self.country_pack_registry.get_by_str(country)
1545 }
1546
1547 fn primary_country_code(&self) -> &str {
1550 self.config
1551 .companies
1552 .first()
1553 .map(|c| c.country.as_str())
1554 .unwrap_or("US")
1555 }
1556
1557 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559 self.country_pack_for(self.primary_country_code())
1560 }
1561
1562 fn resolve_coa_framework(&self) -> CoAFramework {
1564 if self.config.accounting_standards.enabled {
1565 match self.config.accounting_standards.framework {
1566 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567 return CoAFramework::FrenchPcg;
1568 }
1569 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570 return CoAFramework::GermanSkr04;
1571 }
1572 _ => {}
1573 }
1574 }
1575 let pack = self.primary_pack();
1577 match pack.accounting.framework.as_str() {
1578 "french_gaap" => CoAFramework::FrenchPcg,
1579 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580 _ => CoAFramework::UsGaap,
1581 }
1582 }
1583
1584 pub fn has_copulas(&self) -> bool {
1589 !self.copula_generators.is_empty()
1590 }
1591
1592 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598 &self.copula_generators
1599 }
1600
1601 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605 &mut self.copula_generators
1606 }
1607
1608 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612 self.copula_generators
1613 .iter_mut()
1614 .find(|c| c.name == copula_name)
1615 .map(|c| c.generator.sample())
1616 }
1617
1618 pub fn from_fingerprint(
1641 fingerprint_path: &std::path::Path,
1642 phase_config: PhaseConfig,
1643 scale: f64,
1644 ) -> SynthResult<Self> {
1645 info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647 let reader = FingerprintReader::new();
1649 let fingerprint = reader
1650 .read_from_file(fingerprint_path)
1651 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654 }
1655
1656 pub fn from_fingerprint_data(
1663 fingerprint: Fingerprint,
1664 phase_config: PhaseConfig,
1665 scale: f64,
1666 ) -> SynthResult<Self> {
1667 info!(
1668 "Synthesizing config from fingerprint (version: {}, tables: {})",
1669 fingerprint.manifest.version,
1670 fingerprint.schema.tables.len()
1671 );
1672
1673 let seed: u64 = rand::random();
1675 info!("Fingerprint synthesis seed: {}", seed);
1676
1677 let options = SynthesisOptions {
1679 scale,
1680 seed: Some(seed),
1681 preserve_correlations: true,
1682 inject_anomalies: true,
1683 };
1684 let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686 let synthesis_result = synthesizer
1688 .synthesize_full(&fingerprint, seed)
1689 .map_err(|e| {
1690 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691 })?;
1692
1693 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695 Self::base_config_for_industry(industry)
1696 } else {
1697 Self::base_config_for_industry("manufacturing")
1698 };
1699
1700 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703 info!(
1705 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706 fingerprint.schema.tables.len(),
1707 scale,
1708 synthesis_result.copula_generators.len()
1709 );
1710
1711 if !synthesis_result.copula_generators.is_empty() {
1712 for spec in &synthesis_result.copula_generators {
1713 info!(
1714 " Copula '{}' for table '{}': {} columns",
1715 spec.name,
1716 spec.table,
1717 spec.columns.len()
1718 );
1719 }
1720 }
1721
1722 let mut orchestrator = Self::new(config, phase_config)?;
1724
1725 orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728 Ok(orchestrator)
1729 }
1730
1731 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733 use datasynth_config::presets::create_preset;
1734 use datasynth_config::TransactionVolume;
1735 use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737 let sector = match industry.to_lowercase().as_str() {
1738 "manufacturing" => IndustrySector::Manufacturing,
1739 "retail" => IndustrySector::Retail,
1740 "financial" | "financial_services" => IndustrySector::FinancialServices,
1741 "healthcare" => IndustrySector::Healthcare,
1742 "technology" | "tech" => IndustrySector::Technology,
1743 _ => IndustrySector::Manufacturing,
1744 };
1745
1746 create_preset(
1748 sector,
1749 1, 12, CoAComplexity::Medium,
1752 TransactionVolume::TenK,
1753 )
1754 }
1755
1756 fn apply_config_patch(
1758 mut config: GeneratorConfig,
1759 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760 ) -> GeneratorConfig {
1761 use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763 for (key, value) in patch.values() {
1764 match (key.as_str(), value) {
1765 ("transactions.count", ConfigValue::Integer(n)) => {
1768 info!(
1769 "Fingerprint suggests {} transactions (apply via company volumes)",
1770 n
1771 );
1772 }
1773 ("global.period_months", ConfigValue::Integer(n)) => {
1774 config.global.period_months = (*n).clamp(1, 120) as u32;
1775 }
1776 ("global.start_date", ConfigValue::String(s)) => {
1777 config.global.start_date = s.clone();
1778 }
1779 ("global.seed", ConfigValue::Integer(n)) => {
1780 config.global.seed = Some(*n as u64);
1781 }
1782 ("fraud.enabled", ConfigValue::Bool(b)) => {
1783 config.fraud.enabled = *b;
1784 }
1785 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786 config.fraud.fraud_rate = *f;
1787 }
1788 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789 config.data_quality.enabled = *b;
1790 }
1791 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793 config.fraud.enabled = *b;
1794 }
1795 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796 config.fraud.fraud_rate = *f;
1797 }
1798 _ => {
1799 debug!("Ignoring unknown config patch key: {}", key);
1800 }
1801 }
1802 }
1803
1804 config
1805 }
1806
1807 fn build_resource_guard(
1809 config: &GeneratorConfig,
1810 output_path: Option<PathBuf>,
1811 ) -> ResourceGuard {
1812 let mut builder = ResourceGuardBuilder::new();
1813
1814 if config.global.memory_limit_mb > 0 {
1816 builder = builder.memory_limit(config.global.memory_limit_mb);
1817 }
1818
1819 if let Some(path) = output_path {
1821 builder = builder.output_path(path).min_free_disk(100); }
1823
1824 builder = builder.conservative();
1826
1827 builder.build()
1828 }
1829
1830 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835 self.resource_guard.check()
1836 }
1837
1838 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840 let level = self.resource_guard.check()?;
1841
1842 if level != DegradationLevel::Normal {
1843 warn!(
1844 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845 phase,
1846 level,
1847 self.resource_guard.current_memory_mb(),
1848 self.resource_guard.available_disk_mb()
1849 );
1850 }
1851
1852 Ok(level)
1853 }
1854
1855 fn get_degradation_actions(&self) -> DegradationActions {
1857 self.resource_guard.get_actions()
1858 }
1859
1860 fn check_memory_limit(&self) -> SynthResult<()> {
1862 self.check_resources()?;
1863 Ok(())
1864 }
1865
1866 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868 info!("Starting enhanced generation workflow");
1869 info!(
1870 "Config: industry={:?}, period_months={}, companies={}",
1871 self.config.global.industry,
1872 self.config.global.period_months,
1873 self.config.companies.len()
1874 );
1875
1876 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879 datasynth_core::serde_decimal::set_numeric_native(is_native);
1880 struct NumericModeGuard;
1881 impl Drop for NumericModeGuard {
1882 fn drop(&mut self) {
1883 datasynth_core::serde_decimal::set_numeric_native(false);
1884 }
1885 }
1886 let _numeric_guard = if is_native {
1887 Some(NumericModeGuard)
1888 } else {
1889 None
1890 };
1891
1892 let initial_level = self.check_resources_with_log("initial")?;
1894 if initial_level == DegradationLevel::Emergency {
1895 return Err(SynthError::resource(
1896 "Insufficient resources to start generation",
1897 ));
1898 }
1899
1900 let mut stats = EnhancedGenerationStatistics {
1901 companies_count: self.config.companies.len(),
1902 period_months: self.config.global.period_months,
1903 ..Default::default()
1904 };
1905
1906 let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909 self.phase_master_data(&mut stats)?;
1911
1912 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917 let (mut document_flows, mut subledger, fa_journal_entries) =
1919 self.phase_document_flows(&mut stats)?;
1920
1921 self.emit_phase_items(
1923 "document_flows",
1924 "PurchaseOrder",
1925 &document_flows.purchase_orders,
1926 );
1927 self.emit_phase_items(
1928 "document_flows",
1929 "GoodsReceipt",
1930 &document_flows.goods_receipts,
1931 );
1932 self.emit_phase_items(
1933 "document_flows",
1934 "VendorInvoice",
1935 &document_flows.vendor_invoices,
1936 );
1937 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943 let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948 .iter()
1949 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950 .collect();
1951 if !opening_balance_jes.is_empty() {
1952 debug!(
1953 "Prepending {} opening balance JEs to entries",
1954 opening_balance_jes.len()
1955 );
1956 }
1957
1958 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961 if !opening_balance_jes.is_empty() {
1964 let mut combined = opening_balance_jes;
1965 combined.extend(entries);
1966 entries = combined;
1967 }
1968
1969 if !fa_journal_entries.is_empty() {
1971 debug!(
1972 "Appending {} FA acquisition JEs to main entries",
1973 fa_journal_entries.len()
1974 );
1975 entries.extend(fa_journal_entries);
1976 }
1977
1978 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981 let actions = self.get_degradation_actions();
1983
1984 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987 if !sourcing.contracts.is_empty() {
1990 let mut linked_count = 0usize;
1991 let po_vendor_pairs: Vec<(String, String)> = document_flows
1993 .p2p_chains
1994 .iter()
1995 .map(|chain| {
1996 (
1997 chain.purchase_order.vendor_id.clone(),
1998 chain.purchase_order.header.document_id.clone(),
1999 )
2000 })
2001 .collect();
2002
2003 for chain in &mut document_flows.p2p_chains {
2004 if chain.purchase_order.contract_id.is_none() {
2005 if let Some(contract) = sourcing
2006 .contracts
2007 .iter()
2008 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009 {
2010 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011 linked_count += 1;
2012 }
2013 }
2014 }
2015
2016 for contract in &mut sourcing.contracts {
2018 let po_ids: Vec<String> = po_vendor_pairs
2019 .iter()
2020 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021 .map(|(_, po_id)| po_id.clone())
2022 .collect();
2023 if !po_ids.is_empty() {
2024 contract.purchase_order_ids = po_ids;
2025 }
2026 }
2027
2028 if linked_count > 0 {
2029 debug!(
2030 "Linked {} purchase orders to S2C contracts by vendor match",
2031 linked_count
2032 );
2033 }
2034 }
2035
2036 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039 if !intercompany.seller_journal_entries.is_empty()
2041 || !intercompany.buyer_journal_entries.is_empty()
2042 {
2043 let ic_je_count = intercompany.seller_journal_entries.len()
2044 + intercompany.buyer_journal_entries.len();
2045 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047 debug!(
2048 "Appended {} IC journal entries to main entries",
2049 ic_je_count
2050 );
2051 }
2052
2053 if !intercompany.elimination_entries.is_empty() {
2055 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056 &intercompany.elimination_entries,
2057 );
2058 if !elim_jes.is_empty() {
2059 debug!(
2060 "Appended {} elimination journal entries to main entries",
2061 elim_jes.len()
2062 );
2063 let elim_debit: rust_decimal::Decimal =
2065 elim_jes.iter().map(|je| je.total_debit()).sum();
2066 let elim_credit: rust_decimal::Decimal =
2067 elim_jes.iter().map(|je| je.total_credit()).sum();
2068 let elim_diff = (elim_debit - elim_credit).abs();
2069 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2071 return Err(datasynth_core::error::SynthError::generation(format!(
2072 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2073 elim_debit, elim_credit, elim_diff, tolerance
2074 )));
2075 }
2076 debug!(
2077 "IC elimination balance verified: debits={}, credits={} (diff={})",
2078 elim_debit, elim_credit, elim_diff
2079 );
2080 entries.extend(elim_jes);
2081 }
2082 }
2083
2084 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2086 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2087 document_flows
2088 .customer_invoices
2089 .extend(ic_docs.seller_invoices.iter().cloned());
2090 document_flows
2091 .purchase_orders
2092 .extend(ic_docs.buyer_orders.iter().cloned());
2093 document_flows
2094 .goods_receipts
2095 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2096 document_flows
2097 .vendor_invoices
2098 .extend(ic_docs.buyer_invoices.iter().cloned());
2099 debug!(
2100 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2101 ic_docs.seller_invoices.len(),
2102 ic_docs.buyer_orders.len(),
2103 ic_docs.buyer_goods_receipts.len(),
2104 ic_docs.buyer_invoices.len(),
2105 );
2106 }
2107 }
2108
2109 let hr = self.phase_hr_data(&mut stats)?;
2111
2112 if !hr.payroll_runs.is_empty() {
2114 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2115 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2116 entries.extend(payroll_jes);
2117 }
2118
2119 if !hr.pension_journal_entries.is_empty() {
2121 debug!(
2122 "Generated {} JEs from pension plans",
2123 hr.pension_journal_entries.len()
2124 );
2125 entries.extend(hr.pension_journal_entries.iter().cloned());
2126 }
2127
2128 if !hr.stock_comp_journal_entries.is_empty() {
2130 debug!(
2131 "Generated {} JEs from stock-based compensation",
2132 hr.stock_comp_journal_entries.len()
2133 );
2134 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2135 }
2136
2137 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2139
2140 if !manufacturing_snap.production_orders.is_empty() {
2142 let currency = self
2143 .config
2144 .companies
2145 .first()
2146 .map(|c| c.currency.as_str())
2147 .unwrap_or("USD");
2148 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2149 &manufacturing_snap.production_orders,
2150 &manufacturing_snap.quality_inspections,
2151 currency,
2152 );
2153 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2154 entries.extend(mfg_jes);
2155 }
2156
2157 if !manufacturing_snap.quality_inspections.is_empty() {
2159 let framework = match self.config.accounting_standards.framework {
2160 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2161 _ => "US_GAAP",
2162 };
2163 for company in &self.config.companies {
2164 let company_orders: Vec<_> = manufacturing_snap
2165 .production_orders
2166 .iter()
2167 .filter(|o| o.company_code == company.code)
2168 .cloned()
2169 .collect();
2170 let company_inspections: Vec<_> = manufacturing_snap
2171 .quality_inspections
2172 .iter()
2173 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2174 .cloned()
2175 .collect();
2176 if company_inspections.is_empty() {
2177 continue;
2178 }
2179 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2180 let warranty_result = warranty_gen.generate(
2181 &company.code,
2182 &company_orders,
2183 &company_inspections,
2184 &company.currency,
2185 framework,
2186 );
2187 if !warranty_result.journal_entries.is_empty() {
2188 debug!(
2189 "Generated {} warranty provision JEs for {}",
2190 warranty_result.journal_entries.len(),
2191 company.code
2192 );
2193 entries.extend(warranty_result.journal_entries);
2194 }
2195 }
2196 }
2197
2198 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2200 {
2201 let cogs_currency = self
2202 .config
2203 .companies
2204 .first()
2205 .map(|c| c.currency.as_str())
2206 .unwrap_or("USD");
2207 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2208 &document_flows.deliveries,
2209 &manufacturing_snap.production_orders,
2210 cogs_currency,
2211 );
2212 if !cogs_jes.is_empty() {
2213 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2214 entries.extend(cogs_jes);
2215 }
2216 }
2217
2218 if !manufacturing_snap.inventory_movements.is_empty()
2224 && !subledger.inventory_positions.is_empty()
2225 {
2226 use datasynth_core::models::MovementType as MfgMovementType;
2227 let mut receipt_count = 0usize;
2228 let mut issue_count = 0usize;
2229 for movement in &manufacturing_snap.inventory_movements {
2230 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2232 p.material_id == movement.material_code
2233 && p.company_code == movement.entity_code
2234 }) {
2235 match movement.movement_type {
2236 MfgMovementType::GoodsReceipt => {
2237 pos.add_quantity(
2239 movement.quantity,
2240 movement.value,
2241 movement.movement_date,
2242 );
2243 receipt_count += 1;
2244 }
2245 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2246 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2248 issue_count += 1;
2249 }
2250 _ => {}
2251 }
2252 }
2253 }
2254 debug!(
2255 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2256 manufacturing_snap.inventory_movements.len(),
2257 receipt_count,
2258 issue_count,
2259 );
2260 }
2261
2262 if !entries.is_empty() {
2265 stats.total_entries = entries.len() as u64;
2266 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2267 debug!(
2268 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2269 stats.total_entries, stats.total_line_items
2270 );
2271 }
2272
2273 if self.config.internal_controls.enabled && !entries.is_empty() {
2275 info!("Phase 7b: Applying internal controls to journal entries");
2276 let control_config = ControlGeneratorConfig {
2277 exception_rate: self.config.internal_controls.exception_rate,
2278 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2279 enable_sox_marking: true,
2280 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2281 self.config.internal_controls.sox_materiality_threshold,
2282 )
2283 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2284 ..Default::default()
2285 };
2286 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2287 for entry in &mut entries {
2288 control_gen.apply_controls(entry, &coa);
2289 }
2290 let with_controls = entries
2291 .iter()
2292 .filter(|e| !e.header.control_ids.is_empty())
2293 .count();
2294 info!(
2295 "Applied controls to {} entries ({} with control IDs assigned)",
2296 entries.len(),
2297 with_controls
2298 );
2299 }
2300
2301 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2305 .iter()
2306 .filter(|e| e.header.sod_violation)
2307 .filter_map(|e| {
2308 e.header.sod_conflict_type.map(|ct| {
2309 use datasynth_core::models::{RiskLevel, SodViolation};
2310 let severity = match ct {
2311 datasynth_core::models::SodConflictType::PaymentReleaser
2312 | datasynth_core::models::SodConflictType::RequesterApprover => {
2313 RiskLevel::Critical
2314 }
2315 datasynth_core::models::SodConflictType::PreparerApprover
2316 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2317 | datasynth_core::models::SodConflictType::JournalEntryPoster
2318 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2319 RiskLevel::High
2320 }
2321 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2322 RiskLevel::Medium
2323 }
2324 };
2325 let action = format!(
2326 "SoD conflict {:?} on entry {} ({})",
2327 ct, e.header.document_id, e.header.company_code
2328 );
2329 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2330 })
2331 })
2332 .collect();
2333 if !sod_violations.is_empty() {
2334 info!(
2335 "Phase 7c: Extracted {} SoD violations from {} entries",
2336 sod_violations.len(),
2337 entries.len()
2338 );
2339 }
2340
2341 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2343
2344 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2346
2347 self.emit_phase_items(
2349 "anomaly_injection",
2350 "LabeledAnomaly",
2351 &anomaly_labels.labels,
2352 );
2353
2354 {
2358 use std::collections::HashMap;
2359 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2372 for je in &entries {
2373 if je.header.is_fraud {
2374 if let Some(ref fraud_type) = je.header.fraud_type {
2375 if let Some(ref reference) = je.header.reference {
2376 fraud_map.insert(reference.clone(), *fraud_type);
2378 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2381 if !bare.is_empty() {
2382 fraud_map.insert(bare.to_string(), *fraud_type);
2383 }
2384 }
2385 }
2386 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2388 }
2389 }
2390 }
2391 if !fraud_map.is_empty() {
2392 let mut propagated = 0usize;
2393 macro_rules! propagate_to {
2395 ($collection:expr) => {
2396 for doc in &mut $collection {
2397 if doc.header.propagate_fraud(&fraud_map) {
2398 propagated += 1;
2399 }
2400 }
2401 };
2402 }
2403 propagate_to!(document_flows.purchase_orders);
2404 propagate_to!(document_flows.goods_receipts);
2405 propagate_to!(document_flows.vendor_invoices);
2406 propagate_to!(document_flows.payments);
2407 propagate_to!(document_flows.sales_orders);
2408 propagate_to!(document_flows.deliveries);
2409 propagate_to!(document_flows.customer_invoices);
2410 if propagated > 0 {
2411 info!(
2412 "Propagated fraud labels to {} document flow records",
2413 propagated
2414 );
2415 }
2416 }
2417 }
2418
2419 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2421
2422 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2424
2425 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2427
2428 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2430
2431 let balance_validation = self.phase_balance_validation(&entries)?;
2433
2434 let subledger_reconciliation =
2436 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2437
2438 let (data_quality_stats, quality_issues) =
2440 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2441
2442 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2444
2445 {
2447 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2452 for je in &entries {
2453 if je.header.is_fraud || je.header.is_anomaly {
2454 continue;
2455 }
2456 let diff = (je.total_debit() - je.total_credit()).abs();
2457 if diff > tolerance {
2458 unbalanced_clean += 1;
2459 if unbalanced_clean <= 3 {
2460 warn!(
2461 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2462 je.header.document_id,
2463 je.total_debit(),
2464 je.total_credit(),
2465 diff
2466 );
2467 }
2468 }
2469 }
2470 if unbalanced_clean > 0 {
2471 return Err(datasynth_core::error::SynthError::generation(format!(
2472 "{} non-anomaly JEs are unbalanced (debits != credits). \
2473 First few logged above. Tolerance={}",
2474 unbalanced_clean, tolerance
2475 )));
2476 }
2477 debug!(
2478 "Phase 10c: All {} non-anomaly JEs individually balanced",
2479 entries
2480 .iter()
2481 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2482 .count()
2483 );
2484
2485 let company_codes: Vec<String> = self
2487 .config
2488 .companies
2489 .iter()
2490 .map(|c| c.code.clone())
2491 .collect();
2492 for company_code in &company_codes {
2493 let mut assets = rust_decimal::Decimal::ZERO;
2494 let mut liab_equity = rust_decimal::Decimal::ZERO;
2495
2496 for entry in &entries {
2497 if entry.header.company_code != *company_code {
2498 continue;
2499 }
2500 for line in &entry.lines {
2501 let acct = &line.gl_account;
2502 let net = line.debit_amount - line.credit_amount;
2503 if acct.starts_with('1') {
2505 assets += net;
2506 }
2507 else if acct.starts_with('2') || acct.starts_with('3') {
2509 liab_equity -= net; }
2511 }
2514 }
2515
2516 let bs_diff = (assets - liab_equity).abs();
2517 if bs_diff > tolerance {
2518 warn!(
2519 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2520 revenue/expense closing entries may not fully offset",
2521 company_code, assets, liab_equity, bs_diff
2522 );
2523 } else {
2527 debug!(
2528 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2529 company_code, assets, liab_equity, bs_diff
2530 );
2531 }
2532 }
2533
2534 info!("Phase 10c: All generation-time accounting assertions passed");
2535 }
2536
2537 let audit = self.phase_audit_data(&entries, &mut stats)?;
2539
2540 let mut banking = self.phase_banking_data(&mut stats)?;
2542
2543 if self.phase_config.generate_banking
2548 && !document_flows.payments.is_empty()
2549 && !banking.accounts.is_empty()
2550 {
2551 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2552 if bridge_rate > 0.0 {
2553 let mut bridge =
2554 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2555 self.seed,
2556 );
2557 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2558 &document_flows.payments,
2559 &banking.customers,
2560 &banking.accounts,
2561 bridge_rate,
2562 );
2563 info!(
2564 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2565 bridge_stats.bridged_count,
2566 bridge_stats.transactions_emitted,
2567 bridge_stats.fraud_propagated,
2568 );
2569 let bridged_count = bridged_txns.len();
2570 banking.transactions.extend(bridged_txns);
2571
2572 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2575 datasynth_banking::generators::velocity_computer::compute_velocity_features(
2576 &mut banking.transactions,
2577 );
2578 }
2579
2580 banking.suspicious_count = banking
2582 .transactions
2583 .iter()
2584 .filter(|t| t.is_suspicious)
2585 .count();
2586 stats.banking_transaction_count = banking.transactions.len();
2587 stats.banking_suspicious_count = banking.suspicious_count;
2588 }
2589 }
2590
2591 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2593
2594 self.phase_llm_enrichment(&mut stats);
2596
2597 self.phase_diffusion_enhancement(&mut stats);
2599
2600 self.phase_causal_overlay(&mut stats);
2602
2603 let mut financial_reporting = self.phase_financial_reporting(
2607 &document_flows,
2608 &entries,
2609 &coa,
2610 &hr,
2611 &audit,
2612 &mut stats,
2613 )?;
2614
2615 {
2617 use datasynth_core::models::StatementType;
2618 for stmt in &financial_reporting.consolidated_statements {
2619 if stmt.statement_type == StatementType::BalanceSheet {
2620 let total_assets: rust_decimal::Decimal = stmt
2621 .line_items
2622 .iter()
2623 .filter(|li| li.section.to_uppercase().contains("ASSET"))
2624 .map(|li| li.amount)
2625 .sum();
2626 let total_le: rust_decimal::Decimal = stmt
2627 .line_items
2628 .iter()
2629 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2630 .map(|li| li.amount)
2631 .sum();
2632 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2633 warn!(
2634 "BS equation imbalance: assets={}, L+E={}",
2635 total_assets, total_le
2636 );
2637 }
2638 }
2639 }
2640 }
2641
2642 let accounting_standards =
2644 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2645
2646 if !accounting_standards.ecl_journal_entries.is_empty() {
2648 debug!(
2649 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2650 accounting_standards.ecl_journal_entries.len()
2651 );
2652 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2653 }
2654
2655 if !accounting_standards.provision_journal_entries.is_empty() {
2657 debug!(
2658 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2659 accounting_standards.provision_journal_entries.len()
2660 );
2661 entries.extend(
2662 accounting_standards
2663 .provision_journal_entries
2664 .iter()
2665 .cloned(),
2666 );
2667 }
2668
2669 let ocpm = self.phase_ocpm_events(
2671 &document_flows,
2672 &sourcing,
2673 &hr,
2674 &manufacturing_snap,
2675 &banking,
2676 &audit,
2677 &financial_reporting,
2678 &mut stats,
2679 )?;
2680
2681 if let Some(ref event_log) = ocpm.event_log {
2683 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2684 }
2685
2686 if let Some(ref event_log) = ocpm.event_log {
2688 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2690 std::collections::HashMap::new();
2691 for (idx, event) in event_log.events.iter().enumerate() {
2692 if let Some(ref doc_ref) = event.document_ref {
2693 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2694 }
2695 }
2696
2697 if !doc_index.is_empty() {
2698 let mut annotated = 0usize;
2699 for entry in &mut entries {
2700 let doc_id_str = entry.header.document_id.to_string();
2701 let mut matched_indices: Vec<usize> = Vec::new();
2703 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2704 matched_indices.extend(indices);
2705 }
2706 if let Some(ref reference) = entry.header.reference {
2707 let bare_ref = reference
2708 .find(':')
2709 .map(|i| &reference[i + 1..])
2710 .unwrap_or(reference.as_str());
2711 if let Some(indices) = doc_index.get(bare_ref) {
2712 for &idx in indices {
2713 if !matched_indices.contains(&idx) {
2714 matched_indices.push(idx);
2715 }
2716 }
2717 }
2718 }
2719 if !matched_indices.is_empty() {
2721 for &idx in &matched_indices {
2722 let event = &event_log.events[idx];
2723 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2724 entry.header.ocpm_event_ids.push(event.event_id);
2725 }
2726 for obj_ref in &event.object_refs {
2727 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2728 entry.header.ocpm_object_ids.push(obj_ref.object_id);
2729 }
2730 }
2731 if entry.header.ocpm_case_id.is_none() {
2732 entry.header.ocpm_case_id = event.case_id;
2733 }
2734 }
2735 annotated += 1;
2736 }
2737 }
2738 debug!(
2739 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2740 annotated
2741 );
2742 }
2743 }
2744
2745 let sales_kpi_budgets =
2747 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2748
2749 let treasury =
2753 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2754
2755 if !treasury.journal_entries.is_empty() {
2757 debug!(
2758 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2759 treasury.journal_entries.len()
2760 );
2761 entries.extend(treasury.journal_entries.iter().cloned());
2762 }
2763
2764 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2766
2767 if !tax.tax_posting_journal_entries.is_empty() {
2769 debug!(
2770 "Merging {} tax posting JEs into GL",
2771 tax.tax_posting_journal_entries.len()
2772 );
2773 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2774 }
2775
2776 {
2780 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2781
2782 let framework_str = {
2783 use datasynth_config::schema::AccountingFrameworkConfig;
2784 match self
2785 .config
2786 .accounting_standards
2787 .framework
2788 .unwrap_or_default()
2789 {
2790 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2791 "IFRS"
2792 }
2793 _ => "US_GAAP",
2794 }
2795 };
2796
2797 let depreciation_total: rust_decimal::Decimal = entries
2799 .iter()
2800 .filter(|je| je.header.document_type == "CL")
2801 .flat_map(|je| je.lines.iter())
2802 .filter(|l| l.gl_account.starts_with("6000"))
2803 .map(|l| l.debit_amount)
2804 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2805
2806 let interest_paid: rust_decimal::Decimal = entries
2808 .iter()
2809 .flat_map(|je| je.lines.iter())
2810 .filter(|l| l.gl_account.starts_with("7100"))
2811 .map(|l| l.debit_amount)
2812 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2813
2814 let tax_paid: rust_decimal::Decimal = entries
2816 .iter()
2817 .flat_map(|je| je.lines.iter())
2818 .filter(|l| l.gl_account.starts_with("8000"))
2819 .map(|l| l.debit_amount)
2820 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2821
2822 let capex: rust_decimal::Decimal = entries
2824 .iter()
2825 .flat_map(|je| je.lines.iter())
2826 .filter(|l| l.gl_account.starts_with("1500"))
2827 .map(|l| l.debit_amount)
2828 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2829
2830 let dividends_paid: rust_decimal::Decimal = entries
2832 .iter()
2833 .flat_map(|je| je.lines.iter())
2834 .filter(|l| l.gl_account == "2170")
2835 .map(|l| l.debit_amount)
2836 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2837
2838 let cf_data = CashFlowSourceData {
2839 depreciation_total,
2840 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
2842 delta_ap: rust_decimal::Decimal::ZERO,
2843 delta_inventory: rust_decimal::Decimal::ZERO,
2844 capex,
2845 debt_issuance: rust_decimal::Decimal::ZERO,
2846 debt_repayment: rust_decimal::Decimal::ZERO,
2847 interest_paid,
2848 tax_paid,
2849 dividends_paid,
2850 framework: framework_str.to_string(),
2851 };
2852
2853 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2854 if !enhanced_cf_items.is_empty() {
2855 use datasynth_core::models::StatementType;
2857 let merge_count = enhanced_cf_items.len();
2858 for stmt in financial_reporting
2859 .financial_statements
2860 .iter_mut()
2861 .chain(financial_reporting.consolidated_statements.iter_mut())
2862 .chain(
2863 financial_reporting
2864 .standalone_statements
2865 .values_mut()
2866 .flat_map(|v| v.iter_mut()),
2867 )
2868 {
2869 if stmt.statement_type == StatementType::CashFlowStatement {
2870 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2871 }
2872 }
2873 info!(
2874 "Enhanced cash flow: {} supplementary items merged into CF statements",
2875 merge_count
2876 );
2877 }
2878 }
2879
2880 self.generate_notes_to_financial_statements(
2883 &mut financial_reporting,
2884 &accounting_standards,
2885 &tax,
2886 &hr,
2887 &audit,
2888 &treasury,
2889 );
2890
2891 if self.config.companies.len() >= 2 && !entries.is_empty() {
2895 let companies: Vec<(String, String)> = self
2896 .config
2897 .companies
2898 .iter()
2899 .map(|c| (c.code.clone(), c.name.clone()))
2900 .collect();
2901 let ic_elim: rust_decimal::Decimal =
2902 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2903 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2904 .unwrap_or(NaiveDate::MIN);
2905 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2906 let period_label = format!(
2907 "{}-{:02}",
2908 end_date.year(),
2909 (end_date - chrono::Days::new(1)).month()
2910 );
2911
2912 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2913 let (je_segments, je_recon) =
2914 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2915 if !je_segments.is_empty() {
2916 info!(
2917 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2918 je_segments.len(),
2919 ic_elim,
2920 );
2921 if financial_reporting.segment_reports.is_empty() {
2923 financial_reporting.segment_reports = je_segments;
2924 financial_reporting.segment_reconciliations = vec![je_recon];
2925 } else {
2926 financial_reporting.segment_reports.extend(je_segments);
2927 financial_reporting.segment_reconciliations.push(je_recon);
2928 }
2929 }
2930 }
2931
2932 let esg_snap =
2934 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2935
2936 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2938
2939 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2941
2942 let disruption_events = self.phase_disruption_events(&mut stats)?;
2944
2945 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2947
2948 let (entity_relationship_graph, cross_process_links) =
2950 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2951
2952 let industry_output = self.phase_industry_data(&mut stats);
2954
2955 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2957
2958 self.phase_hypergraph_export(
2960 &coa,
2961 &entries,
2962 &document_flows,
2963 &sourcing,
2964 &hr,
2965 &manufacturing_snap,
2966 &banking,
2967 &audit,
2968 &financial_reporting,
2969 &ocpm,
2970 &compliance_regulations,
2971 &mut stats,
2972 )?;
2973
2974 if self.phase_config.generate_graph_export {
2977 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2978 }
2979
2980 if self.config.streaming.enabled {
2982 info!("Note: streaming config is enabled but batch mode does not use it");
2983 }
2984 if self.config.vendor_network.enabled {
2985 debug!("Vendor network config available; relationship graph generation is partial");
2986 }
2987 if self.config.customer_segmentation.enabled {
2988 debug!("Customer segmentation config available; segment-aware generation is partial");
2989 }
2990
2991 let resource_stats = self.resource_guard.stats();
2993 info!(
2994 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2995 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2996 resource_stats.disk.estimated_bytes_written,
2997 resource_stats.degradation_level
2998 );
2999
3000 if let Some(ref sink) = self.phase_sink {
3002 if let Err(e) = sink.flush() {
3003 warn!("Stream sink flush failed: {e}");
3004 }
3005 }
3006
3007 let lineage = self.build_lineage_graph();
3009
3010 let gate_result = if self.config.quality_gates.enabled {
3012 let profile_name = &self.config.quality_gates.profile;
3013 match datasynth_eval::gates::get_profile(profile_name) {
3014 Some(profile) => {
3015 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3017
3018 if balance_validation.validated {
3020 eval.coherence.balance =
3021 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3022 equation_balanced: balance_validation.is_balanced,
3023 max_imbalance: (balance_validation.total_debits
3024 - balance_validation.total_credits)
3025 .abs(),
3026 periods_evaluated: 1,
3027 periods_imbalanced: if balance_validation.is_balanced {
3028 0
3029 } else {
3030 1
3031 },
3032 period_results: Vec::new(),
3033 companies_evaluated: self.config.companies.len(),
3034 });
3035 }
3036
3037 eval.coherence.passes = balance_validation.is_balanced;
3039 if !balance_validation.is_balanced {
3040 eval.coherence
3041 .failures
3042 .push("Balance sheet equation not satisfied".to_string());
3043 }
3044
3045 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3047 eval.statistical.passes = !entries.is_empty();
3048
3049 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3052
3053 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3054 info!(
3055 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3056 profile_name, result.gates_passed, result.gates_total, result.summary
3057 );
3058 Some(result)
3059 }
3060 None => {
3061 warn!(
3062 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3063 profile_name
3064 );
3065 None
3066 }
3067 }
3068 } else {
3069 None
3070 };
3071
3072 let internal_controls = if self.config.internal_controls.enabled {
3074 InternalControl::standard_controls()
3075 } else {
3076 Vec::new()
3077 };
3078
3079 Ok(EnhancedGenerationResult {
3080 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3081 master_data: std::mem::take(&mut self.master_data),
3082 document_flows,
3083 subledger,
3084 ocpm,
3085 audit,
3086 banking,
3087 graph_export,
3088 sourcing,
3089 financial_reporting,
3090 hr,
3091 accounting_standards,
3092 manufacturing: manufacturing_snap,
3093 sales_kpi_budgets,
3094 tax,
3095 esg: esg_snap,
3096 treasury,
3097 project_accounting,
3098 process_evolution,
3099 organizational_events,
3100 disruption_events,
3101 intercompany,
3102 journal_entries: entries,
3103 anomaly_labels,
3104 balance_validation,
3105 data_quality_stats,
3106 quality_issues,
3107 statistics: stats,
3108 lineage: Some(lineage),
3109 gate_result,
3110 internal_controls,
3111 sod_violations,
3112 opening_balances,
3113 subledger_reconciliation,
3114 counterfactual_pairs,
3115 red_flags,
3116 collusion_rings,
3117 temporal_vendor_chains,
3118 entity_relationship_graph,
3119 cross_process_links,
3120 industry_output,
3121 compliance_regulations,
3122 })
3123 }
3124
3125 fn phase_chart_of_accounts(
3131 &mut self,
3132 stats: &mut EnhancedGenerationStatistics,
3133 ) -> SynthResult<Arc<ChartOfAccounts>> {
3134 info!("Phase 1: Generating Chart of Accounts");
3135 let coa = self.generate_coa()?;
3136 stats.accounts_count = coa.account_count();
3137 info!(
3138 "Chart of Accounts generated: {} accounts",
3139 stats.accounts_count
3140 );
3141 self.check_resources_with_log("post-coa")?;
3142 Ok(coa)
3143 }
3144
3145 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3147 if self.phase_config.generate_master_data {
3148 info!("Phase 2: Generating Master Data");
3149 self.generate_master_data()?;
3150 stats.vendor_count = self.master_data.vendors.len();
3151 stats.customer_count = self.master_data.customers.len();
3152 stats.material_count = self.master_data.materials.len();
3153 stats.asset_count = self.master_data.assets.len();
3154 stats.employee_count = self.master_data.employees.len();
3155 info!(
3156 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3157 stats.vendor_count, stats.customer_count, stats.material_count,
3158 stats.asset_count, stats.employee_count
3159 );
3160 self.check_resources_with_log("post-master-data")?;
3161 } else {
3162 debug!("Phase 2: Skipped (master data generation disabled)");
3163 }
3164 Ok(())
3165 }
3166
3167 fn phase_document_flows(
3169 &mut self,
3170 stats: &mut EnhancedGenerationStatistics,
3171 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3172 let mut document_flows = DocumentFlowSnapshot::default();
3173 let mut subledger = SubledgerSnapshot::default();
3174 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3177
3178 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3179 info!("Phase 3: Generating Document Flows");
3180 self.generate_document_flows(&mut document_flows)?;
3181 stats.p2p_chain_count = document_flows.p2p_chains.len();
3182 stats.o2c_chain_count = document_flows.o2c_chains.len();
3183 info!(
3184 "Document flows generated: {} P2P chains, {} O2C chains",
3185 stats.p2p_chain_count, stats.o2c_chain_count
3186 );
3187
3188 debug!("Phase 3b: Linking document flows to subledgers");
3190 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3191 stats.ap_invoice_count = subledger.ap_invoices.len();
3192 stats.ar_invoice_count = subledger.ar_invoices.len();
3193 debug!(
3194 "Subledgers linked: {} AP invoices, {} AR invoices",
3195 stats.ap_invoice_count, stats.ar_invoice_count
3196 );
3197
3198 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3203 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3204 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3205 debug!("Payment settlements applied to AP and AR subledgers");
3206
3207 if let Ok(start_date) =
3210 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3211 {
3212 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3213 - chrono::Days::new(1);
3214 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3215 for company in &self.config.companies {
3222 let ar_report = ARAgingReport::from_invoices(
3223 company.code.clone(),
3224 &subledger.ar_invoices,
3225 as_of_date,
3226 );
3227 subledger.ar_aging_reports.push(ar_report);
3228
3229 let ap_report = APAgingReport::from_invoices(
3230 company.code.clone(),
3231 &subledger.ap_invoices,
3232 as_of_date,
3233 );
3234 subledger.ap_aging_reports.push(ap_report);
3235 }
3236 debug!(
3237 "AR/AP aging reports built: {} AR, {} AP",
3238 subledger.ar_aging_reports.len(),
3239 subledger.ap_aging_reports.len()
3240 );
3241
3242 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3244 {
3245 use datasynth_generators::DunningGenerator;
3246 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3247 for company in &self.config.companies {
3248 let currency = company.currency.as_str();
3249 let mut company_invoices: Vec<
3252 datasynth_core::models::subledger::ar::ARInvoice,
3253 > = subledger
3254 .ar_invoices
3255 .iter()
3256 .filter(|inv| inv.company_code == company.code)
3257 .cloned()
3258 .collect();
3259
3260 if company_invoices.is_empty() {
3261 continue;
3262 }
3263
3264 let result = dunning_gen.execute_dunning_run(
3265 &company.code,
3266 as_of_date,
3267 &mut company_invoices,
3268 currency,
3269 );
3270
3271 for updated in &company_invoices {
3273 if let Some(orig) = subledger
3274 .ar_invoices
3275 .iter_mut()
3276 .find(|i| i.invoice_number == updated.invoice_number)
3277 {
3278 orig.dunning_info = updated.dunning_info.clone();
3279 }
3280 }
3281
3282 subledger.dunning_runs.push(result.dunning_run);
3283 subledger.dunning_letters.extend(result.letters);
3284 dunning_journal_entries.extend(result.journal_entries);
3286 }
3287 debug!(
3288 "Dunning runs complete: {} runs, {} letters",
3289 subledger.dunning_runs.len(),
3290 subledger.dunning_letters.len()
3291 );
3292 }
3293 }
3294
3295 self.check_resources_with_log("post-document-flows")?;
3296 } else {
3297 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3298 }
3299
3300 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3302 if !self.master_data.assets.is_empty() {
3303 debug!("Generating FA subledger records");
3304 let company_code = self
3305 .config
3306 .companies
3307 .first()
3308 .map(|c| c.code.as_str())
3309 .unwrap_or("1000");
3310 let currency = self
3311 .config
3312 .companies
3313 .first()
3314 .map(|c| c.currency.as_str())
3315 .unwrap_or("USD");
3316
3317 let mut fa_gen = datasynth_generators::FAGenerator::new(
3318 datasynth_generators::FAGeneratorConfig::default(),
3319 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3320 );
3321
3322 for asset in &self.master_data.assets {
3323 let (record, je) = fa_gen.generate_asset_acquisition(
3324 company_code,
3325 &format!("{:?}", asset.asset_class),
3326 &asset.description,
3327 asset.acquisition_date,
3328 currency,
3329 asset.cost_center.as_deref(),
3330 );
3331 subledger.fa_records.push(record);
3332 fa_journal_entries.push(je);
3333 }
3334
3335 stats.fa_subledger_count = subledger.fa_records.len();
3336 debug!(
3337 "FA subledger records generated: {} (with {} acquisition JEs)",
3338 stats.fa_subledger_count,
3339 fa_journal_entries.len()
3340 );
3341 }
3342
3343 if !self.master_data.materials.is_empty() {
3345 debug!("Generating Inventory subledger records");
3346 let first_company = self.config.companies.first();
3347 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3348 let inv_currency = first_company
3349 .map(|c| c.currency.clone())
3350 .unwrap_or_else(|| "USD".to_string());
3351
3352 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3353 datasynth_generators::InventoryGeneratorConfig::default(),
3354 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3355 inv_currency.clone(),
3356 );
3357
3358 for (i, material) in self.master_data.materials.iter().enumerate() {
3359 let plant = format!("PLANT{:02}", (i % 3) + 1);
3360 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3361 let initial_qty = rust_decimal::Decimal::from(
3362 material
3363 .safety_stock
3364 .to_string()
3365 .parse::<i64>()
3366 .unwrap_or(100),
3367 );
3368
3369 let position = inv_gen.generate_position(
3370 company_code,
3371 &plant,
3372 &storage_loc,
3373 &material.material_id,
3374 &material.description,
3375 initial_qty,
3376 Some(material.standard_cost),
3377 &inv_currency,
3378 );
3379 subledger.inventory_positions.push(position);
3380 }
3381
3382 stats.inventory_subledger_count = subledger.inventory_positions.len();
3383 debug!(
3384 "Inventory subledger records generated: {}",
3385 stats.inventory_subledger_count
3386 );
3387 }
3388
3389 if !subledger.fa_records.is_empty() {
3391 if let Ok(start_date) =
3392 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3393 {
3394 let company_code = self
3395 .config
3396 .companies
3397 .first()
3398 .map(|c| c.code.as_str())
3399 .unwrap_or("1000");
3400 let fiscal_year = start_date.year();
3401 let start_period = start_date.month();
3402 let end_period =
3403 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3404
3405 let depr_cfg = FaDepreciationScheduleConfig {
3406 fiscal_year,
3407 start_period,
3408 end_period,
3409 seed_offset: 800,
3410 };
3411 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3412 let runs = depr_gen.generate(company_code, &subledger.fa_records);
3413 let run_count = runs.len();
3414 subledger.depreciation_runs = runs;
3415 debug!(
3416 "Depreciation runs generated: {} runs for {} periods",
3417 run_count, self.config.global.period_months
3418 );
3419 }
3420 }
3421
3422 if !subledger.inventory_positions.is_empty() {
3424 if let Ok(start_date) =
3425 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3426 {
3427 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3428 - chrono::Days::new(1);
3429
3430 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3431 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3432
3433 for company in &self.config.companies {
3434 let result = inv_val_gen.generate(
3435 &company.code,
3436 &subledger.inventory_positions,
3437 as_of_date,
3438 );
3439 subledger.inventory_valuations.push(result);
3440 }
3441 debug!(
3442 "Inventory valuations generated: {} company reports",
3443 subledger.inventory_valuations.len()
3444 );
3445 }
3446 }
3447
3448 Ok((document_flows, subledger, fa_journal_entries))
3449 }
3450
3451 #[allow(clippy::too_many_arguments)]
3453 fn phase_ocpm_events(
3454 &mut self,
3455 document_flows: &DocumentFlowSnapshot,
3456 sourcing: &SourcingSnapshot,
3457 hr: &HrSnapshot,
3458 manufacturing: &ManufacturingSnapshot,
3459 banking: &BankingSnapshot,
3460 audit: &AuditSnapshot,
3461 financial_reporting: &FinancialReportingSnapshot,
3462 stats: &mut EnhancedGenerationStatistics,
3463 ) -> SynthResult<OcpmSnapshot> {
3464 let degradation = self.check_resources()?;
3465 if degradation >= DegradationLevel::Reduced {
3466 debug!(
3467 "Phase skipped due to resource pressure (degradation: {:?})",
3468 degradation
3469 );
3470 return Ok(OcpmSnapshot::default());
3471 }
3472 if self.phase_config.generate_ocpm_events {
3473 info!("Phase 3c: Generating OCPM Events");
3474 let ocpm_snapshot = self.generate_ocpm_events(
3475 document_flows,
3476 sourcing,
3477 hr,
3478 manufacturing,
3479 banking,
3480 audit,
3481 financial_reporting,
3482 )?;
3483 stats.ocpm_event_count = ocpm_snapshot.event_count;
3484 stats.ocpm_object_count = ocpm_snapshot.object_count;
3485 stats.ocpm_case_count = ocpm_snapshot.case_count;
3486 info!(
3487 "OCPM events generated: {} events, {} objects, {} cases",
3488 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3489 );
3490 self.check_resources_with_log("post-ocpm")?;
3491 Ok(ocpm_snapshot)
3492 } else {
3493 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3494 Ok(OcpmSnapshot::default())
3495 }
3496 }
3497
3498 fn phase_journal_entries(
3500 &mut self,
3501 coa: &Arc<ChartOfAccounts>,
3502 document_flows: &DocumentFlowSnapshot,
3503 _stats: &mut EnhancedGenerationStatistics,
3504 ) -> SynthResult<Vec<JournalEntry>> {
3505 let mut entries = Vec::new();
3506
3507 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3509 debug!("Phase 4a: Generating JEs from document flows");
3510 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3511 debug!("Generated {} JEs from document flows", flow_entries.len());
3512 entries.extend(flow_entries);
3513 }
3514
3515 if self.phase_config.generate_journal_entries {
3517 info!("Phase 4: Generating Journal Entries");
3518 let je_entries = self.generate_journal_entries(coa)?;
3519 info!("Generated {} standalone journal entries", je_entries.len());
3520 entries.extend(je_entries);
3521 } else {
3522 debug!("Phase 4: Skipped (journal entry generation disabled)");
3523 }
3524
3525 if !entries.is_empty() {
3526 self.check_resources_with_log("post-journal-entries")?;
3529 }
3530
3531 Ok(entries)
3532 }
3533
3534 fn phase_anomaly_injection(
3536 &mut self,
3537 entries: &mut [JournalEntry],
3538 actions: &DegradationActions,
3539 stats: &mut EnhancedGenerationStatistics,
3540 ) -> SynthResult<AnomalyLabels> {
3541 if self.phase_config.inject_anomalies
3542 && !entries.is_empty()
3543 && !actions.skip_anomaly_injection
3544 {
3545 info!("Phase 5: Injecting Anomalies");
3546 let result = self.inject_anomalies(entries)?;
3547 stats.anomalies_injected = result.labels.len();
3548 info!("Injected {} anomalies", stats.anomalies_injected);
3549 self.check_resources_with_log("post-anomaly-injection")?;
3550 Ok(result)
3551 } else if actions.skip_anomaly_injection {
3552 warn!("Phase 5: Skipped due to resource degradation");
3553 Ok(AnomalyLabels::default())
3554 } else {
3555 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3556 Ok(AnomalyLabels::default())
3557 }
3558 }
3559
3560 fn phase_balance_validation(
3562 &mut self,
3563 entries: &[JournalEntry],
3564 ) -> SynthResult<BalanceValidationResult> {
3565 if self.phase_config.validate_balances && !entries.is_empty() {
3566 debug!("Phase 6: Validating Balances");
3567 let balance_validation = self.validate_journal_entries(entries)?;
3568 if balance_validation.is_balanced {
3569 debug!("Balance validation passed");
3570 } else {
3571 warn!(
3572 "Balance validation found {} errors",
3573 balance_validation.validation_errors.len()
3574 );
3575 }
3576 Ok(balance_validation)
3577 } else {
3578 Ok(BalanceValidationResult::default())
3579 }
3580 }
3581
3582 fn phase_data_quality_injection(
3584 &mut self,
3585 entries: &mut [JournalEntry],
3586 actions: &DegradationActions,
3587 stats: &mut EnhancedGenerationStatistics,
3588 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3589 if self.phase_config.inject_data_quality
3590 && !entries.is_empty()
3591 && !actions.skip_data_quality
3592 {
3593 info!("Phase 7: Injecting Data Quality Variations");
3594 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3595 stats.data_quality_issues = dq_stats.records_with_issues;
3596 info!("Injected {} data quality issues", stats.data_quality_issues);
3597 self.check_resources_with_log("post-data-quality")?;
3598 Ok((dq_stats, quality_issues))
3599 } else if actions.skip_data_quality {
3600 warn!("Phase 7: Skipped due to resource degradation");
3601 Ok((DataQualityStats::default(), Vec::new()))
3602 } else {
3603 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3604 Ok((DataQualityStats::default(), Vec::new()))
3605 }
3606 }
3607
3608 fn phase_period_close(
3618 &mut self,
3619 entries: &mut Vec<JournalEntry>,
3620 subledger: &SubledgerSnapshot,
3621 stats: &mut EnhancedGenerationStatistics,
3622 ) -> SynthResult<()> {
3623 if !self.phase_config.generate_period_close || entries.is_empty() {
3624 debug!("Phase 10b: Skipped (period close disabled or no entries)");
3625 return Ok(());
3626 }
3627
3628 info!("Phase 10b: Generating period-close journal entries");
3629
3630 use datasynth_core::accounts::{
3631 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3632 };
3633 use rust_decimal::Decimal;
3634
3635 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3636 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3637 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3638 let close_date = end_date - chrono::Days::new(1);
3640
3641 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
3646 .config
3647 .companies
3648 .iter()
3649 .map(|c| c.code.clone())
3650 .collect();
3651
3652 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3654 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3655
3656 let period_months = self.config.global.period_months;
3660 for asset in &subledger.fa_records {
3661 use datasynth_core::models::subledger::fa::AssetStatus;
3663 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3664 continue;
3665 }
3666 let useful_life_months = asset.useful_life_months();
3667 if useful_life_months == 0 {
3668 continue;
3670 }
3671 let salvage_value = asset.salvage_value();
3672 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3673 if depreciable_base == Decimal::ZERO {
3674 continue;
3675 }
3676 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3677 * Decimal::from(period_months))
3678 .round_dp(2);
3679 if period_depr <= Decimal::ZERO {
3680 continue;
3681 }
3682
3683 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3684 depr_header.document_type = "CL".to_string();
3685 depr_header.header_text = Some(format!(
3686 "Depreciation - {} {}",
3687 asset.asset_number, asset.description
3688 ));
3689 depr_header.created_by = "CLOSE_ENGINE".to_string();
3690 depr_header.source = TransactionSource::Automated;
3691 depr_header.business_process = Some(BusinessProcess::R2R);
3692
3693 let doc_id = depr_header.document_id;
3694 let mut depr_je = JournalEntry::new(depr_header);
3695
3696 depr_je.add_line(JournalEntryLine::debit(
3698 doc_id,
3699 1,
3700 expense_accounts::DEPRECIATION.to_string(),
3701 period_depr,
3702 ));
3703 depr_je.add_line(JournalEntryLine::credit(
3705 doc_id,
3706 2,
3707 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3708 period_depr,
3709 ));
3710
3711 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3712 close_jes.push(depr_je);
3713 }
3714
3715 if !subledger.fa_records.is_empty() {
3716 debug!(
3717 "Generated {} depreciation JEs from {} FA records",
3718 close_jes.len(),
3719 subledger.fa_records.len()
3720 );
3721 }
3722
3723 {
3727 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3728 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3729
3730 let accrual_items: &[(&str, &str, &str)] = &[
3732 ("Accrued Utilities", "6200", "2100"),
3733 ("Accrued Rent", "6300", "2100"),
3734 ("Accrued Interest", "6100", "2150"),
3735 ];
3736
3737 for company_code in &company_codes {
3738 let company_revenue: Decimal = entries
3740 .iter()
3741 .filter(|e| e.header.company_code == *company_code)
3742 .flat_map(|e| e.lines.iter())
3743 .filter(|l| l.gl_account.starts_with('4'))
3744 .map(|l| l.credit_amount - l.debit_amount)
3745 .fold(Decimal::ZERO, |acc, v| acc + v);
3746
3747 if company_revenue <= Decimal::ZERO {
3748 continue;
3749 }
3750
3751 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3753 if accrual_base <= Decimal::ZERO {
3754 continue;
3755 }
3756
3757 for (description, expense_acct, liability_acct) in accrual_items {
3758 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3759 company_code,
3760 description,
3761 accrual_base,
3762 expense_acct,
3763 liability_acct,
3764 close_date,
3765 None,
3766 );
3767 close_jes.push(accrual_je);
3768 if let Some(rev_je) = reversal_je {
3769 close_jes.push(rev_je);
3770 }
3771 }
3772 }
3773
3774 debug!(
3775 "Generated accrual entries for {} companies",
3776 company_codes.len()
3777 );
3778 }
3779
3780 for company_code in &company_codes {
3781 let mut total_revenue = Decimal::ZERO;
3786 let mut total_expenses = Decimal::ZERO;
3787
3788 for entry in entries.iter() {
3789 if entry.header.company_code != *company_code {
3790 continue;
3791 }
3792 for line in &entry.lines {
3793 let category = AccountCategory::from_account(&line.gl_account);
3794 match category {
3795 AccountCategory::Revenue => {
3796 total_revenue += line.credit_amount - line.debit_amount;
3798 }
3799 AccountCategory::Cogs
3800 | AccountCategory::OperatingExpense
3801 | AccountCategory::OtherIncomeExpense
3802 | AccountCategory::Tax => {
3803 total_expenses += line.debit_amount - line.credit_amount;
3805 }
3806 _ => {}
3807 }
3808 }
3809 }
3810
3811 let pre_tax_income = total_revenue - total_expenses;
3812
3813 if pre_tax_income == Decimal::ZERO {
3815 debug!(
3816 "Company {}: no pre-tax income, skipping period close",
3817 company_code
3818 );
3819 continue;
3820 }
3821
3822 if pre_tax_income > Decimal::ZERO {
3824 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3826
3827 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3828 tax_header.document_type = "CL".to_string();
3829 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3830 tax_header.created_by = "CLOSE_ENGINE".to_string();
3831 tax_header.source = TransactionSource::Automated;
3832 tax_header.business_process = Some(BusinessProcess::R2R);
3833
3834 let doc_id = tax_header.document_id;
3835 let mut tax_je = JournalEntry::new(tax_header);
3836
3837 tax_je.add_line(JournalEntryLine::debit(
3839 doc_id,
3840 1,
3841 tax_accounts::TAX_EXPENSE.to_string(),
3842 tax_amount,
3843 ));
3844 tax_je.add_line(JournalEntryLine::credit(
3846 doc_id,
3847 2,
3848 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3849 tax_amount,
3850 ));
3851
3852 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3853 close_jes.push(tax_je);
3854 } else {
3855 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3858 if dta_amount > Decimal::ZERO {
3859 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3860 dta_header.document_type = "CL".to_string();
3861 dta_header.header_text =
3862 Some(format!("Deferred tax asset (DTA) - {}", company_code));
3863 dta_header.created_by = "CLOSE_ENGINE".to_string();
3864 dta_header.source = TransactionSource::Automated;
3865 dta_header.business_process = Some(BusinessProcess::R2R);
3866
3867 let doc_id = dta_header.document_id;
3868 let mut dta_je = JournalEntry::new(dta_header);
3869
3870 dta_je.add_line(JournalEntryLine::debit(
3872 doc_id,
3873 1,
3874 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3875 dta_amount,
3876 ));
3877 dta_je.add_line(JournalEntryLine::credit(
3880 doc_id,
3881 2,
3882 tax_accounts::TAX_EXPENSE.to_string(),
3883 dta_amount,
3884 ));
3885
3886 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3887 close_jes.push(dta_je);
3888 debug!(
3889 "Company {}: loss year — recognised DTA of {}",
3890 company_code, dta_amount
3891 );
3892 }
3893 }
3894
3895 let tax_provision = if pre_tax_income > Decimal::ZERO {
3901 (pre_tax_income * tax_rate).round_dp(2)
3902 } else {
3903 Decimal::ZERO
3904 };
3905 let net_income = pre_tax_income - tax_provision;
3906
3907 if net_income > Decimal::ZERO {
3908 use datasynth_generators::DividendGenerator;
3909 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
3911 let currency_str = self
3912 .config
3913 .companies
3914 .iter()
3915 .find(|c| c.code == *company_code)
3916 .map(|c| c.currency.as_str())
3917 .unwrap_or("USD");
3918 let div_result = div_gen.generate(
3919 company_code,
3920 close_date,
3921 Decimal::new(1, 0), dividend_amount,
3923 currency_str,
3924 );
3925 let div_je_count = div_result.journal_entries.len();
3926 close_jes.extend(div_result.journal_entries);
3927 debug!(
3928 "Company {}: declared dividend of {} ({} JEs)",
3929 company_code, dividend_amount, div_je_count
3930 );
3931 }
3932
3933 if net_income != Decimal::ZERO {
3938 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3939 close_header.document_type = "CL".to_string();
3940 close_header.header_text =
3941 Some(format!("Income statement close - {}", company_code));
3942 close_header.created_by = "CLOSE_ENGINE".to_string();
3943 close_header.source = TransactionSource::Automated;
3944 close_header.business_process = Some(BusinessProcess::R2R);
3945
3946 let doc_id = close_header.document_id;
3947 let mut close_je = JournalEntry::new(close_header);
3948
3949 let abs_net_income = net_income.abs();
3950
3951 if net_income > Decimal::ZERO {
3952 close_je.add_line(JournalEntryLine::debit(
3954 doc_id,
3955 1,
3956 equity_accounts::INCOME_SUMMARY.to_string(),
3957 abs_net_income,
3958 ));
3959 close_je.add_line(JournalEntryLine::credit(
3960 doc_id,
3961 2,
3962 equity_accounts::RETAINED_EARNINGS.to_string(),
3963 abs_net_income,
3964 ));
3965 } else {
3966 close_je.add_line(JournalEntryLine::debit(
3968 doc_id,
3969 1,
3970 equity_accounts::RETAINED_EARNINGS.to_string(),
3971 abs_net_income,
3972 ));
3973 close_je.add_line(JournalEntryLine::credit(
3974 doc_id,
3975 2,
3976 equity_accounts::INCOME_SUMMARY.to_string(),
3977 abs_net_income,
3978 ));
3979 }
3980
3981 debug_assert!(
3982 close_je.is_balanced(),
3983 "Income statement closing JE must be balanced"
3984 );
3985 close_jes.push(close_je);
3986 }
3987 }
3988
3989 let close_count = close_jes.len();
3990 if close_count > 0 {
3991 info!("Generated {} period-close journal entries", close_count);
3992 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3993 entries.extend(close_jes);
3994 stats.period_close_je_count = close_count;
3995
3996 stats.total_entries = entries.len() as u64;
3998 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3999 } else {
4000 debug!("No period-close entries generated (no income statement activity)");
4001 }
4002
4003 Ok(())
4004 }
4005
4006 fn phase_audit_data(
4008 &mut self,
4009 entries: &[JournalEntry],
4010 stats: &mut EnhancedGenerationStatistics,
4011 ) -> SynthResult<AuditSnapshot> {
4012 if self.phase_config.generate_audit {
4013 info!("Phase 8: Generating Audit Data");
4014 let audit_snapshot = self.generate_audit_data(entries)?;
4015 stats.audit_engagement_count = audit_snapshot.engagements.len();
4016 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4017 stats.audit_evidence_count = audit_snapshot.evidence.len();
4018 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4019 stats.audit_finding_count = audit_snapshot.findings.len();
4020 stats.audit_judgment_count = audit_snapshot.judgments.len();
4021 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4022 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4023 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4024 stats.audit_sample_count = audit_snapshot.samples.len();
4025 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4026 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4027 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4028 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4029 stats.audit_related_party_transaction_count =
4030 audit_snapshot.related_party_transactions.len();
4031 info!(
4032 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4033 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4034 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4035 {} RP transactions",
4036 stats.audit_engagement_count,
4037 stats.audit_workpaper_count,
4038 stats.audit_evidence_count,
4039 stats.audit_risk_count,
4040 stats.audit_finding_count,
4041 stats.audit_judgment_count,
4042 stats.audit_confirmation_count,
4043 stats.audit_procedure_step_count,
4044 stats.audit_sample_count,
4045 stats.audit_analytical_result_count,
4046 stats.audit_ia_function_count,
4047 stats.audit_ia_report_count,
4048 stats.audit_related_party_count,
4049 stats.audit_related_party_transaction_count,
4050 );
4051 self.check_resources_with_log("post-audit")?;
4052 Ok(audit_snapshot)
4053 } else {
4054 debug!("Phase 8: Skipped (audit generation disabled)");
4055 Ok(AuditSnapshot::default())
4056 }
4057 }
4058
4059 fn phase_banking_data(
4061 &mut self,
4062 stats: &mut EnhancedGenerationStatistics,
4063 ) -> SynthResult<BankingSnapshot> {
4064 if self.phase_config.generate_banking {
4065 info!("Phase 9: Generating Banking KYC/AML Data");
4066 let banking_snapshot = self.generate_banking_data()?;
4067 stats.banking_customer_count = banking_snapshot.customers.len();
4068 stats.banking_account_count = banking_snapshot.accounts.len();
4069 stats.banking_transaction_count = banking_snapshot.transactions.len();
4070 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4071 info!(
4072 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4073 stats.banking_customer_count, stats.banking_account_count,
4074 stats.banking_transaction_count, stats.banking_suspicious_count
4075 );
4076 self.check_resources_with_log("post-banking")?;
4077 Ok(banking_snapshot)
4078 } else {
4079 debug!("Phase 9: Skipped (banking generation disabled)");
4080 Ok(BankingSnapshot::default())
4081 }
4082 }
4083
4084 fn phase_graph_export(
4086 &mut self,
4087 entries: &[JournalEntry],
4088 coa: &Arc<ChartOfAccounts>,
4089 stats: &mut EnhancedGenerationStatistics,
4090 ) -> SynthResult<GraphExportSnapshot> {
4091 if self.phase_config.generate_graph_export && !entries.is_empty() {
4092 info!("Phase 10: Exporting Accounting Network Graphs");
4093 match self.export_graphs(entries, coa, stats) {
4094 Ok(snapshot) => {
4095 info!(
4096 "Graph export complete: {} graphs ({} nodes, {} edges)",
4097 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4098 );
4099 Ok(snapshot)
4100 }
4101 Err(e) => {
4102 warn!("Phase 10: Graph export failed: {}", e);
4103 Ok(GraphExportSnapshot::default())
4104 }
4105 }
4106 } else {
4107 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4108 Ok(GraphExportSnapshot::default())
4109 }
4110 }
4111
4112 #[allow(clippy::too_many_arguments)]
4114 fn phase_hypergraph_export(
4115 &self,
4116 coa: &Arc<ChartOfAccounts>,
4117 entries: &[JournalEntry],
4118 document_flows: &DocumentFlowSnapshot,
4119 sourcing: &SourcingSnapshot,
4120 hr: &HrSnapshot,
4121 manufacturing: &ManufacturingSnapshot,
4122 banking: &BankingSnapshot,
4123 audit: &AuditSnapshot,
4124 financial_reporting: &FinancialReportingSnapshot,
4125 ocpm: &OcpmSnapshot,
4126 compliance: &ComplianceRegulationsSnapshot,
4127 stats: &mut EnhancedGenerationStatistics,
4128 ) -> SynthResult<()> {
4129 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4130 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4131 match self.export_hypergraph(
4132 coa,
4133 entries,
4134 document_flows,
4135 sourcing,
4136 hr,
4137 manufacturing,
4138 banking,
4139 audit,
4140 financial_reporting,
4141 ocpm,
4142 compliance,
4143 stats,
4144 ) {
4145 Ok(info) => {
4146 info!(
4147 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4148 info.node_count, info.edge_count, info.hyperedge_count
4149 );
4150 }
4151 Err(e) => {
4152 warn!("Phase 10b: Hypergraph export failed: {}", e);
4153 }
4154 }
4155 } else {
4156 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4157 }
4158 Ok(())
4159 }
4160
4161 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4167 if !self.config.llm.enabled {
4168 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4169 return;
4170 }
4171
4172 info!("Phase 11: Starting LLM Enrichment");
4173 let start = std::time::Instant::now();
4174
4175 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4176 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4179 let schema_provider = &self.config.llm.provider;
4180 let api_key_env = match schema_provider.as_str() {
4181 "openai" => Some("OPENAI_API_KEY"),
4182 "anthropic" => Some("ANTHROPIC_API_KEY"),
4183 "custom" => Some("LLM_API_KEY"),
4184 _ => None,
4185 };
4186 if let Some(key_env) = api_key_env {
4187 if std::env::var(key_env).is_ok() {
4188 let llm_config = datasynth_core::llm::LlmConfig {
4189 model: self.config.llm.model.clone(),
4190 api_key_env: key_env.to_string(),
4191 ..datasynth_core::llm::LlmConfig::default()
4192 };
4193 match HttpLlmProvider::new(llm_config) {
4194 Ok(p) => Arc::new(p),
4195 Err(e) => {
4196 warn!(
4197 "Failed to create HttpLlmProvider: {}; falling back to mock",
4198 e
4199 );
4200 Arc::new(MockLlmProvider::new(self.seed))
4201 }
4202 }
4203 } else {
4204 Arc::new(MockLlmProvider::new(self.seed))
4205 }
4206 } else {
4207 Arc::new(MockLlmProvider::new(self.seed))
4208 }
4209 };
4210 let enricher = VendorLlmEnricher::new(provider);
4211
4212 let industry = format!("{:?}", self.config.global.industry);
4213 let max_enrichments = self
4214 .config
4215 .llm
4216 .max_vendor_enrichments
4217 .min(self.master_data.vendors.len());
4218
4219 let mut enriched_count = 0usize;
4220 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4221 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4222 Ok(name) => {
4223 vendor.name = name;
4224 enriched_count += 1;
4225 }
4226 Err(e) => {
4227 warn!(
4228 "LLM vendor enrichment failed for {}: {}",
4229 vendor.vendor_id, e
4230 );
4231 }
4232 }
4233 }
4234
4235 enriched_count
4236 }));
4237
4238 match result {
4239 Ok(enriched_count) => {
4240 stats.llm_vendors_enriched = enriched_count;
4241 let elapsed = start.elapsed();
4242 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4243 info!(
4244 "Phase 11 complete: {} vendors enriched in {}ms",
4245 enriched_count, stats.llm_enrichment_ms
4246 );
4247 }
4248 Err(_) => {
4249 let elapsed = start.elapsed();
4250 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4251 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4252 }
4253 }
4254 }
4255
4256 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4262 if !self.config.diffusion.enabled {
4263 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4264 return;
4265 }
4266
4267 info!("Phase 12: Starting Diffusion Enhancement");
4268 let start = std::time::Instant::now();
4269
4270 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4271 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
4274
4275 let diffusion_config = DiffusionConfig {
4276 n_steps: self.config.diffusion.n_steps,
4277 seed: self.seed,
4278 ..Default::default()
4279 };
4280
4281 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4282
4283 let n_samples = self.config.diffusion.sample_size;
4284 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
4286
4287 samples.len()
4288 }));
4289
4290 match result {
4291 Ok(sample_count) => {
4292 stats.diffusion_samples_generated = sample_count;
4293 let elapsed = start.elapsed();
4294 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4295 info!(
4296 "Phase 12 complete: {} diffusion samples generated in {}ms",
4297 sample_count, stats.diffusion_enhancement_ms
4298 );
4299 }
4300 Err(_) => {
4301 let elapsed = start.elapsed();
4302 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4303 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4304 }
4305 }
4306 }
4307
4308 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4315 if !self.config.causal.enabled {
4316 debug!("Phase 13: Skipped (causal generation disabled)");
4317 return;
4318 }
4319
4320 info!("Phase 13: Starting Causal Overlay");
4321 let start = std::time::Instant::now();
4322
4323 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4324 let graph = match self.config.causal.template.as_str() {
4326 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4327 _ => CausalGraph::fraud_detection_template(),
4328 };
4329
4330 let scm = StructuralCausalModel::new(graph.clone())
4331 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4332
4333 let n_samples = self.config.causal.sample_size;
4334 let samples = scm
4335 .generate(n_samples, self.seed)
4336 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4337
4338 let validation_passed = if self.config.causal.validate {
4340 let report = CausalValidator::validate_causal_structure(&samples, &graph);
4341 if report.valid {
4342 info!(
4343 "Causal validation passed: all {} checks OK",
4344 report.checks.len()
4345 );
4346 } else {
4347 warn!(
4348 "Causal validation: {} violations detected: {:?}",
4349 report.violations.len(),
4350 report.violations
4351 );
4352 }
4353 Some(report.valid)
4354 } else {
4355 None
4356 };
4357
4358 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4359 }));
4360
4361 match result {
4362 Ok(Ok((sample_count, validation_passed))) => {
4363 stats.causal_samples_generated = sample_count;
4364 stats.causal_validation_passed = validation_passed;
4365 let elapsed = start.elapsed();
4366 stats.causal_generation_ms = elapsed.as_millis() as u64;
4367 info!(
4368 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4369 sample_count, stats.causal_generation_ms, validation_passed,
4370 );
4371 }
4372 Ok(Err(e)) => {
4373 let elapsed = start.elapsed();
4374 stats.causal_generation_ms = elapsed.as_millis() as u64;
4375 warn!("Phase 13: Causal generation failed: {}", e);
4376 }
4377 Err(_) => {
4378 let elapsed = start.elapsed();
4379 stats.causal_generation_ms = elapsed.as_millis() as u64;
4380 warn!("Phase 13: Causal generation failed (panic caught), continuing");
4381 }
4382 }
4383 }
4384
4385 fn phase_sourcing_data(
4387 &mut self,
4388 stats: &mut EnhancedGenerationStatistics,
4389 ) -> SynthResult<SourcingSnapshot> {
4390 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4391 debug!("Phase 14: Skipped (sourcing generation disabled)");
4392 return Ok(SourcingSnapshot::default());
4393 }
4394 let degradation = self.check_resources()?;
4395 if degradation >= DegradationLevel::Reduced {
4396 debug!(
4397 "Phase skipped due to resource pressure (degradation: {:?})",
4398 degradation
4399 );
4400 return Ok(SourcingSnapshot::default());
4401 }
4402
4403 info!("Phase 14: Generating S2C Sourcing Data");
4404 let seed = self.seed;
4405
4406 let vendor_ids: Vec<String> = self
4408 .master_data
4409 .vendors
4410 .iter()
4411 .map(|v| v.vendor_id.clone())
4412 .collect();
4413 if vendor_ids.is_empty() {
4414 debug!("Phase 14: Skipped (no vendors available)");
4415 return Ok(SourcingSnapshot::default());
4416 }
4417
4418 let categories: Vec<(String, String)> = vec![
4419 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4420 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4421 ("CAT-IT".to_string(), "IT Equipment".to_string()),
4422 ("CAT-SVC".to_string(), "Professional Services".to_string()),
4423 ("CAT-LOG".to_string(), "Logistics".to_string()),
4424 ];
4425 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4426 .iter()
4427 .map(|(id, name)| {
4428 (
4429 id.clone(),
4430 name.clone(),
4431 rust_decimal::Decimal::from(100_000),
4432 )
4433 })
4434 .collect();
4435
4436 let company_code = self
4437 .config
4438 .companies
4439 .first()
4440 .map(|c| c.code.as_str())
4441 .unwrap_or("1000");
4442 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4443 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4444 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4445 let fiscal_year = start_date.year() as u16;
4446 let owner_ids: Vec<String> = self
4447 .master_data
4448 .employees
4449 .iter()
4450 .take(5)
4451 .map(|e| e.employee_id.clone())
4452 .collect();
4453 let owner_id = owner_ids
4454 .first()
4455 .map(std::string::String::as_str)
4456 .unwrap_or("BUYER-001");
4457
4458 let mut spend_gen = SpendAnalysisGenerator::new(seed);
4460 let spend_analyses =
4461 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4462
4463 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4465 let sourcing_projects = if owner_ids.is_empty() {
4466 Vec::new()
4467 } else {
4468 project_gen.generate(
4469 company_code,
4470 &categories_with_spend,
4471 &owner_ids,
4472 start_date,
4473 self.config.global.period_months,
4474 )
4475 };
4476 stats.sourcing_project_count = sourcing_projects.len();
4477
4478 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4480 let mut qual_gen = QualificationGenerator::new(seed + 2);
4481 let qualifications = qual_gen.generate(
4482 company_code,
4483 &qual_vendor_ids,
4484 sourcing_projects.first().map(|p| p.project_id.as_str()),
4485 owner_id,
4486 start_date,
4487 );
4488
4489 let mut rfx_gen = RfxGenerator::new(seed + 3);
4491 let rfx_events: Vec<RfxEvent> = sourcing_projects
4492 .iter()
4493 .map(|proj| {
4494 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4495 rfx_gen.generate(
4496 company_code,
4497 &proj.project_id,
4498 &proj.category_id,
4499 &qualified_vids,
4500 owner_id,
4501 start_date,
4502 50000.0,
4503 )
4504 })
4505 .collect();
4506 stats.rfx_event_count = rfx_events.len();
4507
4508 let mut bid_gen = BidGenerator::new(seed + 4);
4510 let mut all_bids = Vec::new();
4511 for rfx in &rfx_events {
4512 let bidder_count = vendor_ids.len().clamp(2, 5);
4513 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4514 let bids = bid_gen.generate(rfx, &responding, start_date);
4515 all_bids.extend(bids);
4516 }
4517 stats.bid_count = all_bids.len();
4518
4519 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4521 let bid_evaluations: Vec<BidEvaluation> = rfx_events
4522 .iter()
4523 .map(|rfx| {
4524 let rfx_bids: Vec<SupplierBid> = all_bids
4525 .iter()
4526 .filter(|b| b.rfx_id == rfx.rfx_id)
4527 .cloned()
4528 .collect();
4529 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4530 })
4531 .collect();
4532
4533 let mut contract_gen = ContractGenerator::new(seed + 6);
4535 let contracts: Vec<ProcurementContract> = bid_evaluations
4536 .iter()
4537 .zip(rfx_events.iter())
4538 .filter_map(|(eval, rfx)| {
4539 eval.ranked_bids.first().and_then(|winner| {
4540 all_bids
4541 .iter()
4542 .find(|b| b.bid_id == winner.bid_id)
4543 .map(|winning_bid| {
4544 contract_gen.generate_from_bid(
4545 winning_bid,
4546 Some(&rfx.sourcing_project_id),
4547 &rfx.category_id,
4548 owner_id,
4549 start_date,
4550 )
4551 })
4552 })
4553 })
4554 .collect();
4555 stats.contract_count = contracts.len();
4556
4557 let mut catalog_gen = CatalogGenerator::new(seed + 7);
4559 let catalog_items = catalog_gen.generate(&contracts);
4560 stats.catalog_item_count = catalog_items.len();
4561
4562 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4564 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4565 .iter()
4566 .fold(
4567 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4568 |mut acc, c| {
4569 acc.entry(c.vendor_id.clone()).or_default().push(c);
4570 acc
4571 },
4572 )
4573 .into_iter()
4574 .collect();
4575 let scorecards = scorecard_gen.generate(
4576 company_code,
4577 &vendor_contracts,
4578 start_date,
4579 end_date,
4580 owner_id,
4581 );
4582 stats.scorecard_count = scorecards.len();
4583
4584 let mut sourcing_projects = sourcing_projects;
4587 for project in &mut sourcing_projects {
4588 project.rfx_ids = rfx_events
4590 .iter()
4591 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4592 .map(|rfx| rfx.rfx_id.clone())
4593 .collect();
4594
4595 project.contract_id = contracts
4597 .iter()
4598 .find(|c| {
4599 c.sourcing_project_id
4600 .as_deref()
4601 .is_some_and(|sp| sp == project.project_id)
4602 })
4603 .map(|c| c.contract_id.clone());
4604
4605 project.spend_analysis_id = spend_analyses
4607 .iter()
4608 .find(|sa| sa.category_id == project.category_id)
4609 .map(|sa| sa.category_id.clone());
4610 }
4611
4612 info!(
4613 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4614 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4615 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4616 );
4617 self.check_resources_with_log("post-sourcing")?;
4618
4619 Ok(SourcingSnapshot {
4620 spend_analyses,
4621 sourcing_projects,
4622 qualifications,
4623 rfx_events,
4624 bids: all_bids,
4625 bid_evaluations,
4626 contracts,
4627 catalog_items,
4628 scorecards,
4629 })
4630 }
4631
4632 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4638 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4639
4640 let parent_code = self
4641 .config
4642 .companies
4643 .first()
4644 .map(|c| c.code.clone())
4645 .unwrap_or_else(|| "PARENT".to_string());
4646
4647 let mut group = GroupStructure::new(parent_code);
4648
4649 for company in self.config.companies.iter().skip(1) {
4650 let sub =
4651 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4652 group.add_subsidiary(sub);
4653 }
4654
4655 group
4656 }
4657
4658 fn phase_intercompany(
4660 &mut self,
4661 journal_entries: &[JournalEntry],
4662 stats: &mut EnhancedGenerationStatistics,
4663 ) -> SynthResult<IntercompanySnapshot> {
4664 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4666 debug!("Phase 14b: Skipped (intercompany generation disabled)");
4667 return Ok(IntercompanySnapshot::default());
4668 }
4669
4670 if self.config.companies.len() < 2 {
4672 debug!(
4673 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4674 self.config.companies.len()
4675 );
4676 return Ok(IntercompanySnapshot::default());
4677 }
4678
4679 info!("Phase 14b: Generating Intercompany Transactions");
4680
4681 let group_structure = self.build_group_structure();
4684 debug!(
4685 "Group structure built: parent={}, subsidiaries={}",
4686 group_structure.parent_entity,
4687 group_structure.subsidiaries.len()
4688 );
4689
4690 let seed = self.seed;
4691 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4692 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4693 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4694
4695 let parent_code = self.config.companies[0].code.clone();
4698 let mut ownership_structure =
4699 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4700
4701 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4702 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4703 format!("REL{:03}", i + 1),
4704 parent_code.clone(),
4705 company.code.clone(),
4706 rust_decimal::Decimal::from(100), start_date,
4708 );
4709 ownership_structure.add_relationship(relationship);
4710 }
4711
4712 let tp_method = match self.config.intercompany.transfer_pricing_method {
4714 datasynth_config::schema::TransferPricingMethod::CostPlus => {
4715 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4716 }
4717 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4718 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4719 }
4720 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4721 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4722 }
4723 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4724 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4725 }
4726 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4727 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4728 }
4729 };
4730
4731 let ic_currency = self
4733 .config
4734 .companies
4735 .first()
4736 .map(|c| c.currency.clone())
4737 .unwrap_or_else(|| "USD".to_string());
4738 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4739 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4740 transfer_pricing_method: tp_method,
4741 markup_percent: rust_decimal::Decimal::from_f64_retain(
4742 self.config.intercompany.markup_percent,
4743 )
4744 .unwrap_or(rust_decimal::Decimal::from(5)),
4745 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4746 default_currency: ic_currency,
4747 ..Default::default()
4748 };
4749
4750 let mut ic_generator = datasynth_generators::ICGenerator::new(
4752 ic_gen_config,
4753 ownership_structure.clone(),
4754 seed + 50,
4755 );
4756
4757 let transactions_per_day = 3;
4760 let matched_pairs = ic_generator.generate_transactions_for_period(
4761 start_date,
4762 end_date,
4763 transactions_per_day,
4764 );
4765
4766 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4768 debug!(
4769 "Generated {} IC seller invoices, {} IC buyer POs",
4770 ic_doc_chains.seller_invoices.len(),
4771 ic_doc_chains.buyer_orders.len()
4772 );
4773
4774 let mut seller_entries = Vec::new();
4776 let mut buyer_entries = Vec::new();
4777 let fiscal_year = start_date.year();
4778
4779 for pair in &matched_pairs {
4780 let fiscal_period = pair.posting_date.month();
4781 let (seller_je, buyer_je) =
4782 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4783 seller_entries.push(seller_je);
4784 buyer_entries.push(buyer_je);
4785 }
4786
4787 let matching_config = datasynth_generators::ICMatchingConfig {
4789 base_currency: self
4790 .config
4791 .companies
4792 .first()
4793 .map(|c| c.currency.clone())
4794 .unwrap_or_else(|| "USD".to_string()),
4795 ..Default::default()
4796 };
4797 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4798 matching_engine.load_matched_pairs(&matched_pairs);
4799 let matching_result = matching_engine.run_matching(end_date);
4800
4801 let mut elimination_entries = Vec::new();
4803 if self.config.intercompany.generate_eliminations {
4804 let elim_config = datasynth_generators::EliminationConfig {
4805 consolidation_entity: "GROUP".to_string(),
4806 base_currency: self
4807 .config
4808 .companies
4809 .first()
4810 .map(|c| c.currency.clone())
4811 .unwrap_or_else(|| "USD".to_string()),
4812 ..Default::default()
4813 };
4814
4815 let mut elim_generator =
4816 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4817
4818 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4819 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4820 matching_result
4821 .matched_balances
4822 .iter()
4823 .chain(matching_result.unmatched_balances.iter())
4824 .cloned()
4825 .collect();
4826
4827 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4839 std::collections::HashMap::new();
4840 let mut equity_amounts: std::collections::HashMap<
4841 String,
4842 std::collections::HashMap<String, rust_decimal::Decimal>,
4843 > = std::collections::HashMap::new();
4844 {
4845 use rust_decimal::Decimal;
4846 let hundred = Decimal::from(100u32);
4847 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
4851 for sub in &group_structure.subsidiaries {
4852 let net_assets = {
4853 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4854 if na > Decimal::ZERO {
4855 na
4856 } else {
4857 Decimal::from(1_000_000u64)
4858 }
4859 };
4860 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4862 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4863
4864 let mut eq_map = std::collections::HashMap::new();
4867 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4868 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4869 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4870 equity_amounts.insert(sub.entity_code.clone(), eq_map);
4871 }
4872 }
4873
4874 let journal = elim_generator.generate_eliminations(
4875 &fiscal_period,
4876 end_date,
4877 &all_balances,
4878 &matched_pairs,
4879 &investment_amounts,
4880 &equity_amounts,
4881 );
4882
4883 elimination_entries = journal.entries.clone();
4884 }
4885
4886 let matched_pair_count = matched_pairs.len();
4887 let elimination_entry_count = elimination_entries.len();
4888 let match_rate = matching_result.match_rate;
4889
4890 stats.ic_matched_pair_count = matched_pair_count;
4891 stats.ic_elimination_count = elimination_entry_count;
4892 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4893
4894 info!(
4895 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4896 matched_pair_count,
4897 stats.ic_transaction_count,
4898 seller_entries.len(),
4899 buyer_entries.len(),
4900 elimination_entry_count,
4901 match_rate * 100.0
4902 );
4903 self.check_resources_with_log("post-intercompany")?;
4904
4905 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4909 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4910 use rust_decimal::Decimal;
4911
4912 let eight_pct = Decimal::new(8, 2); group_structure
4915 .subsidiaries
4916 .iter()
4917 .filter(|sub| {
4918 sub.nci_percentage > Decimal::ZERO
4919 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4920 })
4921 .map(|sub| {
4922 let net_assets_from_jes =
4926 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4927
4928 let net_assets = if net_assets_from_jes > Decimal::ZERO {
4929 net_assets_from_jes.round_dp(2)
4930 } else {
4931 Decimal::from(1_000_000u64)
4933 };
4934
4935 let net_income = (net_assets * eight_pct).round_dp(2);
4937
4938 NciMeasurement::compute(
4939 sub.entity_code.clone(),
4940 sub.nci_percentage,
4941 net_assets,
4942 net_income,
4943 )
4944 })
4945 .collect()
4946 };
4947
4948 if !nci_measurements.is_empty() {
4949 info!(
4950 "NCI measurements: {} subsidiaries with non-controlling interests",
4951 nci_measurements.len()
4952 );
4953 }
4954
4955 Ok(IntercompanySnapshot {
4956 group_structure: Some(group_structure),
4957 matched_pairs,
4958 seller_journal_entries: seller_entries,
4959 buyer_journal_entries: buyer_entries,
4960 elimination_entries,
4961 nci_measurements,
4962 ic_document_chains: Some(ic_doc_chains),
4963 matched_pair_count,
4964 elimination_entry_count,
4965 match_rate,
4966 })
4967 }
4968
4969 fn phase_financial_reporting(
4971 &mut self,
4972 document_flows: &DocumentFlowSnapshot,
4973 journal_entries: &[JournalEntry],
4974 coa: &Arc<ChartOfAccounts>,
4975 _hr: &HrSnapshot,
4976 _audit: &AuditSnapshot,
4977 stats: &mut EnhancedGenerationStatistics,
4978 ) -> SynthResult<FinancialReportingSnapshot> {
4979 let fs_enabled = self.phase_config.generate_financial_statements
4980 || self.config.financial_reporting.enabled;
4981 let br_enabled = self.phase_config.generate_bank_reconciliation;
4982
4983 if !fs_enabled && !br_enabled {
4984 debug!("Phase 15: Skipped (financial reporting disabled)");
4985 return Ok(FinancialReportingSnapshot::default());
4986 }
4987
4988 info!("Phase 15: Generating Financial Reporting Data");
4989
4990 let seed = self.seed;
4991 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4992 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4993
4994 let mut financial_statements = Vec::new();
4995 let mut bank_reconciliations = Vec::new();
4996 let mut trial_balances = Vec::new();
4997 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4998 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4999 Vec::new();
5000 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5002 std::collections::HashMap::new();
5003 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5005 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5007
5008 if fs_enabled {
5016 let has_journal_entries = !journal_entries.is_empty();
5017
5018 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5021 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5023
5024 let elimination_entries: Vec<&JournalEntry> = journal_entries
5026 .iter()
5027 .filter(|je| je.header.is_elimination)
5028 .collect();
5029
5030 for period in 0..self.config.global.period_months {
5032 let period_start = start_date + chrono::Months::new(period);
5033 let period_end =
5034 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5035 let fiscal_year = period_end.year() as u16;
5036 let fiscal_period = period_end.month() as u8;
5037 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5038
5039 let mut entity_tb_map: std::collections::HashMap<
5042 String,
5043 std::collections::HashMap<String, rust_decimal::Decimal>,
5044 > = std::collections::HashMap::new();
5045
5046 for (company_idx, company) in self.config.companies.iter().enumerate() {
5048 let company_code = company.code.as_str();
5049 let currency = company.currency.as_str();
5050 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5053 let mut company_fs_gen =
5054 FinancialStatementGenerator::new(seed + company_seed_offset);
5055
5056 if has_journal_entries {
5057 let tb_entries = Self::build_cumulative_trial_balance(
5058 journal_entries,
5059 coa,
5060 company_code,
5061 start_date,
5062 period_end,
5063 fiscal_year,
5064 fiscal_period,
5065 );
5066
5067 let entity_cat_map =
5069 entity_tb_map.entry(company_code.to_string()).or_default();
5070 for tb_entry in &tb_entries {
5071 let net = tb_entry.debit_balance - tb_entry.credit_balance;
5072 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5073 }
5074
5075 let stmts = company_fs_gen.generate(
5076 company_code,
5077 currency,
5078 &tb_entries,
5079 period_start,
5080 period_end,
5081 fiscal_year,
5082 fiscal_period,
5083 None,
5084 "SYS-AUTOCLOSE",
5085 );
5086
5087 let mut entity_stmts = Vec::new();
5088 for stmt in stmts {
5089 if stmt.statement_type == StatementType::CashFlowStatement {
5090 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5091 let cf_items = Self::build_cash_flow_from_trial_balances(
5092 &tb_entries,
5093 None,
5094 net_income,
5095 );
5096 entity_stmts.push(FinancialStatement {
5097 cash_flow_items: cf_items,
5098 ..stmt
5099 });
5100 } else {
5101 entity_stmts.push(stmt);
5102 }
5103 }
5104
5105 financial_statements.extend(entity_stmts.clone());
5107
5108 standalone_statements
5110 .entry(company_code.to_string())
5111 .or_default()
5112 .extend(entity_stmts);
5113
5114 if company_idx == 0 {
5117 trial_balances.push(PeriodTrialBalance {
5118 fiscal_year,
5119 fiscal_period,
5120 period_start,
5121 period_end,
5122 entries: tb_entries,
5123 });
5124 }
5125 } else {
5126 let tb_entries = Self::build_trial_balance_from_entries(
5128 journal_entries,
5129 coa,
5130 company_code,
5131 fiscal_year,
5132 fiscal_period,
5133 );
5134
5135 let stmts = company_fs_gen.generate(
5136 company_code,
5137 currency,
5138 &tb_entries,
5139 period_start,
5140 period_end,
5141 fiscal_year,
5142 fiscal_period,
5143 None,
5144 "SYS-AUTOCLOSE",
5145 );
5146 financial_statements.extend(stmts.clone());
5147 standalone_statements
5148 .entry(company_code.to_string())
5149 .or_default()
5150 .extend(stmts);
5151
5152 if company_idx == 0 && !tb_entries.is_empty() {
5153 trial_balances.push(PeriodTrialBalance {
5154 fiscal_year,
5155 fiscal_period,
5156 period_start,
5157 period_end,
5158 entries: tb_entries,
5159 });
5160 }
5161 }
5162 }
5163
5164 let group_currency = self
5167 .config
5168 .companies
5169 .first()
5170 .map(|c| c.currency.as_str())
5171 .unwrap_or("USD");
5172
5173 let period_eliminations: Vec<JournalEntry> = elimination_entries
5175 .iter()
5176 .filter(|je| {
5177 je.header.fiscal_year == fiscal_year
5178 && je.header.fiscal_period == fiscal_period
5179 })
5180 .map(|je| (*je).clone())
5181 .collect();
5182
5183 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5184 &entity_tb_map,
5185 &period_eliminations,
5186 &period_label,
5187 );
5188
5189 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5192 .line_items
5193 .iter()
5194 .map(|li| {
5195 let net = li.post_elimination_total;
5196 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5197 (net, rust_decimal::Decimal::ZERO)
5198 } else {
5199 (rust_decimal::Decimal::ZERO, -net)
5200 };
5201 datasynth_generators::TrialBalanceEntry {
5202 account_code: li.account_category.clone(),
5203 account_name: li.account_category.clone(),
5204 category: li.account_category.clone(),
5205 debit_balance: debit,
5206 credit_balance: credit,
5207 }
5208 })
5209 .collect();
5210
5211 let mut cons_stmts = cons_gen.generate(
5212 "GROUP",
5213 group_currency,
5214 &cons_tb,
5215 period_start,
5216 period_end,
5217 fiscal_year,
5218 fiscal_period,
5219 None,
5220 "SYS-AUTOCLOSE",
5221 );
5222
5223 let bs_categories: &[&str] = &[
5227 "CASH",
5228 "RECEIVABLES",
5229 "INVENTORY",
5230 "FIXEDASSETS",
5231 "PAYABLES",
5232 "ACCRUEDLIABILITIES",
5233 "LONGTERMDEBT",
5234 "EQUITY",
5235 ];
5236 let (bs_items, is_items): (Vec<_>, Vec<_>) =
5237 cons_line_items.into_iter().partition(|li| {
5238 let upper = li.label.to_uppercase();
5239 bs_categories.iter().any(|c| upper == *c)
5240 });
5241
5242 for stmt in &mut cons_stmts {
5243 stmt.is_consolidated = true;
5244 match stmt.statement_type {
5245 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5246 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5247 _ => {} }
5249 }
5250
5251 consolidated_statements.extend(cons_stmts);
5252 consolidation_schedules.push(schedule);
5253 }
5254
5255 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
5261 info!(
5262 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5263 stats.financial_statement_count,
5264 consolidated_statements.len(),
5265 has_journal_entries
5266 );
5267
5268 let entity_seeds: Vec<SegmentSeed> = self
5273 .config
5274 .companies
5275 .iter()
5276 .map(|c| SegmentSeed {
5277 code: c.code.clone(),
5278 name: c.name.clone(),
5279 currency: c.currency.clone(),
5280 })
5281 .collect();
5282
5283 let mut seg_gen = SegmentGenerator::new(seed + 30);
5284
5285 for period in 0..self.config.global.period_months {
5290 let period_end =
5291 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5292 let fiscal_year = period_end.year() as u16;
5293 let fiscal_period = period_end.month() as u8;
5294 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5295
5296 use datasynth_core::models::StatementType;
5297
5298 let cons_is = consolidated_statements.iter().find(|s| {
5300 s.fiscal_year == fiscal_year
5301 && s.fiscal_period == fiscal_period
5302 && s.statement_type == StatementType::IncomeStatement
5303 });
5304 let cons_bs = consolidated_statements.iter().find(|s| {
5305 s.fiscal_year == fiscal_year
5306 && s.fiscal_period == fiscal_period
5307 && s.statement_type == StatementType::BalanceSheet
5308 });
5309
5310 let is_stmt = cons_is.or_else(|| {
5312 financial_statements.iter().find(|s| {
5313 s.fiscal_year == fiscal_year
5314 && s.fiscal_period == fiscal_period
5315 && s.statement_type == StatementType::IncomeStatement
5316 })
5317 });
5318 let bs_stmt = cons_bs.or_else(|| {
5319 financial_statements.iter().find(|s| {
5320 s.fiscal_year == fiscal_year
5321 && s.fiscal_period == fiscal_period
5322 && s.statement_type == StatementType::BalanceSheet
5323 })
5324 });
5325
5326 let consolidated_revenue = is_stmt
5327 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5328 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
5330
5331 let consolidated_profit = is_stmt
5332 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5333 .map(|li| li.amount)
5334 .unwrap_or(rust_decimal::Decimal::ZERO);
5335
5336 let consolidated_assets = bs_stmt
5337 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5338 .map(|li| li.amount)
5339 .unwrap_or(rust_decimal::Decimal::ZERO);
5340
5341 if consolidated_revenue == rust_decimal::Decimal::ZERO
5343 && consolidated_assets == rust_decimal::Decimal::ZERO
5344 {
5345 continue;
5346 }
5347
5348 let group_code = self
5349 .config
5350 .companies
5351 .first()
5352 .map(|c| c.code.as_str())
5353 .unwrap_or("GROUP");
5354
5355 let total_depr: rust_decimal::Decimal = journal_entries
5358 .iter()
5359 .filter(|je| je.header.document_type == "CL")
5360 .flat_map(|je| je.lines.iter())
5361 .filter(|l| l.gl_account.starts_with("6000"))
5362 .map(|l| l.debit_amount)
5363 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5364 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5365 Some(total_depr)
5366 } else {
5367 None
5368 };
5369
5370 let (segs, recon) = seg_gen.generate(
5371 group_code,
5372 &period_label,
5373 consolidated_revenue,
5374 consolidated_profit,
5375 consolidated_assets,
5376 &entity_seeds,
5377 depr_param,
5378 );
5379 segment_reports.extend(segs);
5380 segment_reconciliations.push(recon);
5381 }
5382
5383 info!(
5384 "Segment reports generated: {} segments, {} reconciliations",
5385 segment_reports.len(),
5386 segment_reconciliations.len()
5387 );
5388 }
5389
5390 if br_enabled && !document_flows.payments.is_empty() {
5392 let employee_ids: Vec<String> = self
5393 .master_data
5394 .employees
5395 .iter()
5396 .map(|e| e.employee_id.clone())
5397 .collect();
5398 let mut br_gen =
5399 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5400
5401 for company in &self.config.companies {
5403 let company_payments: Vec<PaymentReference> = document_flows
5404 .payments
5405 .iter()
5406 .filter(|p| p.header.company_code == company.code)
5407 .map(|p| PaymentReference {
5408 id: p.header.document_id.clone(),
5409 amount: if p.is_vendor { p.amount } else { -p.amount },
5410 date: p.header.document_date,
5411 reference: p
5412 .check_number
5413 .clone()
5414 .or_else(|| p.wire_reference.clone())
5415 .unwrap_or_else(|| p.header.document_id.clone()),
5416 })
5417 .collect();
5418
5419 if company_payments.is_empty() {
5420 continue;
5421 }
5422
5423 let bank_account_id = format!("{}-MAIN", company.code);
5424
5425 for period in 0..self.config.global.period_months {
5427 let period_start = start_date + chrono::Months::new(period);
5428 let period_end =
5429 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5430
5431 let period_payments: Vec<PaymentReference> = company_payments
5432 .iter()
5433 .filter(|p| p.date >= period_start && p.date <= period_end)
5434 .cloned()
5435 .collect();
5436
5437 let recon = br_gen.generate(
5438 &company.code,
5439 &bank_account_id,
5440 period_start,
5441 period_end,
5442 &company.currency,
5443 &period_payments,
5444 );
5445 bank_reconciliations.push(recon);
5446 }
5447 }
5448 info!(
5449 "Bank reconciliations generated: {} reconciliations",
5450 bank_reconciliations.len()
5451 );
5452 }
5453
5454 stats.bank_reconciliation_count = bank_reconciliations.len();
5455 self.check_resources_with_log("post-financial-reporting")?;
5456
5457 if !trial_balances.is_empty() {
5458 info!(
5459 "Period-close trial balances captured: {} periods",
5460 trial_balances.len()
5461 );
5462 }
5463
5464 let notes_to_financial_statements = Vec::new();
5468
5469 Ok(FinancialReportingSnapshot {
5470 financial_statements,
5471 standalone_statements,
5472 consolidated_statements,
5473 consolidation_schedules,
5474 bank_reconciliations,
5475 trial_balances,
5476 segment_reports,
5477 segment_reconciliations,
5478 notes_to_financial_statements,
5479 })
5480 }
5481
5482 fn generate_notes_to_financial_statements(
5489 &self,
5490 financial_reporting: &mut FinancialReportingSnapshot,
5491 accounting_standards: &AccountingStandardsSnapshot,
5492 tax: &TaxSnapshot,
5493 hr: &HrSnapshot,
5494 audit: &AuditSnapshot,
5495 treasury: &TreasurySnapshot,
5496 ) {
5497 use datasynth_config::schema::AccountingFrameworkConfig;
5498 use datasynth_core::models::StatementType;
5499 use datasynth_generators::period_close::notes_generator::{
5500 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5501 };
5502
5503 let seed = self.seed;
5504 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5505 {
5506 Ok(d) => d,
5507 Err(_) => return,
5508 };
5509
5510 let mut notes_gen = NotesGenerator::new(seed + 4235);
5511
5512 for company in &self.config.companies {
5513 let last_period_end = start_date
5514 + chrono::Months::new(self.config.global.period_months)
5515 - chrono::Days::new(1);
5516 let fiscal_year = last_period_end.year() as u16;
5517
5518 let entity_is = financial_reporting
5520 .standalone_statements
5521 .get(&company.code)
5522 .and_then(|stmts| {
5523 stmts.iter().find(|s| {
5524 s.fiscal_year == fiscal_year
5525 && s.statement_type == StatementType::IncomeStatement
5526 })
5527 });
5528 let entity_bs = financial_reporting
5529 .standalone_statements
5530 .get(&company.code)
5531 .and_then(|stmts| {
5532 stmts.iter().find(|s| {
5533 s.fiscal_year == fiscal_year
5534 && s.statement_type == StatementType::BalanceSheet
5535 })
5536 });
5537
5538 let revenue_amount = entity_is
5540 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5541 .map(|li| li.amount);
5542 let ppe_gross = entity_bs
5543 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5544 .map(|li| li.amount);
5545
5546 let framework = match self
5547 .config
5548 .accounting_standards
5549 .framework
5550 .unwrap_or_default()
5551 {
5552 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5553 "IFRS".to_string()
5554 }
5555 _ => "US GAAP".to_string(),
5556 };
5557
5558 let (entity_dta, entity_dtl) = {
5561 let mut dta = rust_decimal::Decimal::ZERO;
5562 let mut dtl = rust_decimal::Decimal::ZERO;
5563 for rf in &tax.deferred_tax.rollforwards {
5564 if rf.entity_code == company.code {
5565 dta += rf.closing_dta;
5566 dtl += rf.closing_dtl;
5567 }
5568 }
5569 (
5570 if dta > rust_decimal::Decimal::ZERO {
5571 Some(dta)
5572 } else {
5573 None
5574 },
5575 if dtl > rust_decimal::Decimal::ZERO {
5576 Some(dtl)
5577 } else {
5578 None
5579 },
5580 )
5581 };
5582
5583 let entity_provisions: Vec<_> = accounting_standards
5586 .provisions
5587 .iter()
5588 .filter(|p| p.entity_code == company.code)
5589 .collect();
5590 let provision_count = entity_provisions.len();
5591 let total_provisions = if provision_count > 0 {
5592 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5593 } else {
5594 None
5595 };
5596
5597 let entity_pension_plan_count = hr
5599 .pension_plans
5600 .iter()
5601 .filter(|p| p.entity_code == company.code)
5602 .count();
5603 let entity_total_dbo: Option<rust_decimal::Decimal> = {
5604 let sum: rust_decimal::Decimal = hr
5605 .pension_disclosures
5606 .iter()
5607 .filter(|d| {
5608 hr.pension_plans
5609 .iter()
5610 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5611 })
5612 .map(|d| d.net_pension_liability)
5613 .sum();
5614 let plan_assets_sum: rust_decimal::Decimal = hr
5615 .pension_plan_assets
5616 .iter()
5617 .filter(|a| {
5618 hr.pension_plans
5619 .iter()
5620 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5621 })
5622 .map(|a| a.fair_value_closing)
5623 .sum();
5624 if entity_pension_plan_count > 0 {
5625 Some(sum + plan_assets_sum)
5626 } else {
5627 None
5628 }
5629 };
5630 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5631 let sum: rust_decimal::Decimal = hr
5632 .pension_plan_assets
5633 .iter()
5634 .filter(|a| {
5635 hr.pension_plans
5636 .iter()
5637 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5638 })
5639 .map(|a| a.fair_value_closing)
5640 .sum();
5641 if entity_pension_plan_count > 0 {
5642 Some(sum)
5643 } else {
5644 None
5645 }
5646 };
5647
5648 let rp_count = audit.related_party_transactions.len();
5651 let se_count = audit.subsequent_events.len();
5652 let adjusting_count = audit
5653 .subsequent_events
5654 .iter()
5655 .filter(|e| {
5656 matches!(
5657 e.classification,
5658 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5659 )
5660 })
5661 .count();
5662
5663 let ctx = NotesGeneratorContext {
5664 entity_code: company.code.clone(),
5665 framework,
5666 period: format!("FY{}", fiscal_year),
5667 period_end: last_period_end,
5668 currency: company.currency.clone(),
5669 revenue_amount,
5670 total_ppe_gross: ppe_gross,
5671 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5672 deferred_tax_asset: entity_dta,
5674 deferred_tax_liability: entity_dtl,
5675 provision_count,
5677 total_provisions,
5678 pension_plan_count: entity_pension_plan_count,
5680 total_dbo: entity_total_dbo,
5681 total_plan_assets: entity_total_plan_assets,
5682 related_party_transaction_count: rp_count,
5684 subsequent_event_count: se_count,
5685 adjusting_event_count: adjusting_count,
5686 ..NotesGeneratorContext::default()
5687 };
5688
5689 let entity_notes = notes_gen.generate(&ctx);
5690 let standard_note_count = entity_notes.len() as u32;
5691 info!(
5692 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5693 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5694 );
5695 financial_reporting
5696 .notes_to_financial_statements
5697 .extend(entity_notes);
5698
5699 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5701 .debt_instruments
5702 .iter()
5703 .filter(|d| d.entity_id == company.code)
5704 .map(|d| {
5705 (
5706 format!("{:?}", d.instrument_type),
5707 d.principal,
5708 d.maturity_date.to_string(),
5709 )
5710 })
5711 .collect();
5712
5713 let hedge_count = treasury.hedge_relationships.len();
5714 let effective_hedges = treasury
5715 .hedge_relationships
5716 .iter()
5717 .filter(|h| h.is_effective)
5718 .count();
5719 let total_notional: rust_decimal::Decimal = treasury
5720 .hedging_instruments
5721 .iter()
5722 .map(|h| h.notional_amount)
5723 .sum();
5724 let total_fair_value: rust_decimal::Decimal = treasury
5725 .hedging_instruments
5726 .iter()
5727 .map(|h| h.fair_value)
5728 .sum();
5729
5730 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5732 .provisions
5733 .iter()
5734 .filter(|p| p.entity_code == company.code)
5735 .map(|p| p.id.as_str())
5736 .collect();
5737 let provision_movements: Vec<(
5738 String,
5739 rust_decimal::Decimal,
5740 rust_decimal::Decimal,
5741 rust_decimal::Decimal,
5742 )> = accounting_standards
5743 .provision_movements
5744 .iter()
5745 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5746 .map(|m| {
5747 let prov_type = accounting_standards
5748 .provisions
5749 .iter()
5750 .find(|p| p.id == m.provision_id)
5751 .map(|p| format!("{:?}", p.provision_type))
5752 .unwrap_or_else(|| "Unknown".to_string());
5753 (prov_type, m.opening, m.additions, m.closing)
5754 })
5755 .collect();
5756
5757 let enhanced_ctx = EnhancedNotesContext {
5758 entity_code: company.code.clone(),
5759 period: format!("FY{}", fiscal_year),
5760 currency: company.currency.clone(),
5761 finished_goods_value: rust_decimal::Decimal::ZERO,
5763 wip_value: rust_decimal::Decimal::ZERO,
5764 raw_materials_value: rust_decimal::Decimal::ZERO,
5765 debt_instruments,
5766 hedge_count,
5767 effective_hedges,
5768 total_notional,
5769 total_fair_value,
5770 provision_movements,
5771 };
5772
5773 let enhanced_notes =
5774 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5775 if !enhanced_notes.is_empty() {
5776 info!(
5777 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5778 company.code,
5779 enhanced_notes.len(),
5780 enhanced_ctx.debt_instruments.len(),
5781 hedge_count,
5782 enhanced_ctx.provision_movements.len(),
5783 );
5784 financial_reporting
5785 .notes_to_financial_statements
5786 .extend(enhanced_notes);
5787 }
5788 }
5789 }
5790
5791 fn build_trial_balance_from_entries(
5797 journal_entries: &[JournalEntry],
5798 coa: &ChartOfAccounts,
5799 company_code: &str,
5800 fiscal_year: u16,
5801 fiscal_period: u8,
5802 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5803 use rust_decimal::Decimal;
5804
5805 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5807 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5808
5809 for je in journal_entries {
5810 if je.header.company_code != company_code
5812 || je.header.fiscal_year != fiscal_year
5813 || je.header.fiscal_period != fiscal_period
5814 {
5815 continue;
5816 }
5817
5818 for line in &je.lines {
5819 let acct = &line.gl_account;
5820 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5821 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5822 }
5823 }
5824
5825 let mut all_accounts: Vec<&String> = account_debits
5827 .keys()
5828 .chain(account_credits.keys())
5829 .collect::<std::collections::HashSet<_>>()
5830 .into_iter()
5831 .collect();
5832 all_accounts.sort();
5833
5834 let mut entries = Vec::new();
5835
5836 for acct_number in all_accounts {
5837 let debit = account_debits
5838 .get(acct_number)
5839 .copied()
5840 .unwrap_or(Decimal::ZERO);
5841 let credit = account_credits
5842 .get(acct_number)
5843 .copied()
5844 .unwrap_or(Decimal::ZERO);
5845
5846 if debit.is_zero() && credit.is_zero() {
5847 continue;
5848 }
5849
5850 let account_name = coa
5852 .get_account(acct_number)
5853 .map(|gl| gl.short_description.clone())
5854 .unwrap_or_else(|| format!("Account {acct_number}"));
5855
5856 let category = Self::category_from_account_code(acct_number);
5861
5862 entries.push(datasynth_generators::TrialBalanceEntry {
5863 account_code: acct_number.clone(),
5864 account_name,
5865 category,
5866 debit_balance: debit,
5867 credit_balance: credit,
5868 });
5869 }
5870
5871 entries
5872 }
5873
5874 fn build_cumulative_trial_balance(
5881 journal_entries: &[JournalEntry],
5882 coa: &ChartOfAccounts,
5883 company_code: &str,
5884 start_date: NaiveDate,
5885 period_end: NaiveDate,
5886 fiscal_year: u16,
5887 fiscal_period: u8,
5888 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5889 use rust_decimal::Decimal;
5890
5891 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5893 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5894
5895 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5897 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5898
5899 for je in journal_entries {
5900 if je.header.company_code != company_code {
5901 continue;
5902 }
5903
5904 for line in &je.lines {
5905 let acct = &line.gl_account;
5906 let category = Self::category_from_account_code(acct);
5907 let is_bs_account = matches!(
5908 category.as_str(),
5909 "Cash"
5910 | "Receivables"
5911 | "Inventory"
5912 | "FixedAssets"
5913 | "Payables"
5914 | "AccruedLiabilities"
5915 | "LongTermDebt"
5916 | "Equity"
5917 );
5918
5919 if is_bs_account {
5920 if je.header.document_date <= period_end
5922 && je.header.document_date >= start_date
5923 {
5924 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5925 line.debit_amount;
5926 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5927 line.credit_amount;
5928 }
5929 } else {
5930 if je.header.fiscal_year == fiscal_year
5932 && je.header.fiscal_period == fiscal_period
5933 {
5934 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5935 line.debit_amount;
5936 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5937 line.credit_amount;
5938 }
5939 }
5940 }
5941 }
5942
5943 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5945 all_accounts.extend(bs_debits.keys().cloned());
5946 all_accounts.extend(bs_credits.keys().cloned());
5947 all_accounts.extend(is_debits.keys().cloned());
5948 all_accounts.extend(is_credits.keys().cloned());
5949
5950 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5951 sorted_accounts.sort();
5952
5953 let mut entries = Vec::new();
5954
5955 for acct_number in &sorted_accounts {
5956 let category = Self::category_from_account_code(acct_number);
5957 let is_bs_account = matches!(
5958 category.as_str(),
5959 "Cash"
5960 | "Receivables"
5961 | "Inventory"
5962 | "FixedAssets"
5963 | "Payables"
5964 | "AccruedLiabilities"
5965 | "LongTermDebt"
5966 | "Equity"
5967 );
5968
5969 let (debit, credit) = if is_bs_account {
5970 (
5971 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5972 bs_credits
5973 .get(acct_number)
5974 .copied()
5975 .unwrap_or(Decimal::ZERO),
5976 )
5977 } else {
5978 (
5979 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5980 is_credits
5981 .get(acct_number)
5982 .copied()
5983 .unwrap_or(Decimal::ZERO),
5984 )
5985 };
5986
5987 if debit.is_zero() && credit.is_zero() {
5988 continue;
5989 }
5990
5991 let account_name = coa
5992 .get_account(acct_number)
5993 .map(|gl| gl.short_description.clone())
5994 .unwrap_or_else(|| format!("Account {acct_number}"));
5995
5996 entries.push(datasynth_generators::TrialBalanceEntry {
5997 account_code: acct_number.clone(),
5998 account_name,
5999 category,
6000 debit_balance: debit,
6001 credit_balance: credit,
6002 });
6003 }
6004
6005 entries
6006 }
6007
6008 fn build_cash_flow_from_trial_balances(
6013 current_tb: &[datasynth_generators::TrialBalanceEntry],
6014 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6015 net_income: rust_decimal::Decimal,
6016 ) -> Vec<CashFlowItem> {
6017 use rust_decimal::Decimal;
6018
6019 let aggregate =
6021 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6022 let mut map: HashMap<String, Decimal> = HashMap::new();
6023 for entry in tb {
6024 let net = entry.debit_balance - entry.credit_balance;
6025 *map.entry(entry.category.clone()).or_default() += net;
6026 }
6027 map
6028 };
6029
6030 let current = aggregate(current_tb);
6031 let prior = prior_tb.map(aggregate);
6032
6033 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6035 *map.get(key).unwrap_or(&Decimal::ZERO)
6036 };
6037
6038 let change = |key: &str| -> Decimal {
6040 let curr = get(¤t, key);
6041 match &prior {
6042 Some(p) => curr - get(p, key),
6043 None => curr,
6044 }
6045 };
6046
6047 let fixed_asset_change = change("FixedAssets");
6050 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6051 -fixed_asset_change
6052 } else {
6053 Decimal::ZERO
6054 };
6055
6056 let ar_change = change("Receivables");
6058 let inventory_change = change("Inventory");
6059 let ap_change = change("Payables");
6061 let accrued_change = change("AccruedLiabilities");
6062
6063 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6064 + (-ap_change)
6065 + (-accrued_change);
6066
6067 let capex = if fixed_asset_change > Decimal::ZERO {
6069 -fixed_asset_change
6070 } else {
6071 Decimal::ZERO
6072 };
6073 let investing_cf = capex;
6074
6075 let debt_change = -change("LongTermDebt");
6077 let equity_change = -change("Equity");
6078 let financing_cf = debt_change + equity_change;
6079
6080 let net_change = operating_cf + investing_cf + financing_cf;
6081
6082 vec![
6083 CashFlowItem {
6084 item_code: "CF-NI".to_string(),
6085 label: "Net Income".to_string(),
6086 category: CashFlowCategory::Operating,
6087 amount: net_income,
6088 amount_prior: None,
6089 sort_order: 1,
6090 is_total: false,
6091 },
6092 CashFlowItem {
6093 item_code: "CF-DEP".to_string(),
6094 label: "Depreciation & Amortization".to_string(),
6095 category: CashFlowCategory::Operating,
6096 amount: depreciation_addback,
6097 amount_prior: None,
6098 sort_order: 2,
6099 is_total: false,
6100 },
6101 CashFlowItem {
6102 item_code: "CF-AR".to_string(),
6103 label: "Change in Accounts Receivable".to_string(),
6104 category: CashFlowCategory::Operating,
6105 amount: -ar_change,
6106 amount_prior: None,
6107 sort_order: 3,
6108 is_total: false,
6109 },
6110 CashFlowItem {
6111 item_code: "CF-AP".to_string(),
6112 label: "Change in Accounts Payable".to_string(),
6113 category: CashFlowCategory::Operating,
6114 amount: -ap_change,
6115 amount_prior: None,
6116 sort_order: 4,
6117 is_total: false,
6118 },
6119 CashFlowItem {
6120 item_code: "CF-INV".to_string(),
6121 label: "Change in Inventory".to_string(),
6122 category: CashFlowCategory::Operating,
6123 amount: -inventory_change,
6124 amount_prior: None,
6125 sort_order: 5,
6126 is_total: false,
6127 },
6128 CashFlowItem {
6129 item_code: "CF-OP".to_string(),
6130 label: "Net Cash from Operating Activities".to_string(),
6131 category: CashFlowCategory::Operating,
6132 amount: operating_cf,
6133 amount_prior: None,
6134 sort_order: 6,
6135 is_total: true,
6136 },
6137 CashFlowItem {
6138 item_code: "CF-CAPEX".to_string(),
6139 label: "Capital Expenditures".to_string(),
6140 category: CashFlowCategory::Investing,
6141 amount: capex,
6142 amount_prior: None,
6143 sort_order: 7,
6144 is_total: false,
6145 },
6146 CashFlowItem {
6147 item_code: "CF-INV-T".to_string(),
6148 label: "Net Cash from Investing Activities".to_string(),
6149 category: CashFlowCategory::Investing,
6150 amount: investing_cf,
6151 amount_prior: None,
6152 sort_order: 8,
6153 is_total: true,
6154 },
6155 CashFlowItem {
6156 item_code: "CF-DEBT".to_string(),
6157 label: "Net Borrowings / (Repayments)".to_string(),
6158 category: CashFlowCategory::Financing,
6159 amount: debt_change,
6160 amount_prior: None,
6161 sort_order: 9,
6162 is_total: false,
6163 },
6164 CashFlowItem {
6165 item_code: "CF-EQ".to_string(),
6166 label: "Equity Changes".to_string(),
6167 category: CashFlowCategory::Financing,
6168 amount: equity_change,
6169 amount_prior: None,
6170 sort_order: 10,
6171 is_total: false,
6172 },
6173 CashFlowItem {
6174 item_code: "CF-FIN-T".to_string(),
6175 label: "Net Cash from Financing Activities".to_string(),
6176 category: CashFlowCategory::Financing,
6177 amount: financing_cf,
6178 amount_prior: None,
6179 sort_order: 11,
6180 is_total: true,
6181 },
6182 CashFlowItem {
6183 item_code: "CF-NET".to_string(),
6184 label: "Net Change in Cash".to_string(),
6185 category: CashFlowCategory::Operating,
6186 amount: net_change,
6187 amount_prior: None,
6188 sort_order: 12,
6189 is_total: true,
6190 },
6191 ]
6192 }
6193
6194 fn calculate_net_income_from_tb(
6198 tb: &[datasynth_generators::TrialBalanceEntry],
6199 ) -> rust_decimal::Decimal {
6200 use rust_decimal::Decimal;
6201
6202 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6203 for entry in tb {
6204 let net = entry.debit_balance - entry.credit_balance;
6205 *aggregated.entry(entry.category.clone()).or_default() += net;
6206 }
6207
6208 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6209 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6210 let opex = *aggregated
6211 .get("OperatingExpenses")
6212 .unwrap_or(&Decimal::ZERO);
6213 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6214 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6215
6216 let operating_income = revenue - cogs - opex - other_expenses - other_income;
6219 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
6221 operating_income - tax
6222 }
6223
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// | Prefix      | Category             |
/// |-------------|----------------------|
/// | `10`        | `Cash`               |
/// | `11`        | `Receivables`        |
/// | `12`–`14`   | `Inventory`          |
/// | `15`–`19`   | `FixedAssets`        |
/// | `20`        | `Payables`           |
/// | `21`–`24`   | `AccruedLiabilities` |
/// | `25`–`29`   | `LongTermDebt`       |
/// | `30`–`39`   | `Equity`             |
/// | `40`–`44`   | `Revenue`            |
/// | `50`–`52`   | `CostOfSales`        |
/// | `60`–`69`   | `OperatingExpenses`  |
/// | `70`–`74`   | `OtherIncome`        |
/// | `80`–`89`   | `OtherExpenses`      |
///
/// Any other input — unlisted prefixes such as `45` or `90`, codes shorter than
/// two characters, non-digit prefixes — yields `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything unlisted is treated as an operating expense.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
6252
6253 fn phase_hr_data(
6255 &mut self,
6256 stats: &mut EnhancedGenerationStatistics,
6257 ) -> SynthResult<HrSnapshot> {
6258 if !self.phase_config.generate_hr {
6259 debug!("Phase 16: Skipped (HR generation disabled)");
6260 return Ok(HrSnapshot::default());
6261 }
6262
6263 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6264
6265 let seed = self.seed;
6266 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6267 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6268 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6269 let company_code = self
6270 .config
6271 .companies
6272 .first()
6273 .map(|c| c.code.as_str())
6274 .unwrap_or("1000");
6275 let currency = self
6276 .config
6277 .companies
6278 .first()
6279 .map(|c| c.currency.as_str())
6280 .unwrap_or("USD");
6281
6282 let employee_ids: Vec<String> = self
6283 .master_data
6284 .employees
6285 .iter()
6286 .map(|e| e.employee_id.clone())
6287 .collect();
6288
6289 if employee_ids.is_empty() {
6290 debug!("Phase 16: Skipped (no employees available)");
6291 return Ok(HrSnapshot::default());
6292 }
6293
6294 let cost_center_ids: Vec<String> = self
6297 .master_data
6298 .employees
6299 .iter()
6300 .filter_map(|e| e.cost_center.clone())
6301 .collect::<std::collections::HashSet<_>>()
6302 .into_iter()
6303 .collect();
6304
6305 let mut snapshot = HrSnapshot::default();
6306
6307 if self.config.hr.payroll.enabled {
6309 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6310 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6311
6312 let payroll_pack = self.primary_pack();
6314
6315 payroll_gen.set_country_pack(payroll_pack.clone());
6318
6319 let employees_with_salary: Vec<(
6320 String,
6321 rust_decimal::Decimal,
6322 Option<String>,
6323 Option<String>,
6324 )> = self
6325 .master_data
6326 .employees
6327 .iter()
6328 .map(|e| {
6329 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6332 e.base_salary
6333 } else {
6334 rust_decimal::Decimal::from(60_000)
6335 };
6336 (
6337 e.employee_id.clone(),
6338 annual, e.cost_center.clone(),
6340 e.department_id.clone(),
6341 )
6342 })
6343 .collect();
6344
6345 let change_history = &self.master_data.employee_change_history;
6348 let has_changes = !change_history.is_empty();
6349 if has_changes {
6350 debug!(
6351 "Payroll will incorporate {} employee change events",
6352 change_history.len()
6353 );
6354 }
6355
6356 for month in 0..self.config.global.period_months {
6357 let period_start = start_date + chrono::Months::new(month);
6358 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6359 let (run, items) = if has_changes {
6360 payroll_gen.generate_with_changes(
6361 company_code,
6362 &employees_with_salary,
6363 period_start,
6364 period_end,
6365 currency,
6366 change_history,
6367 )
6368 } else {
6369 payroll_gen.generate(
6370 company_code,
6371 &employees_with_salary,
6372 period_start,
6373 period_end,
6374 currency,
6375 )
6376 };
6377 snapshot.payroll_runs.push(run);
6378 snapshot.payroll_run_count += 1;
6379 snapshot.payroll_line_item_count += items.len();
6380 snapshot.payroll_line_items.extend(items);
6381 }
6382 }
6383
6384 if self.config.hr.time_attendance.enabled {
6386 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6387 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6388 let entries = time_gen.generate(
6389 &employee_ids,
6390 start_date,
6391 end_date,
6392 &self.config.hr.time_attendance,
6393 );
6394 snapshot.time_entry_count = entries.len();
6395 snapshot.time_entries = entries;
6396 }
6397
6398 if self.config.hr.expenses.enabled {
6400 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6401 .with_pools(employee_ids.clone(), cost_center_ids.clone());
6402 expense_gen.set_country_pack(self.primary_pack().clone());
6403 let company_currency = self
6404 .config
6405 .companies
6406 .first()
6407 .map(|c| c.currency.as_str())
6408 .unwrap_or("USD");
6409 let reports = expense_gen.generate_with_currency(
6410 &employee_ids,
6411 start_date,
6412 end_date,
6413 &self.config.hr.expenses,
6414 company_currency,
6415 );
6416 snapshot.expense_report_count = reports.len();
6417 snapshot.expense_reports = reports;
6418 }
6419
6420 if self.config.hr.payroll.enabled {
6422 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6423 let employee_pairs: Vec<(String, String)> = self
6424 .master_data
6425 .employees
6426 .iter()
6427 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6428 .collect();
6429 let enrollments =
6430 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6431 snapshot.benefit_enrollment_count = enrollments.len();
6432 snapshot.benefit_enrollments = enrollments;
6433 }
6434
6435 if self.phase_config.generate_hr {
6437 let entity_name = self
6438 .config
6439 .companies
6440 .first()
6441 .map(|c| c.name.as_str())
6442 .unwrap_or("Entity");
6443 let period_months = self.config.global.period_months;
6444 let period_label = {
6445 let y = start_date.year();
6446 let m = start_date.month();
6447 if period_months >= 12 {
6448 format!("FY{y}")
6449 } else {
6450 format!("{y}-{m:02}")
6451 }
6452 };
6453 let reporting_date =
6454 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6455
6456 let avg_salary: Option<rust_decimal::Decimal> = {
6461 let employee_count = employee_ids.len();
6462 if self.config.hr.payroll.enabled
6463 && employee_count > 0
6464 && !snapshot.payroll_runs.is_empty()
6465 {
6466 let total_gross: rust_decimal::Decimal = snapshot
6468 .payroll_runs
6469 .iter()
6470 .filter(|r| r.company_code == company_code)
6471 .map(|r| r.total_gross)
6472 .sum();
6473 if total_gross > rust_decimal::Decimal::ZERO {
6474 let annual_total = if period_months > 0 && period_months < 12 {
6476 total_gross * rust_decimal::Decimal::from(12u32)
6477 / rust_decimal::Decimal::from(period_months)
6478 } else {
6479 total_gross
6480 };
6481 Some(
6482 (annual_total / rust_decimal::Decimal::from(employee_count))
6483 .round_dp(2),
6484 )
6485 } else {
6486 None
6487 }
6488 } else {
6489 None
6490 }
6491 };
6492
6493 let mut pension_gen =
6494 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6495 let pension_snap = pension_gen.generate(
6496 company_code,
6497 entity_name,
6498 &period_label,
6499 reporting_date,
6500 employee_ids.len(),
6501 currency,
6502 avg_salary,
6503 period_months,
6504 );
6505 snapshot.pension_plan_count = pension_snap.plans.len();
6506 snapshot.pension_plans = pension_snap.plans;
6507 snapshot.pension_obligations = pension_snap.obligations;
6508 snapshot.pension_plan_assets = pension_snap.plan_assets;
6509 snapshot.pension_disclosures = pension_snap.disclosures;
6510 snapshot.pension_journal_entries = pension_snap.journal_entries;
6515 }
6516
6517 if self.phase_config.generate_hr && !employee_ids.is_empty() {
6519 let period_months = self.config.global.period_months;
6520 let period_label = {
6521 let y = start_date.year();
6522 let m = start_date.month();
6523 if period_months >= 12 {
6524 format!("FY{y}")
6525 } else {
6526 format!("{y}-{m:02}")
6527 }
6528 };
6529 let reporting_date =
6530 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6531
6532 let mut stock_comp_gen =
6533 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6534 let stock_snap = stock_comp_gen.generate(
6535 company_code,
6536 &employee_ids,
6537 start_date,
6538 &period_label,
6539 reporting_date,
6540 currency,
6541 );
6542 snapshot.stock_grant_count = stock_snap.grants.len();
6543 snapshot.stock_grants = stock_snap.grants;
6544 snapshot.stock_comp_expenses = stock_snap.expenses;
6545 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6546 }
6547
6548 stats.payroll_run_count = snapshot.payroll_run_count;
6549 stats.time_entry_count = snapshot.time_entry_count;
6550 stats.expense_report_count = snapshot.expense_report_count;
6551 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6552 stats.pension_plan_count = snapshot.pension_plan_count;
6553 stats.stock_grant_count = snapshot.stock_grant_count;
6554
6555 info!(
6556 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6557 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6558 snapshot.time_entry_count, snapshot.expense_report_count,
6559 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6560 snapshot.stock_grant_count
6561 );
6562 self.check_resources_with_log("post-hr")?;
6563
6564 Ok(snapshot)
6565 }
6566
6567 fn phase_accounting_standards(
6569 &mut self,
6570 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6571 journal_entries: &[JournalEntry],
6572 stats: &mut EnhancedGenerationStatistics,
6573 ) -> SynthResult<AccountingStandardsSnapshot> {
6574 if !self.phase_config.generate_accounting_standards {
6575 debug!("Phase 17: Skipped (accounting standards generation disabled)");
6576 return Ok(AccountingStandardsSnapshot::default());
6577 }
6578 info!("Phase 17: Generating Accounting Standards Data");
6579
6580 let seed = self.seed;
6581 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6582 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6583 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6584 let company_code = self
6585 .config
6586 .companies
6587 .first()
6588 .map(|c| c.code.as_str())
6589 .unwrap_or("1000");
6590 let currency = self
6591 .config
6592 .companies
6593 .first()
6594 .map(|c| c.currency.as_str())
6595 .unwrap_or("USD");
6596
6597 let framework = match self.config.accounting_standards.framework {
6602 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6603 datasynth_standards::framework::AccountingFramework::UsGaap
6604 }
6605 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6606 datasynth_standards::framework::AccountingFramework::Ifrs
6607 }
6608 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6609 datasynth_standards::framework::AccountingFramework::DualReporting
6610 }
6611 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6612 datasynth_standards::framework::AccountingFramework::FrenchGaap
6613 }
6614 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6615 datasynth_standards::framework::AccountingFramework::GermanGaap
6616 }
6617 None => {
6618 let pack = self.primary_pack();
6620 let pack_fw = pack.accounting.framework.as_str();
6621 match pack_fw {
6622 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6623 "dual_reporting" => {
6624 datasynth_standards::framework::AccountingFramework::DualReporting
6625 }
6626 "french_gaap" => {
6627 datasynth_standards::framework::AccountingFramework::FrenchGaap
6628 }
6629 "german_gaap" | "hgb" => {
6630 datasynth_standards::framework::AccountingFramework::GermanGaap
6631 }
6632 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6634 }
6635 }
6636 };
6637
6638 let mut snapshot = AccountingStandardsSnapshot::default();
6639
6640 if self.config.accounting_standards.revenue_recognition.enabled {
6642 let customer_ids: Vec<String> = self
6643 .master_data
6644 .customers
6645 .iter()
6646 .map(|c| c.customer_id.clone())
6647 .collect();
6648
6649 if !customer_ids.is_empty() {
6650 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6651 let contracts = rev_gen.generate(
6652 company_code,
6653 &customer_ids,
6654 start_date,
6655 end_date,
6656 currency,
6657 &self.config.accounting_standards.revenue_recognition,
6658 framework,
6659 );
6660 snapshot.revenue_contract_count = contracts.len();
6661 snapshot.contracts = contracts;
6662 }
6663 }
6664
6665 if self.config.accounting_standards.impairment.enabled {
6667 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6668 .master_data
6669 .assets
6670 .iter()
6671 .map(|a| {
6672 (
6673 a.asset_id.clone(),
6674 a.description.clone(),
6675 a.acquisition_cost,
6676 )
6677 })
6678 .collect();
6679
6680 if !asset_data.is_empty() {
6681 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6682 let tests = imp_gen.generate(
6683 company_code,
6684 &asset_data,
6685 end_date,
6686 &self.config.accounting_standards.impairment,
6687 framework,
6688 );
6689 snapshot.impairment_test_count = tests.len();
6690 snapshot.impairment_tests = tests;
6691 }
6692 }
6693
6694 if self
6696 .config
6697 .accounting_standards
6698 .business_combinations
6699 .enabled
6700 {
6701 let bc_config = &self.config.accounting_standards.business_combinations;
6702 let framework_str = match framework {
6703 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6704 _ => "US_GAAP",
6705 };
6706 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6707 let bc_snap = bc_gen.generate(
6708 company_code,
6709 currency,
6710 start_date,
6711 end_date,
6712 bc_config.acquisition_count,
6713 framework_str,
6714 );
6715 snapshot.business_combination_count = bc_snap.combinations.len();
6716 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6717 snapshot.business_combinations = bc_snap.combinations;
6718 }
6719
6720 if self
6722 .config
6723 .accounting_standards
6724 .expected_credit_loss
6725 .enabled
6726 {
6727 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6728 let framework_str = match framework {
6729 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6730 _ => "ASC_326",
6731 };
6732
6733 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6736
6737 let mut ecl_gen = EclGenerator::new(seed + 43);
6738
6739 let bucket_exposures: Vec<(
6741 datasynth_core::models::subledger::ar::AgingBucket,
6742 rust_decimal::Decimal,
6743 )> = if ar_aging_reports.is_empty() {
6744 use datasynth_core::models::subledger::ar::AgingBucket;
6746 vec![
6747 (
6748 AgingBucket::Current,
6749 rust_decimal::Decimal::from(500_000_u32),
6750 ),
6751 (
6752 AgingBucket::Days1To30,
6753 rust_decimal::Decimal::from(120_000_u32),
6754 ),
6755 (
6756 AgingBucket::Days31To60,
6757 rust_decimal::Decimal::from(45_000_u32),
6758 ),
6759 (
6760 AgingBucket::Days61To90,
6761 rust_decimal::Decimal::from(15_000_u32),
6762 ),
6763 (
6764 AgingBucket::Over90Days,
6765 rust_decimal::Decimal::from(8_000_u32),
6766 ),
6767 ]
6768 } else {
6769 use datasynth_core::models::subledger::ar::AgingBucket;
6770 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6772 std::collections::HashMap::new();
6773 for report in ar_aging_reports {
6774 for (bucket, amount) in &report.bucket_totals {
6775 *totals.entry(*bucket).or_default() += amount;
6776 }
6777 }
6778 AgingBucket::all()
6779 .into_iter()
6780 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6781 .collect()
6782 };
6783
6784 let ecl_snap = ecl_gen.generate(
6785 company_code,
6786 end_date,
6787 &bucket_exposures,
6788 ecl_config,
6789 &period_label,
6790 framework_str,
6791 );
6792
6793 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6794 snapshot.ecl_models = ecl_snap.ecl_models;
6795 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6796 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6797 }
6798
6799 {
6801 let framework_str = match framework {
6802 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6803 _ => "US_GAAP",
6804 };
6805
6806 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6811 .max(rust_decimal::Decimal::from(100_000_u32));
6812
6813 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6814
6815 let mut prov_gen = ProvisionGenerator::new(seed + 44);
6816 let prov_snap = prov_gen.generate(
6817 company_code,
6818 currency,
6819 revenue_proxy,
6820 end_date,
6821 &period_label,
6822 framework_str,
6823 None, );
6825
6826 snapshot.provision_count = prov_snap.provisions.len();
6827 snapshot.provisions = prov_snap.provisions;
6828 snapshot.provision_movements = prov_snap.movements;
6829 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6830 snapshot.provision_journal_entries = prov_snap.journal_entries;
6831 }
6832
6833 {
6837 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6838
6839 let presentation_currency = self
6840 .config
6841 .global
6842 .presentation_currency
6843 .clone()
6844 .unwrap_or_else(|| self.config.global.group_currency.clone());
6845
6846 let mut rate_table = FxRateTable::new(&presentation_currency);
6849
6850 let base_rates = base_rates_usd();
6854 for (ccy, rate) in &base_rates {
6855 rate_table.add_rate(FxRate::new(
6856 ccy,
6857 "USD",
6858 RateType::Closing,
6859 end_date,
6860 *rate,
6861 "SYNTHETIC",
6862 ));
6863 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6866 rate_table.add_rate(FxRate::new(
6867 ccy,
6868 "USD",
6869 RateType::Average,
6870 end_date,
6871 avg,
6872 "SYNTHETIC",
6873 ));
6874 }
6875
6876 let mut translation_results = Vec::new();
6877 for company in &self.config.companies {
6878 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6881 .max(rust_decimal::Decimal::from(100_000_u32));
6882
6883 let func_ccy = company
6884 .functional_currency
6885 .clone()
6886 .unwrap_or_else(|| company.currency.clone());
6887
6888 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6889 &company.code,
6890 &func_ccy,
6891 &presentation_currency,
6892 &ias21_period_label,
6893 end_date,
6894 company_revenue,
6895 &rate_table,
6896 );
6897 translation_results.push(result);
6898 }
6899
6900 snapshot.currency_translation_count = translation_results.len();
6901 snapshot.currency_translation_results = translation_results;
6902 }
6903
6904 stats.revenue_contract_count = snapshot.revenue_contract_count;
6905 stats.impairment_test_count = snapshot.impairment_test_count;
6906 stats.business_combination_count = snapshot.business_combination_count;
6907 stats.ecl_model_count = snapshot.ecl_model_count;
6908 stats.provision_count = snapshot.provision_count;
6909
6910 info!(
6911 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6912 snapshot.revenue_contract_count,
6913 snapshot.impairment_test_count,
6914 snapshot.business_combination_count,
6915 snapshot.ecl_model_count,
6916 snapshot.provision_count,
6917 snapshot.currency_translation_count
6918 );
6919 self.check_resources_with_log("post-accounting-standards")?;
6920
6921 Ok(snapshot)
6922 }
6923
6924 fn phase_manufacturing(
6926 &mut self,
6927 stats: &mut EnhancedGenerationStatistics,
6928 ) -> SynthResult<ManufacturingSnapshot> {
6929 if !self.phase_config.generate_manufacturing {
6930 debug!("Phase 18: Skipped (manufacturing generation disabled)");
6931 return Ok(ManufacturingSnapshot::default());
6932 }
6933 info!("Phase 18: Generating Manufacturing Data");
6934
6935 let seed = self.seed;
6936 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6937 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6938 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6939 let company_code = self
6940 .config
6941 .companies
6942 .first()
6943 .map(|c| c.code.as_str())
6944 .unwrap_or("1000");
6945
6946 let material_data: Vec<(String, String)> = self
6947 .master_data
6948 .materials
6949 .iter()
6950 .map(|m| (m.material_id.clone(), m.description.clone()))
6951 .collect();
6952
6953 if material_data.is_empty() {
6954 debug!("Phase 18: Skipped (no materials available)");
6955 return Ok(ManufacturingSnapshot::default());
6956 }
6957
6958 let mut snapshot = ManufacturingSnapshot::default();
6959
6960 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6962 let production_orders = prod_gen.generate(
6963 company_code,
6964 &material_data,
6965 start_date,
6966 end_date,
6967 &self.config.manufacturing.production_orders,
6968 &self.config.manufacturing.costing,
6969 &self.config.manufacturing.routing,
6970 );
6971 snapshot.production_order_count = production_orders.len();
6972
6973 let inspection_data: Vec<(String, String, String)> = production_orders
6975 .iter()
6976 .map(|po| {
6977 (
6978 po.order_id.clone(),
6979 po.material_id.clone(),
6980 po.material_description.clone(),
6981 )
6982 })
6983 .collect();
6984
6985 snapshot.production_orders = production_orders;
6986
6987 if !inspection_data.is_empty() {
6988 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6989 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6990 snapshot.quality_inspection_count = inspections.len();
6991 snapshot.quality_inspections = inspections;
6992 }
6993
6994 let storage_locations: Vec<(String, String)> = material_data
6996 .iter()
6997 .enumerate()
6998 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6999 .collect();
7000
7001 let employee_ids: Vec<String> = self
7002 .master_data
7003 .employees
7004 .iter()
7005 .map(|e| e.employee_id.clone())
7006 .collect();
7007 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7008 .with_employee_pool(employee_ids);
7009 let mut cycle_count_total = 0usize;
7010 for month in 0..self.config.global.period_months {
7011 let count_date = start_date + chrono::Months::new(month);
7012 let items_per_count = storage_locations.len().clamp(10, 50);
7013 let cc = cc_gen.generate(
7014 company_code,
7015 &storage_locations,
7016 count_date,
7017 items_per_count,
7018 );
7019 snapshot.cycle_counts.push(cc);
7020 cycle_count_total += 1;
7021 }
7022 snapshot.cycle_count_count = cycle_count_total;
7023
7024 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7026 let bom_components = bom_gen.generate(company_code, &material_data);
7027 snapshot.bom_component_count = bom_components.len();
7028 snapshot.bom_components = bom_components;
7029
7030 let currency = self
7032 .config
7033 .companies
7034 .first()
7035 .map(|c| c.currency.as_str())
7036 .unwrap_or("USD");
7037 let production_order_ids: Vec<String> = snapshot
7038 .production_orders
7039 .iter()
7040 .map(|po| po.order_id.clone())
7041 .collect();
7042 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7043 let inventory_movements = inv_mov_gen.generate_with_production_orders(
7044 company_code,
7045 &material_data,
7046 start_date,
7047 end_date,
7048 2,
7049 currency,
7050 &production_order_ids,
7051 );
7052 snapshot.inventory_movement_count = inventory_movements.len();
7053 snapshot.inventory_movements = inventory_movements;
7054
7055 stats.production_order_count = snapshot.production_order_count;
7056 stats.quality_inspection_count = snapshot.quality_inspection_count;
7057 stats.cycle_count_count = snapshot.cycle_count_count;
7058 stats.bom_component_count = snapshot.bom_component_count;
7059 stats.inventory_movement_count = snapshot.inventory_movement_count;
7060
7061 info!(
7062 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7063 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7064 snapshot.bom_component_count, snapshot.inventory_movement_count
7065 );
7066 self.check_resources_with_log("post-manufacturing")?;
7067
7068 Ok(snapshot)
7069 }
7070
7071 fn phase_sales_kpi_budgets(
7073 &mut self,
7074 coa: &Arc<ChartOfAccounts>,
7075 financial_reporting: &FinancialReportingSnapshot,
7076 stats: &mut EnhancedGenerationStatistics,
7077 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7078 if !self.phase_config.generate_sales_kpi_budgets {
7079 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7080 return Ok(SalesKpiBudgetsSnapshot::default());
7081 }
7082 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7083
7084 let seed = self.seed;
7085 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7086 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7087 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7088 let company_code = self
7089 .config
7090 .companies
7091 .first()
7092 .map(|c| c.code.as_str())
7093 .unwrap_or("1000");
7094
7095 let mut snapshot = SalesKpiBudgetsSnapshot::default();
7096
7097 if self.config.sales_quotes.enabled {
7099 let customer_data: Vec<(String, String)> = self
7100 .master_data
7101 .customers
7102 .iter()
7103 .map(|c| (c.customer_id.clone(), c.name.clone()))
7104 .collect();
7105 let material_data: Vec<(String, String)> = self
7106 .master_data
7107 .materials
7108 .iter()
7109 .map(|m| (m.material_id.clone(), m.description.clone()))
7110 .collect();
7111
7112 if !customer_data.is_empty() && !material_data.is_empty() {
7113 let employee_ids: Vec<String> = self
7114 .master_data
7115 .employees
7116 .iter()
7117 .map(|e| e.employee_id.clone())
7118 .collect();
7119 let customer_ids: Vec<String> = self
7120 .master_data
7121 .customers
7122 .iter()
7123 .map(|c| c.customer_id.clone())
7124 .collect();
7125 let company_currency = self
7126 .config
7127 .companies
7128 .first()
7129 .map(|c| c.currency.as_str())
7130 .unwrap_or("USD");
7131
7132 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7133 .with_pools(employee_ids, customer_ids);
7134 let quotes = quote_gen.generate_with_currency(
7135 company_code,
7136 &customer_data,
7137 &material_data,
7138 start_date,
7139 end_date,
7140 &self.config.sales_quotes,
7141 company_currency,
7142 );
7143 snapshot.sales_quote_count = quotes.len();
7144 snapshot.sales_quotes = quotes;
7145 }
7146 }
7147
7148 if self.config.financial_reporting.management_kpis.enabled {
7150 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7151 let mut kpis = kpi_gen.generate(
7152 company_code,
7153 start_date,
7154 end_date,
7155 &self.config.financial_reporting.management_kpis,
7156 );
7157
7158 {
7160 use rust_decimal::Decimal;
7161
7162 if let Some(income_stmt) =
7163 financial_reporting.financial_statements.iter().find(|fs| {
7164 fs.statement_type == StatementType::IncomeStatement
7165 && fs.company_code == company_code
7166 })
7167 {
7168 let total_revenue: Decimal = income_stmt
7170 .line_items
7171 .iter()
7172 .filter(|li| li.section.contains("Revenue") && !li.is_total)
7173 .map(|li| li.amount)
7174 .sum();
7175 let total_cogs: Decimal = income_stmt
7176 .line_items
7177 .iter()
7178 .filter(|li| {
7179 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7180 && !li.is_total
7181 })
7182 .map(|li| li.amount.abs())
7183 .sum();
7184 let total_opex: Decimal = income_stmt
7185 .line_items
7186 .iter()
7187 .filter(|li| {
7188 li.section.contains("Expense")
7189 && !li.is_total
7190 && !li.section.contains("Cost")
7191 })
7192 .map(|li| li.amount.abs())
7193 .sum();
7194
7195 if total_revenue > Decimal::ZERO {
7196 let hundred = Decimal::from(100);
7197 let gross_margin_pct =
7198 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7199 let operating_income = total_revenue - total_cogs - total_opex;
7200 let op_margin_pct =
7201 (operating_income * hundred / total_revenue).round_dp(2);
7202
7203 for kpi in &mut kpis {
7205 if kpi.name == "Gross Margin" {
7206 kpi.value = gross_margin_pct;
7207 } else if kpi.name == "Operating Margin" {
7208 kpi.value = op_margin_pct;
7209 }
7210 }
7211 }
7212 }
7213
7214 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7216 fs.statement_type == StatementType::BalanceSheet
7217 && fs.company_code == company_code
7218 }) {
7219 let current_assets: Decimal = bs
7220 .line_items
7221 .iter()
7222 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7223 .map(|li| li.amount)
7224 .sum();
7225 let current_liabilities: Decimal = bs
7226 .line_items
7227 .iter()
7228 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7229 .map(|li| li.amount.abs())
7230 .sum();
7231
7232 if current_liabilities > Decimal::ZERO {
7233 let current_ratio = (current_assets / current_liabilities).round_dp(2);
7234 for kpi in &mut kpis {
7235 if kpi.name == "Current Ratio" {
7236 kpi.value = current_ratio;
7237 }
7238 }
7239 }
7240 }
7241 }
7242
7243 snapshot.kpi_count = kpis.len();
7244 snapshot.kpis = kpis;
7245 }
7246
7247 if self.config.financial_reporting.budgets.enabled {
7249 let account_data: Vec<(String, String)> = coa
7250 .accounts
7251 .iter()
7252 .map(|a| (a.account_number.clone(), a.short_description.clone()))
7253 .collect();
7254
7255 if !account_data.is_empty() {
7256 let fiscal_year = start_date.year() as u32;
7257 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7258 let budget = budget_gen.generate(
7259 company_code,
7260 fiscal_year,
7261 &account_data,
7262 &self.config.financial_reporting.budgets,
7263 );
7264 snapshot.budget_line_count = budget.line_items.len();
7265 snapshot.budgets.push(budget);
7266 }
7267 }
7268
7269 stats.sales_quote_count = snapshot.sales_quote_count;
7270 stats.kpi_count = snapshot.kpi_count;
7271 stats.budget_line_count = snapshot.budget_line_count;
7272
7273 info!(
7274 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7275 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7276 );
7277 self.check_resources_with_log("post-sales-kpi-budgets")?;
7278
7279 Ok(snapshot)
7280 }
7281
7282 fn compute_pre_tax_income(
7289 company_code: &str,
7290 journal_entries: &[JournalEntry],
7291 ) -> rust_decimal::Decimal {
7292 use datasynth_core::accounts::AccountCategory;
7293 use rust_decimal::Decimal;
7294
7295 let mut total_revenue = Decimal::ZERO;
7296 let mut total_expenses = Decimal::ZERO;
7297
7298 for je in journal_entries {
7299 if je.header.company_code != company_code {
7300 continue;
7301 }
7302 for line in &je.lines {
7303 let cat = AccountCategory::from_account(&line.gl_account);
7304 match cat {
7305 AccountCategory::Revenue => {
7306 total_revenue += line.credit_amount - line.debit_amount;
7307 }
7308 AccountCategory::Cogs
7309 | AccountCategory::OperatingExpense
7310 | AccountCategory::OtherIncomeExpense => {
7311 total_expenses += line.debit_amount - line.credit_amount;
7312 }
7313 _ => {}
7314 }
7315 }
7316 }
7317
7318 let pti = (total_revenue - total_expenses).round_dp(2);
7319 if pti == rust_decimal::Decimal::ZERO {
7320 rust_decimal::Decimal::from(1_000_000u32)
7323 } else {
7324 pti
7325 }
7326 }
7327
7328 fn phase_tax_generation(
7330 &mut self,
7331 document_flows: &DocumentFlowSnapshot,
7332 journal_entries: &[JournalEntry],
7333 stats: &mut EnhancedGenerationStatistics,
7334 ) -> SynthResult<TaxSnapshot> {
7335 if !self.phase_config.generate_tax {
7336 debug!("Phase 20: Skipped (tax generation disabled)");
7337 return Ok(TaxSnapshot::default());
7338 }
7339 info!("Phase 20: Generating Tax Data");
7340
7341 let seed = self.seed;
7342 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7343 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7344 let fiscal_year = start_date.year();
7345 let company_code = self
7346 .config
7347 .companies
7348 .first()
7349 .map(|c| c.code.as_str())
7350 .unwrap_or("1000");
7351
7352 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7353 seed + 370,
7354 self.config.tax.clone(),
7355 );
7356
7357 let pack = self.primary_pack().clone();
7358 let (jurisdictions, codes) =
7359 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7360
7361 let mut provisions = Vec::new();
7363 if self.config.tax.provisions.enabled {
7364 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7365 for company in &self.config.companies {
7366 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7367 let statutory_rate = rust_decimal::Decimal::new(
7368 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7369 2,
7370 );
7371 let provision = provision_gen.generate(
7372 &company.code,
7373 start_date,
7374 pre_tax_income,
7375 statutory_rate,
7376 );
7377 provisions.push(provision);
7378 }
7379 }
7380
7381 let mut tax_lines = Vec::new();
7383 if !codes.is_empty() {
7384 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7385 datasynth_generators::TaxLineGeneratorConfig::default(),
7386 codes.clone(),
7387 seed + 372,
7388 );
7389
7390 let buyer_country = self
7393 .config
7394 .companies
7395 .first()
7396 .map(|c| c.country.as_str())
7397 .unwrap_or("US");
7398 for vi in &document_flows.vendor_invoices {
7399 let lines = tax_line_gen.generate_for_document(
7400 datasynth_core::models::TaxableDocumentType::VendorInvoice,
7401 &vi.header.document_id,
7402 buyer_country, buyer_country,
7404 vi.payable_amount,
7405 vi.header.document_date,
7406 None,
7407 );
7408 tax_lines.extend(lines);
7409 }
7410
7411 for ci in &document_flows.customer_invoices {
7413 let lines = tax_line_gen.generate_for_document(
7414 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7415 &ci.header.document_id,
7416 buyer_country, buyer_country,
7418 ci.total_gross_amount,
7419 ci.header.document_date,
7420 None,
7421 );
7422 tax_lines.extend(lines);
7423 }
7424 }
7425
7426 let deferred_tax = {
7428 let companies: Vec<(&str, &str)> = self
7429 .config
7430 .companies
7431 .iter()
7432 .map(|c| (c.code.as_str(), c.country.as_str()))
7433 .collect();
7434 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7435 deferred_gen.generate(&companies, start_date, journal_entries)
7436 };
7437
7438 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7441 std::collections::HashMap::new();
7442 for vi in &document_flows.vendor_invoices {
7443 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7444 }
7445 for ci in &document_flows.customer_invoices {
7446 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7447 }
7448
7449 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7451 let tax_posting_journal_entries = if !tax_lines.is_empty() {
7452 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7453 &tax_lines,
7454 company_code,
7455 &doc_dates,
7456 end_date,
7457 );
7458 debug!("Generated {} tax posting JEs", jes.len());
7459 jes
7460 } else {
7461 Vec::new()
7462 };
7463
7464 let snapshot = TaxSnapshot {
7465 jurisdiction_count: jurisdictions.len(),
7466 code_count: codes.len(),
7467 jurisdictions,
7468 codes,
7469 tax_provisions: provisions,
7470 tax_lines,
7471 tax_returns: Vec::new(),
7472 withholding_records: Vec::new(),
7473 tax_anomaly_labels: Vec::new(),
7474 deferred_tax,
7475 tax_posting_journal_entries,
7476 };
7477
7478 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7479 stats.tax_code_count = snapshot.code_count;
7480 stats.tax_provision_count = snapshot.tax_provisions.len();
7481 stats.tax_line_count = snapshot.tax_lines.len();
7482
7483 info!(
7484 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7485 snapshot.jurisdiction_count,
7486 snapshot.code_count,
7487 snapshot.tax_provisions.len(),
7488 snapshot.deferred_tax.temporary_differences.len(),
7489 snapshot.deferred_tax.journal_entries.len(),
7490 snapshot.tax_posting_journal_entries.len(),
7491 );
7492 self.check_resources_with_log("post-tax")?;
7493
7494 Ok(snapshot)
7495 }
7496
7497 fn phase_esg_generation(
7499 &mut self,
7500 document_flows: &DocumentFlowSnapshot,
7501 manufacturing: &ManufacturingSnapshot,
7502 stats: &mut EnhancedGenerationStatistics,
7503 ) -> SynthResult<EsgSnapshot> {
7504 if !self.phase_config.generate_esg {
7505 debug!("Phase 21: Skipped (ESG generation disabled)");
7506 return Ok(EsgSnapshot::default());
7507 }
7508 let degradation = self.check_resources()?;
7509 if degradation >= DegradationLevel::Reduced {
7510 debug!(
7511 "Phase skipped due to resource pressure (degradation: {:?})",
7512 degradation
7513 );
7514 return Ok(EsgSnapshot::default());
7515 }
7516 info!("Phase 21: Generating ESG Data");
7517
7518 let seed = self.seed;
7519 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7520 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7521 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7522 let entity_id = self
7523 .config
7524 .companies
7525 .first()
7526 .map(|c| c.code.as_str())
7527 .unwrap_or("1000");
7528
7529 let esg_cfg = &self.config.esg;
7530 let mut snapshot = EsgSnapshot::default();
7531
7532 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7534 esg_cfg.environmental.energy.clone(),
7535 seed + 80,
7536 );
7537 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7538
7539 let facility_count = esg_cfg.environmental.energy.facility_count;
7541 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7542 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7543
7544 let mut waste_gen = datasynth_generators::WasteGenerator::new(
7546 seed + 82,
7547 esg_cfg.environmental.waste.diversion_target,
7548 facility_count,
7549 );
7550 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7551
7552 let mut emission_gen =
7554 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7555
7556 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7558 .iter()
7559 .map(|e| datasynth_generators::EnergyInput {
7560 facility_id: e.facility_id.clone(),
7561 energy_type: match e.energy_source {
7562 EnergySourceType::NaturalGas => {
7563 datasynth_generators::EnergyInputType::NaturalGas
7564 }
7565 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7566 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7567 _ => datasynth_generators::EnergyInputType::Electricity,
7568 },
7569 consumption_kwh: e.consumption_kwh,
7570 period: e.period,
7571 })
7572 .collect();
7573
7574 if !manufacturing.production_orders.is_empty() {
7576 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7577 &manufacturing.production_orders,
7578 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
7581 if !mfg_energy.is_empty() {
7582 info!(
7583 "ESG: {} energy inputs derived from {} production orders",
7584 mfg_energy.len(),
7585 manufacturing.production_orders.len(),
7586 );
7587 energy_inputs.extend(mfg_energy);
7588 }
7589 }
7590
7591 let mut emissions = Vec::new();
7592 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7593 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7594
7595 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7597 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7598 for payment in &document_flows.payments {
7599 if payment.is_vendor {
7600 *totals
7601 .entry(payment.business_partner_id.clone())
7602 .or_default() += payment.amount;
7603 }
7604 }
7605 totals
7606 };
7607 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7608 .master_data
7609 .vendors
7610 .iter()
7611 .map(|v| {
7612 let spend = vendor_payment_totals
7613 .get(&v.vendor_id)
7614 .copied()
7615 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7616 datasynth_generators::VendorSpendInput {
7617 vendor_id: v.vendor_id.clone(),
7618 category: format!("{:?}", v.vendor_type).to_lowercase(),
7619 spend,
7620 country: v.country.clone(),
7621 }
7622 })
7623 .collect();
7624 if !vendor_spend.is_empty() {
7625 emissions.extend(emission_gen.generate_scope3_purchased_goods(
7626 entity_id,
7627 &vendor_spend,
7628 start_date,
7629 end_date,
7630 ));
7631 }
7632
7633 let headcount = self.master_data.employees.len() as u32;
7635 if headcount > 0 {
7636 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7637 emissions.extend(emission_gen.generate_scope3_business_travel(
7638 entity_id,
7639 travel_spend,
7640 start_date,
7641 ));
7642 emissions
7643 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7644 }
7645
7646 snapshot.emission_count = emissions.len();
7647 snapshot.emissions = emissions;
7648 snapshot.energy = energy_records;
7649
7650 let mut workforce_gen =
7652 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7653 let total_headcount = headcount.max(100);
7654 snapshot.diversity =
7655 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7656 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7657
7658 if !self.master_data.employees.is_empty() {
7660 let hr_diversity = workforce_gen.generate_diversity_from_employees(
7661 entity_id,
7662 &self.master_data.employees,
7663 end_date,
7664 );
7665 if !hr_diversity.is_empty() {
7666 info!(
7667 "ESG: {} diversity metrics derived from {} actual employees",
7668 hr_diversity.len(),
7669 self.master_data.employees.len(),
7670 );
7671 snapshot.diversity.extend(hr_diversity);
7672 }
7673 }
7674
7675 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7676 entity_id,
7677 facility_count,
7678 start_date,
7679 end_date,
7680 );
7681
7682 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
7685 entity_id,
7686 &snapshot.safety_incidents,
7687 total_hours,
7688 start_date,
7689 );
7690 snapshot.safety_metrics = vec![safety_metric];
7691
7692 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7694 seed + 85,
7695 esg_cfg.governance.board_size,
7696 esg_cfg.governance.independence_target,
7697 );
7698 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7699
7700 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7702 esg_cfg.supply_chain_esg.clone(),
7703 seed + 86,
7704 );
7705 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7706 .master_data
7707 .vendors
7708 .iter()
7709 .map(|v| datasynth_generators::VendorInput {
7710 vendor_id: v.vendor_id.clone(),
7711 country: v.country.clone(),
7712 industry: format!("{:?}", v.vendor_type).to_lowercase(),
7713 quality_score: None,
7714 })
7715 .collect();
7716 snapshot.supplier_assessments =
7717 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7718
7719 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7721 seed + 87,
7722 esg_cfg.reporting.clone(),
7723 esg_cfg.climate_scenarios.clone(),
7724 );
7725 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7726 snapshot.disclosures = disclosure_gen.generate_disclosures(
7727 entity_id,
7728 &snapshot.materiality,
7729 start_date,
7730 end_date,
7731 );
7732 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7733 snapshot.disclosure_count = snapshot.disclosures.len();
7734
7735 if esg_cfg.anomaly_rate > 0.0 {
7737 let mut anomaly_injector =
7738 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7739 let mut labels = Vec::new();
7740 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7741 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7742 labels.extend(
7743 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7744 );
7745 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7746 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7747 snapshot.anomaly_labels = labels;
7748 }
7749
7750 stats.esg_emission_count = snapshot.emission_count;
7751 stats.esg_disclosure_count = snapshot.disclosure_count;
7752
7753 info!(
7754 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7755 snapshot.emission_count,
7756 snapshot.disclosure_count,
7757 snapshot.supplier_assessments.len()
7758 );
7759 self.check_resources_with_log("post-esg")?;
7760
7761 Ok(snapshot)
7762 }
7763
7764 fn phase_treasury_data(
7766 &mut self,
7767 document_flows: &DocumentFlowSnapshot,
7768 subledger: &SubledgerSnapshot,
7769 intercompany: &IntercompanySnapshot,
7770 stats: &mut EnhancedGenerationStatistics,
7771 ) -> SynthResult<TreasurySnapshot> {
7772 if !self.phase_config.generate_treasury {
7773 debug!("Phase 22: Skipped (treasury generation disabled)");
7774 return Ok(TreasurySnapshot::default());
7775 }
7776 let degradation = self.check_resources()?;
7777 if degradation >= DegradationLevel::Reduced {
7778 debug!(
7779 "Phase skipped due to resource pressure (degradation: {:?})",
7780 degradation
7781 );
7782 return Ok(TreasurySnapshot::default());
7783 }
7784 info!("Phase 22: Generating Treasury Data");
7785
7786 let seed = self.seed;
7787 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7788 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7789 let currency = self
7790 .config
7791 .companies
7792 .first()
7793 .map(|c| c.currency.as_str())
7794 .unwrap_or("USD");
7795 let entity_id = self
7796 .config
7797 .companies
7798 .first()
7799 .map(|c| c.code.as_str())
7800 .unwrap_or("1000");
7801
7802 let mut snapshot = TreasurySnapshot::default();
7803
7804 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7806 self.config.treasury.debt.clone(),
7807 seed + 90,
7808 );
7809 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7810
7811 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7813 self.config.treasury.hedging.clone(),
7814 seed + 91,
7815 );
7816 for debt in &snapshot.debt_instruments {
7817 if debt.rate_type == InterestRateType::Variable {
7818 let swap = hedge_gen.generate_ir_swap(
7819 currency,
7820 debt.principal,
7821 debt.origination_date,
7822 debt.maturity_date,
7823 );
7824 snapshot.hedging_instruments.push(swap);
7825 }
7826 }
7827
7828 {
7831 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7832 for payment in &document_flows.payments {
7833 if payment.currency != currency {
7834 let entry = fx_map
7835 .entry(payment.currency.clone())
7836 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7837 entry.0 += payment.amount;
7838 if payment.header.document_date > entry.1 {
7840 entry.1 = payment.header.document_date;
7841 }
7842 }
7843 }
7844 if !fx_map.is_empty() {
7845 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7846 .into_iter()
7847 .map(|(foreign_ccy, (net_amount, settlement_date))| {
7848 datasynth_generators::treasury::FxExposure {
7849 currency_pair: format!("{foreign_ccy}/{currency}"),
7850 foreign_currency: foreign_ccy,
7851 net_amount,
7852 settlement_date,
7853 description: "AP payment FX exposure".to_string(),
7854 }
7855 })
7856 .collect();
7857 let (fx_instruments, fx_relationships) =
7858 hedge_gen.generate(start_date, &fx_exposures);
7859 snapshot.hedging_instruments.extend(fx_instruments);
7860 snapshot.hedge_relationships.extend(fx_relationships);
7861 }
7862 }
7863
7864 if self.config.treasury.anomaly_rate > 0.0 {
7866 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7867 seed + 92,
7868 self.config.treasury.anomaly_rate,
7869 );
7870 let mut labels = Vec::new();
7871 labels.extend(
7872 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7873 );
7874 snapshot.treasury_anomaly_labels = labels;
7875 }
7876
7877 if self.config.treasury.cash_positioning.enabled {
7879 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7880
7881 for payment in &document_flows.payments {
7883 cash_flows.push(datasynth_generators::treasury::CashFlow {
7884 date: payment.header.document_date,
7885 account_id: format!("{entity_id}-MAIN"),
7886 amount: payment.amount,
7887 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7888 });
7889 }
7890
7891 for chain in &document_flows.o2c_chains {
7893 if let Some(ref receipt) = chain.customer_receipt {
7894 cash_flows.push(datasynth_generators::treasury::CashFlow {
7895 date: receipt.header.document_date,
7896 account_id: format!("{entity_id}-MAIN"),
7897 amount: receipt.amount,
7898 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7899 });
7900 }
7901 for receipt in &chain.remainder_receipts {
7903 cash_flows.push(datasynth_generators::treasury::CashFlow {
7904 date: receipt.header.document_date,
7905 account_id: format!("{entity_id}-MAIN"),
7906 amount: receipt.amount,
7907 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7908 });
7909 }
7910 }
7911
7912 if !cash_flows.is_empty() {
7913 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7914 self.config.treasury.cash_positioning.clone(),
7915 seed + 93,
7916 );
7917 let account_id = format!("{entity_id}-MAIN");
7918 snapshot.cash_positions = cash_gen.generate(
7919 entity_id,
7920 &account_id,
7921 currency,
7922 &cash_flows,
7923 start_date,
7924 start_date + chrono::Months::new(self.config.global.period_months),
7925 rust_decimal::Decimal::new(1_000_000, 0), );
7927 }
7928 }
7929
7930 if self.config.treasury.cash_forecasting.enabled {
7932 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7933
7934 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7936 .ar_invoices
7937 .iter()
7938 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7939 .map(|inv| {
7940 let days_past_due = if inv.due_date < end_date {
7941 (end_date - inv.due_date).num_days().max(0) as u32
7942 } else {
7943 0
7944 };
7945 datasynth_generators::treasury::ArAgingItem {
7946 expected_date: inv.due_date,
7947 amount: inv.amount_remaining,
7948 days_past_due,
7949 document_id: inv.invoice_number.clone(),
7950 }
7951 })
7952 .collect();
7953
7954 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7956 .ap_invoices
7957 .iter()
7958 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7959 .map(|inv| datasynth_generators::treasury::ApAgingItem {
7960 payment_date: inv.due_date,
7961 amount: inv.amount_remaining,
7962 document_id: inv.invoice_number.clone(),
7963 })
7964 .collect();
7965
7966 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7967 self.config.treasury.cash_forecasting.clone(),
7968 seed + 94,
7969 );
7970 let forecast = forecast_gen.generate(
7971 entity_id,
7972 currency,
7973 end_date,
7974 &ar_items,
7975 &ap_items,
7976 &[], );
7978 snapshot.cash_forecasts.push(forecast);
7979 }
7980
7981 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7983 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7984 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7985 self.config.treasury.cash_pooling.clone(),
7986 seed + 95,
7987 );
7988
7989 let account_ids: Vec<String> = snapshot
7991 .cash_positions
7992 .iter()
7993 .map(|cp| cp.bank_account_id.clone())
7994 .collect::<std::collections::HashSet<_>>()
7995 .into_iter()
7996 .collect();
7997
7998 if let Some(pool) =
7999 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8000 {
8001 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8003 for cp in &snapshot.cash_positions {
8004 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8005 }
8006
8007 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8008 latest_balances
8009 .into_iter()
8010 .filter(|(id, _)| pool.participant_accounts.contains(id))
8011 .map(
8012 |(id, balance)| datasynth_generators::treasury::AccountBalance {
8013 account_id: id,
8014 balance,
8015 },
8016 )
8017 .collect();
8018
8019 let sweeps =
8020 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8021 snapshot.cash_pool_sweeps = sweeps;
8022 snapshot.cash_pools.push(pool);
8023 }
8024 }
8025
8026 if self.config.treasury.bank_guarantees.enabled {
8028 let vendor_names: Vec<String> = self
8029 .master_data
8030 .vendors
8031 .iter()
8032 .map(|v| v.name.clone())
8033 .collect();
8034 if !vendor_names.is_empty() {
8035 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8036 self.config.treasury.bank_guarantees.clone(),
8037 seed + 96,
8038 );
8039 snapshot.bank_guarantees =
8040 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8041 }
8042 }
8043
8044 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8046 let entity_ids: Vec<String> = self
8047 .config
8048 .companies
8049 .iter()
8050 .map(|c| c.code.clone())
8051 .collect();
8052 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8053 .matched_pairs
8054 .iter()
8055 .map(|mp| {
8056 (
8057 mp.seller_company.clone(),
8058 mp.buyer_company.clone(),
8059 mp.amount,
8060 )
8061 })
8062 .collect();
8063 if entity_ids.len() >= 2 {
8064 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8065 self.config.treasury.netting.clone(),
8066 seed + 97,
8067 );
8068 snapshot.netting_runs = netting_gen.generate(
8069 &entity_ids,
8070 currency,
8071 start_date,
8072 self.config.global.period_months,
8073 &ic_amounts,
8074 );
8075 }
8076 }
8077
8078 {
8080 use datasynth_generators::treasury::TreasuryAccounting;
8081
8082 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8083 let mut treasury_jes = Vec::new();
8084
8085 if !snapshot.debt_instruments.is_empty() {
8087 let debt_jes =
8088 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8089 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8090 treasury_jes.extend(debt_jes);
8091 }
8092
8093 if !snapshot.hedging_instruments.is_empty() {
8095 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8096 &snapshot.hedging_instruments,
8097 &snapshot.hedge_relationships,
8098 end_date,
8099 entity_id,
8100 );
8101 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8102 treasury_jes.extend(hedge_jes);
8103 }
8104
8105 if !snapshot.cash_pool_sweeps.is_empty() {
8107 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8108 &snapshot.cash_pool_sweeps,
8109 entity_id,
8110 );
8111 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8112 treasury_jes.extend(sweep_jes);
8113 }
8114
8115 if !treasury_jes.is_empty() {
8116 debug!("Total treasury journal entries: {}", treasury_jes.len());
8117 }
8118 snapshot.journal_entries = treasury_jes;
8119 }
8120
8121 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8122 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8123 stats.cash_position_count = snapshot.cash_positions.len();
8124 stats.cash_forecast_count = snapshot.cash_forecasts.len();
8125 stats.cash_pool_count = snapshot.cash_pools.len();
8126
8127 info!(
8128 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8129 snapshot.debt_instruments.len(),
8130 snapshot.hedging_instruments.len(),
8131 snapshot.cash_positions.len(),
8132 snapshot.cash_forecasts.len(),
8133 snapshot.cash_pools.len(),
8134 snapshot.bank_guarantees.len(),
8135 snapshot.netting_runs.len(),
8136 snapshot.journal_entries.len(),
8137 );
8138 self.check_resources_with_log("post-treasury")?;
8139
8140 Ok(snapshot)
8141 }
8142
8143 fn phase_project_accounting(
8145 &mut self,
8146 document_flows: &DocumentFlowSnapshot,
8147 hr: &HrSnapshot,
8148 stats: &mut EnhancedGenerationStatistics,
8149 ) -> SynthResult<ProjectAccountingSnapshot> {
8150 if !self.phase_config.generate_project_accounting {
8151 debug!("Phase 23: Skipped (project accounting disabled)");
8152 return Ok(ProjectAccountingSnapshot::default());
8153 }
8154 let degradation = self.check_resources()?;
8155 if degradation >= DegradationLevel::Reduced {
8156 debug!(
8157 "Phase skipped due to resource pressure (degradation: {:?})",
8158 degradation
8159 );
8160 return Ok(ProjectAccountingSnapshot::default());
8161 }
8162 info!("Phase 23: Generating Project Accounting Data");
8163
8164 let seed = self.seed;
8165 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8166 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8167 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8168 let company_code = self
8169 .config
8170 .companies
8171 .first()
8172 .map(|c| c.code.as_str())
8173 .unwrap_or("1000");
8174
8175 let mut snapshot = ProjectAccountingSnapshot::default();
8176
8177 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8179 self.config.project_accounting.clone(),
8180 seed + 95,
8181 );
8182 let pool = project_gen.generate(company_code, start_date, end_date);
8183 snapshot.projects = pool.projects.clone();
8184
8185 {
8187 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8188 Vec::new();
8189
8190 for te in &hr.time_entries {
8192 let total_hours = te.hours_regular + te.hours_overtime;
8193 if total_hours > 0.0 {
8194 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8195 id: te.entry_id.clone(),
8196 entity_id: company_code.to_string(),
8197 date: te.date,
8198 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8199 .unwrap_or(rust_decimal::Decimal::ZERO),
8200 source_type: CostSourceType::TimeEntry,
8201 hours: Some(
8202 rust_decimal::Decimal::from_f64_retain(total_hours)
8203 .unwrap_or(rust_decimal::Decimal::ZERO),
8204 ),
8205 });
8206 }
8207 }
8208
8209 for er in &hr.expense_reports {
8211 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8212 id: er.report_id.clone(),
8213 entity_id: company_code.to_string(),
8214 date: er.submission_date,
8215 amount: er.total_amount,
8216 source_type: CostSourceType::ExpenseReport,
8217 hours: None,
8218 });
8219 }
8220
8221 for po in &document_flows.purchase_orders {
8223 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8224 id: po.header.document_id.clone(),
8225 entity_id: company_code.to_string(),
8226 date: po.header.document_date,
8227 amount: po.total_net_amount,
8228 source_type: CostSourceType::PurchaseOrder,
8229 hours: None,
8230 });
8231 }
8232
8233 for vi in &document_flows.vendor_invoices {
8235 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8236 id: vi.header.document_id.clone(),
8237 entity_id: company_code.to_string(),
8238 date: vi.header.document_date,
8239 amount: vi.payable_amount,
8240 source_type: CostSourceType::VendorInvoice,
8241 hours: None,
8242 });
8243 }
8244
8245 if !source_docs.is_empty() && !pool.projects.is_empty() {
8246 let mut cost_gen =
8247 datasynth_generators::project_accounting::ProjectCostGenerator::new(
8248 self.config.project_accounting.cost_allocation.clone(),
8249 seed + 99,
8250 );
8251 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8252 }
8253 }
8254
8255 if self.config.project_accounting.change_orders.enabled {
8257 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8258 self.config.project_accounting.change_orders.clone(),
8259 seed + 96,
8260 );
8261 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8262 }
8263
8264 if self.config.project_accounting.milestones.enabled {
8266 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8267 self.config.project_accounting.milestones.clone(),
8268 seed + 97,
8269 );
8270 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8271 }
8272
8273 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8275 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8276 self.config.project_accounting.earned_value.clone(),
8277 seed + 98,
8278 );
8279 snapshot.earned_value_metrics =
8280 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8281 }
8282
8283 if self.config.project_accounting.revenue_recognition.enabled
8285 && !snapshot.projects.is_empty()
8286 && !snapshot.cost_lines.is_empty()
8287 {
8288 use datasynth_generators::project_accounting::RevenueGenerator;
8289 let rev_config = self.config.project_accounting.revenue_recognition.clone();
8290 let avg_contract_value =
8291 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8292 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8293
8294 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8297 snapshot
8298 .projects
8299 .iter()
8300 .filter(|p| {
8301 matches!(
8302 p.project_type,
8303 datasynth_core::models::ProjectType::Customer
8304 )
8305 })
8306 .map(|p| {
8307 let cv = if p.budget > rust_decimal::Decimal::ZERO {
8308 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8309 } else {
8311 avg_contract_value
8312 };
8313 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
8315 })
8316 .collect();
8317
8318 if !contract_values.is_empty() {
8319 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8320 snapshot.revenue_records = rev_gen.generate(
8321 &snapshot.projects,
8322 &snapshot.cost_lines,
8323 &contract_values,
8324 start_date,
8325 end_date,
8326 );
8327 debug!(
8328 "Generated {} revenue recognition records for {} customer projects",
8329 snapshot.revenue_records.len(),
8330 contract_values.len()
8331 );
8332 }
8333 }
8334
8335 stats.project_count = snapshot.projects.len();
8336 stats.project_change_order_count = snapshot.change_orders.len();
8337 stats.project_cost_line_count = snapshot.cost_lines.len();
8338
8339 info!(
8340 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8341 snapshot.projects.len(),
8342 snapshot.change_orders.len(),
8343 snapshot.milestones.len(),
8344 snapshot.earned_value_metrics.len()
8345 );
8346 self.check_resources_with_log("post-project-accounting")?;
8347
8348 Ok(snapshot)
8349 }
8350
8351 fn phase_evolution_events(
8353 &mut self,
8354 stats: &mut EnhancedGenerationStatistics,
8355 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8356 if !self.phase_config.generate_evolution_events {
8357 debug!("Phase 24: Skipped (evolution events disabled)");
8358 return Ok((Vec::new(), Vec::new()));
8359 }
8360 info!("Phase 24: Generating Process Evolution + Organizational Events");
8361
8362 let seed = self.seed;
8363 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8364 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8365 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8366
8367 let mut proc_gen =
8369 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8370 seed + 100,
8371 );
8372 let process_events = proc_gen.generate_events(start_date, end_date);
8373
8374 let company_codes: Vec<String> = self
8376 .config
8377 .companies
8378 .iter()
8379 .map(|c| c.code.clone())
8380 .collect();
8381 let mut org_gen =
8382 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8383 seed + 101,
8384 );
8385 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8386
8387 stats.process_evolution_event_count = process_events.len();
8388 stats.organizational_event_count = org_events.len();
8389
8390 info!(
8391 "Evolution events generated: {} process evolution, {} organizational",
8392 process_events.len(),
8393 org_events.len()
8394 );
8395 self.check_resources_with_log("post-evolution-events")?;
8396
8397 Ok((process_events, org_events))
8398 }
8399
8400 fn phase_disruption_events(
8403 &self,
8404 stats: &mut EnhancedGenerationStatistics,
8405 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8406 if !self.config.organizational_events.enabled {
8407 debug!("Phase 24b: Skipped (organizational events disabled)");
8408 return Ok(Vec::new());
8409 }
8410 info!("Phase 24b: Generating Disruption Events");
8411
8412 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8413 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8414 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8415
8416 let company_codes: Vec<String> = self
8417 .config
8418 .companies
8419 .iter()
8420 .map(|c| c.code.clone())
8421 .collect();
8422
8423 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8424 let events = gen.generate(start_date, end_date, &company_codes);
8425
8426 stats.disruption_event_count = events.len();
8427 info!("Disruption events generated: {} events", events.len());
8428 self.check_resources_with_log("post-disruption-events")?;
8429
8430 Ok(events)
8431 }
8432
8433 fn phase_counterfactuals(
8440 &self,
8441 journal_entries: &[JournalEntry],
8442 stats: &mut EnhancedGenerationStatistics,
8443 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8444 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8445 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8446 return Ok(Vec::new());
8447 }
8448 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8449
8450 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8451
8452 let mut gen = CounterfactualGenerator::new(self.seed + 110);
8453
8454 let specs = [
8456 CounterfactualSpec::ScaleAmount { factor: 2.5 },
8457 CounterfactualSpec::ShiftDate { days: -14 },
8458 CounterfactualSpec::SelfApprove,
8459 CounterfactualSpec::SplitTransaction { split_count: 3 },
8460 ];
8461
8462 let pairs: Vec<_> = journal_entries
8463 .iter()
8464 .enumerate()
8465 .map(|(i, je)| {
8466 let spec = &specs[i % specs.len()];
8467 gen.generate(je, spec)
8468 })
8469 .collect();
8470
8471 stats.counterfactual_pair_count = pairs.len();
8472 info!(
8473 "Counterfactual pairs generated: {} pairs from {} journal entries",
8474 pairs.len(),
8475 journal_entries.len()
8476 );
8477 self.check_resources_with_log("post-counterfactuals")?;
8478
8479 Ok(pairs)
8480 }
8481
8482 fn phase_red_flags(
8489 &self,
8490 anomaly_labels: &AnomalyLabels,
8491 document_flows: &DocumentFlowSnapshot,
8492 stats: &mut EnhancedGenerationStatistics,
8493 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8494 if !self.config.fraud.enabled {
8495 debug!("Phase 26: Skipped (fraud generation disabled)");
8496 return Ok(Vec::new());
8497 }
8498 info!("Phase 26: Generating Fraud Red-Flag Indicators");
8499
8500 use datasynth_generators::fraud::RedFlagGenerator;
8501
8502 let generator = RedFlagGenerator::new();
8503 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8504
8505 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8507 .labels
8508 .iter()
8509 .filter(|label| label.anomaly_type.is_intentional())
8510 .map(|label| label.document_id.as_str())
8511 .collect();
8512
8513 let mut flags = Vec::new();
8514
8515 for chain in &document_flows.p2p_chains {
8517 let doc_id = &chain.purchase_order.header.document_id;
8518 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8519 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8520 }
8521
8522 for chain in &document_flows.o2c_chains {
8524 let doc_id = &chain.sales_order.header.document_id;
8525 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8526 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8527 }
8528
8529 stats.red_flag_count = flags.len();
8530 info!(
8531 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8532 flags.len(),
8533 document_flows.p2p_chains.len(),
8534 document_flows.o2c_chains.len(),
8535 fraud_doc_ids.len()
8536 );
8537 self.check_resources_with_log("post-red-flags")?;
8538
8539 Ok(flags)
8540 }
8541
8542 fn phase_collusion_rings(
8548 &mut self,
8549 stats: &mut EnhancedGenerationStatistics,
8550 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8551 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8552 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8553 return Ok(Vec::new());
8554 }
8555 info!("Phase 26b: Generating Collusion Rings");
8556
8557 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8558 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8559 let months = self.config.global.period_months;
8560
8561 let employee_ids: Vec<String> = self
8562 .master_data
8563 .employees
8564 .iter()
8565 .map(|e| e.employee_id.clone())
8566 .collect();
8567 let vendor_ids: Vec<String> = self
8568 .master_data
8569 .vendors
8570 .iter()
8571 .map(|v| v.vendor_id.clone())
8572 .collect();
8573
8574 let mut generator =
8575 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8576 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8577
8578 stats.collusion_ring_count = rings.len();
8579 info!(
8580 "Collusion rings generated: {} rings, total members: {}",
8581 rings.len(),
8582 rings
8583 .iter()
8584 .map(datasynth_generators::fraud::CollusionRing::size)
8585 .sum::<usize>()
8586 );
8587 self.check_resources_with_log("post-collusion-rings")?;
8588
8589 Ok(rings)
8590 }
8591
8592 fn phase_temporal_attributes(
8597 &mut self,
8598 stats: &mut EnhancedGenerationStatistics,
8599 ) -> SynthResult<
8600 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8601 > {
8602 if !self.config.temporal_attributes.enabled {
8603 debug!("Phase 27: Skipped (temporal attributes disabled)");
8604 return Ok(Vec::new());
8605 }
8606 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8607
8608 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8609 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8610
8611 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8615 || self.config.temporal_attributes.enabled;
8616 let temporal_config = {
8617 let ta = &self.config.temporal_attributes;
8618 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8619 .enabled(ta.enabled)
8620 .closed_probability(ta.valid_time.closed_probability)
8621 .avg_validity_days(ta.valid_time.avg_validity_days)
8622 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8623 .with_version_chains(if generate_version_chains {
8624 ta.avg_versions_per_entity
8625 } else {
8626 1.0
8627 })
8628 .build()
8629 };
8630 let temporal_config = if self
8632 .config
8633 .temporal_attributes
8634 .transaction_time
8635 .allow_backdating
8636 {
8637 let mut c = temporal_config;
8638 c.transaction_time.allow_backdating = true;
8639 c.transaction_time.backdating_probability = self
8640 .config
8641 .temporal_attributes
8642 .transaction_time
8643 .backdating_probability;
8644 c.transaction_time.max_backdate_days = self
8645 .config
8646 .temporal_attributes
8647 .transaction_time
8648 .max_backdate_days;
8649 c
8650 } else {
8651 temporal_config
8652 };
8653 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8654 temporal_config,
8655 self.seed + 130,
8656 start_date,
8657 );
8658
8659 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8660 self.seed + 130,
8661 datasynth_core::GeneratorType::Vendor,
8662 );
8663
8664 let chains: Vec<_> = self
8665 .master_data
8666 .vendors
8667 .iter()
8668 .map(|vendor| {
8669 let id = uuid_factory.next();
8670 gen.generate_version_chain(vendor.clone(), id)
8671 })
8672 .collect();
8673
8674 stats.temporal_version_chain_count = chains.len();
8675 info!("Temporal version chains generated: {} chains", chains.len());
8676 self.check_resources_with_log("post-temporal-attributes")?;
8677
8678 Ok(chains)
8679 }
8680
8681 fn phase_entity_relationships(
8691 &self,
8692 journal_entries: &[JournalEntry],
8693 document_flows: &DocumentFlowSnapshot,
8694 stats: &mut EnhancedGenerationStatistics,
8695 ) -> SynthResult<(
8696 Option<datasynth_core::models::EntityGraph>,
8697 Vec<datasynth_core::models::CrossProcessLink>,
8698 )> {
8699 use datasynth_generators::relationships::{
8700 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8701 TransactionSummary,
8702 };
8703
8704 let rs_enabled = self.config.relationship_strength.enabled;
8705 let cpl_enabled = self.config.cross_process_links.enabled
8706 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8707
8708 if !rs_enabled && !cpl_enabled {
8709 debug!(
8710 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8711 );
8712 return Ok((None, Vec::new()));
8713 }
8714
8715 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8716
8717 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8718 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8719
8720 let company_code = self
8721 .config
8722 .companies
8723 .first()
8724 .map(|c| c.code.as_str())
8725 .unwrap_or("1000");
8726
8727 let gen_config = EntityGraphConfig {
8729 enabled: rs_enabled,
8730 cross_process: datasynth_generators::relationships::CrossProcessConfig {
8731 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8732 enable_return_flows: false,
8733 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8734 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8735 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8737 1.0
8738 } else {
8739 0.30
8740 },
8741 ..Default::default()
8742 },
8743 strength_config: datasynth_generators::relationships::StrengthConfig {
8744 transaction_volume_weight: self
8745 .config
8746 .relationship_strength
8747 .calculation
8748 .transaction_volume_weight,
8749 transaction_count_weight: self
8750 .config
8751 .relationship_strength
8752 .calculation
8753 .transaction_count_weight,
8754 duration_weight: self
8755 .config
8756 .relationship_strength
8757 .calculation
8758 .relationship_duration_weight,
8759 recency_weight: self.config.relationship_strength.calculation.recency_weight,
8760 mutual_connections_weight: self
8761 .config
8762 .relationship_strength
8763 .calculation
8764 .mutual_connections_weight,
8765 recency_half_life_days: self
8766 .config
8767 .relationship_strength
8768 .calculation
8769 .recency_half_life_days,
8770 },
8771 ..Default::default()
8772 };
8773
8774 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8775
8776 let entity_graph = if rs_enabled {
8778 let vendor_summaries: Vec<EntitySummary> = self
8780 .master_data
8781 .vendors
8782 .iter()
8783 .map(|v| {
8784 EntitySummary::new(
8785 &v.vendor_id,
8786 &v.name,
8787 datasynth_core::models::GraphEntityType::Vendor,
8788 start_date,
8789 )
8790 })
8791 .collect();
8792
8793 let customer_summaries: Vec<EntitySummary> = self
8794 .master_data
8795 .customers
8796 .iter()
8797 .map(|c| {
8798 EntitySummary::new(
8799 &c.customer_id,
8800 &c.name,
8801 datasynth_core::models::GraphEntityType::Customer,
8802 start_date,
8803 )
8804 })
8805 .collect();
8806
8807 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8812 std::collections::HashMap::new();
8813
8814 for je in journal_entries {
8815 let cc = je.header.company_code.clone();
8816 let posting_date = je.header.posting_date;
8817 for line in &je.lines {
8818 if let Some(ref tp) = line.trading_partner {
8819 let amount = if line.debit_amount > line.credit_amount {
8820 line.debit_amount
8821 } else {
8822 line.credit_amount
8823 };
8824 let entry = txn_summaries
8825 .entry((cc.clone(), tp.clone()))
8826 .or_insert_with(|| TransactionSummary {
8827 total_volume: rust_decimal::Decimal::ZERO,
8828 transaction_count: 0,
8829 first_transaction_date: posting_date,
8830 last_transaction_date: posting_date,
8831 related_entities: std::collections::HashSet::new(),
8832 });
8833 entry.total_volume += amount;
8834 entry.transaction_count += 1;
8835 if posting_date < entry.first_transaction_date {
8836 entry.first_transaction_date = posting_date;
8837 }
8838 if posting_date > entry.last_transaction_date {
8839 entry.last_transaction_date = posting_date;
8840 }
8841 entry.related_entities.insert(cc.clone());
8842 }
8843 }
8844 }
8845
8846 for chain in &document_flows.p2p_chains {
8849 let cc = chain.purchase_order.header.company_code.clone();
8850 let vendor_id = chain.purchase_order.vendor_id.clone();
8851 let po_date = chain.purchase_order.header.document_date;
8852 let amount = chain.purchase_order.total_net_amount;
8853
8854 let entry = txn_summaries
8855 .entry((cc.clone(), vendor_id))
8856 .or_insert_with(|| TransactionSummary {
8857 total_volume: rust_decimal::Decimal::ZERO,
8858 transaction_count: 0,
8859 first_transaction_date: po_date,
8860 last_transaction_date: po_date,
8861 related_entities: std::collections::HashSet::new(),
8862 });
8863 entry.total_volume += amount;
8864 entry.transaction_count += 1;
8865 if po_date < entry.first_transaction_date {
8866 entry.first_transaction_date = po_date;
8867 }
8868 if po_date > entry.last_transaction_date {
8869 entry.last_transaction_date = po_date;
8870 }
8871 entry.related_entities.insert(cc);
8872 }
8873
8874 for chain in &document_flows.o2c_chains {
8876 let cc = chain.sales_order.header.company_code.clone();
8877 let customer_id = chain.sales_order.customer_id.clone();
8878 let so_date = chain.sales_order.header.document_date;
8879 let amount = chain.sales_order.total_net_amount;
8880
8881 let entry = txn_summaries
8882 .entry((cc.clone(), customer_id))
8883 .or_insert_with(|| TransactionSummary {
8884 total_volume: rust_decimal::Decimal::ZERO,
8885 transaction_count: 0,
8886 first_transaction_date: so_date,
8887 last_transaction_date: so_date,
8888 related_entities: std::collections::HashSet::new(),
8889 });
8890 entry.total_volume += amount;
8891 entry.transaction_count += 1;
8892 if so_date < entry.first_transaction_date {
8893 entry.first_transaction_date = so_date;
8894 }
8895 if so_date > entry.last_transaction_date {
8896 entry.last_transaction_date = so_date;
8897 }
8898 entry.related_entities.insert(cc);
8899 }
8900
8901 let as_of_date = journal_entries
8902 .last()
8903 .map(|je| je.header.posting_date)
8904 .unwrap_or(start_date);
8905
8906 let graph = gen.generate_entity_graph(
8907 company_code,
8908 as_of_date,
8909 &vendor_summaries,
8910 &customer_summaries,
8911 &txn_summaries,
8912 );
8913
8914 info!(
8915 "Entity relationship graph: {} nodes, {} edges",
8916 graph.nodes.len(),
8917 graph.edges.len()
8918 );
8919 stats.entity_relationship_node_count = graph.nodes.len();
8920 stats.entity_relationship_edge_count = graph.edges.len();
8921 Some(graph)
8922 } else {
8923 None
8924 };
8925
8926 let cross_process_links = if cpl_enabled {
8928 let gr_refs: Vec<GoodsReceiptRef> = document_flows
8930 .p2p_chains
8931 .iter()
8932 .flat_map(|chain| {
8933 let vendor_id = chain.purchase_order.vendor_id.clone();
8934 let cc = chain.purchase_order.header.company_code.clone();
8935 chain.goods_receipts.iter().flat_map(move |gr| {
8936 gr.items.iter().filter_map({
8937 let doc_id = gr.header.document_id.clone();
8938 let v_id = vendor_id.clone();
8939 let company = cc.clone();
8940 let receipt_date = gr.header.document_date;
8941 move |item| {
8942 item.base
8943 .material_id
8944 .as_ref()
8945 .map(|mat_id| GoodsReceiptRef {
8946 document_id: doc_id.clone(),
8947 material_id: mat_id.clone(),
8948 quantity: item.base.quantity,
8949 receipt_date,
8950 vendor_id: v_id.clone(),
8951 company_code: company.clone(),
8952 })
8953 }
8954 })
8955 })
8956 })
8957 .collect();
8958
8959 let del_refs: Vec<DeliveryRef> = document_flows
8961 .o2c_chains
8962 .iter()
8963 .flat_map(|chain| {
8964 let customer_id = chain.sales_order.customer_id.clone();
8965 let cc = chain.sales_order.header.company_code.clone();
8966 chain.deliveries.iter().flat_map(move |del| {
8967 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8968 del.items.iter().filter_map({
8969 let doc_id = del.header.document_id.clone();
8970 let c_id = customer_id.clone();
8971 let company = cc.clone();
8972 move |item| {
8973 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8974 document_id: doc_id.clone(),
8975 material_id: mat_id.clone(),
8976 quantity: item.base.quantity,
8977 delivery_date,
8978 customer_id: c_id.clone(),
8979 company_code: company.clone(),
8980 })
8981 }
8982 })
8983 })
8984 })
8985 .collect();
8986
8987 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8988 info!("Cross-process links generated: {} links", links.len());
8989 stats.cross_process_link_count = links.len();
8990 links
8991 } else {
8992 Vec::new()
8993 };
8994
8995 self.check_resources_with_log("post-entity-relationships")?;
8996 Ok((entity_graph, cross_process_links))
8997 }
8998
8999 fn phase_industry_data(
9001 &self,
9002 stats: &mut EnhancedGenerationStatistics,
9003 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9004 if !self.config.industry_specific.enabled {
9005 return None;
9006 }
9007 info!("Phase 29: Generating industry-specific data");
9008 let output = datasynth_generators::industry::factory::generate_industry_output(
9009 self.config.global.industry,
9010 );
9011 stats.industry_gl_account_count = output.gl_accounts.len();
9012 info!(
9013 "Industry data generated: {} GL accounts for {:?}",
9014 output.gl_accounts.len(),
9015 self.config.global.industry
9016 );
9017 Some(output)
9018 }
9019
9020 fn phase_opening_balances(
9022 &mut self,
9023 coa: &Arc<ChartOfAccounts>,
9024 stats: &mut EnhancedGenerationStatistics,
9025 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9026 if !self.config.balance.generate_opening_balances {
9027 debug!("Phase 3b: Skipped (opening balance generation disabled)");
9028 return Ok(Vec::new());
9029 }
9030 info!("Phase 3b: Generating Opening Balances");
9031
9032 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9033 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9034 let fiscal_year = start_date.year();
9035
9036 let industry = match self.config.global.industry {
9037 IndustrySector::Manufacturing => IndustryType::Manufacturing,
9038 IndustrySector::Retail => IndustryType::Retail,
9039 IndustrySector::FinancialServices => IndustryType::Financial,
9040 IndustrySector::Healthcare => IndustryType::Healthcare,
9041 IndustrySector::Technology => IndustryType::Technology,
9042 _ => IndustryType::Manufacturing,
9043 };
9044
9045 let config = datasynth_generators::OpeningBalanceConfig {
9046 industry,
9047 ..Default::default()
9048 };
9049 let mut gen =
9050 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9051
9052 let mut results = Vec::new();
9053 for company in &self.config.companies {
9054 let spec = OpeningBalanceSpec::new(
9055 company.code.clone(),
9056 start_date,
9057 fiscal_year,
9058 company.currency.clone(),
9059 rust_decimal::Decimal::new(10_000_000, 0),
9060 industry,
9061 );
9062 let ob = gen.generate(&spec, coa, start_date, &company.code);
9063 results.push(ob);
9064 }
9065
9066 stats.opening_balance_count = results.len();
9067 info!("Opening balances generated: {} companies", results.len());
9068 self.check_resources_with_log("post-opening-balances")?;
9069
9070 Ok(results)
9071 }
9072
9073 fn phase_subledger_reconciliation(
9075 &mut self,
9076 subledger: &SubledgerSnapshot,
9077 entries: &[JournalEntry],
9078 stats: &mut EnhancedGenerationStatistics,
9079 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9080 if !self.config.balance.reconcile_subledgers {
9081 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9082 return Ok(Vec::new());
9083 }
9084 info!("Phase 9b: Reconciling GL to subledger balances");
9085
9086 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9087 .map(|d| d + chrono::Months::new(self.config.global.period_months))
9088 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9089
9090 let tracker_config = BalanceTrackerConfig {
9092 validate_on_each_entry: false,
9093 track_history: false,
9094 fail_on_validation_error: false,
9095 ..Default::default()
9096 };
9097 let recon_currency = self
9098 .config
9099 .companies
9100 .first()
9101 .map(|c| c.currency.clone())
9102 .unwrap_or_else(|| "USD".to_string());
9103 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9104 let validation_errors = tracker.apply_entries(entries);
9105 if !validation_errors.is_empty() {
9106 warn!(
9107 error_count = validation_errors.len(),
9108 "Balance tracker encountered validation errors during subledger reconciliation"
9109 );
9110 for err in &validation_errors {
9111 debug!("Balance validation error: {:?}", err);
9112 }
9113 }
9114
9115 let mut engine = datasynth_generators::ReconciliationEngine::new(
9116 datasynth_generators::ReconciliationConfig::default(),
9117 );
9118
9119 let mut results = Vec::new();
9120 let company_code = self
9121 .config
9122 .companies
9123 .first()
9124 .map(|c| c.code.as_str())
9125 .unwrap_or("1000");
9126
9127 if !subledger.ar_invoices.is_empty() {
9129 let gl_balance = tracker
9130 .get_account_balance(
9131 company_code,
9132 datasynth_core::accounts::control_accounts::AR_CONTROL,
9133 )
9134 .map(|b| b.closing_balance)
9135 .unwrap_or_default();
9136 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9137 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9138 }
9139
9140 if !subledger.ap_invoices.is_empty() {
9142 let gl_balance = tracker
9143 .get_account_balance(
9144 company_code,
9145 datasynth_core::accounts::control_accounts::AP_CONTROL,
9146 )
9147 .map(|b| b.closing_balance)
9148 .unwrap_or_default();
9149 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9150 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9151 }
9152
9153 if !subledger.fa_records.is_empty() {
9155 let gl_asset_balance = tracker
9156 .get_account_balance(
9157 company_code,
9158 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9159 )
9160 .map(|b| b.closing_balance)
9161 .unwrap_or_default();
9162 let gl_accum_depr_balance = tracker
9163 .get_account_balance(
9164 company_code,
9165 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9166 )
9167 .map(|b| b.closing_balance)
9168 .unwrap_or_default();
9169 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9170 subledger.fa_records.iter().collect();
9171 let (asset_recon, depr_recon) = engine.reconcile_fa(
9172 company_code,
9173 end_date,
9174 gl_asset_balance,
9175 gl_accum_depr_balance,
9176 &fa_refs,
9177 );
9178 results.push(asset_recon);
9179 results.push(depr_recon);
9180 }
9181
9182 if !subledger.inventory_positions.is_empty() {
9184 let gl_balance = tracker
9185 .get_account_balance(
9186 company_code,
9187 datasynth_core::accounts::control_accounts::INVENTORY,
9188 )
9189 .map(|b| b.closing_balance)
9190 .unwrap_or_default();
9191 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9192 subledger.inventory_positions.iter().collect();
9193 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9194 }
9195
9196 stats.subledger_reconciliation_count = results.len();
9197 let passed = results.iter().filter(|r| r.is_balanced()).count();
9198 let failed = results.len() - passed;
9199 info!(
9200 "Subledger reconciliation: {} checks, {} passed, {} failed",
9201 results.len(),
9202 passed,
9203 failed
9204 );
9205 self.check_resources_with_log("post-subledger-reconciliation")?;
9206
9207 Ok(results)
9208 }
9209
9210 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9212 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9213
9214 let coa_framework = self.resolve_coa_framework();
9215
9216 let mut gen = ChartOfAccountsGenerator::new(
9217 self.config.chart_of_accounts.complexity,
9218 self.config.global.industry,
9219 self.seed,
9220 )
9221 .with_coa_framework(coa_framework);
9222
9223 let coa = Arc::new(gen.generate());
9224 self.coa = Some(Arc::clone(&coa));
9225
9226 if let Some(pb) = pb {
9227 pb.finish_with_message("Chart of Accounts complete");
9228 }
9229
9230 Ok(coa)
9231 }
9232
9233 fn generate_master_data(&mut self) -> SynthResult<()> {
9235 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9236 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9237 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9238
9239 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
9241
9242 let pack = self.primary_pack().clone();
9244
9245 let vendors_per_company = self.phase_config.vendors_per_company;
9247 let customers_per_company = self.phase_config.customers_per_company;
9248 let materials_per_company = self.phase_config.materials_per_company;
9249 let assets_per_company = self.phase_config.assets_per_company;
9250 let coa_framework = self.resolve_coa_framework();
9251
9252 let per_company_results: Vec<_> = self
9255 .config
9256 .companies
9257 .par_iter()
9258 .enumerate()
9259 .map(|(i, company)| {
9260 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9261 let pack = pack.clone();
9262
9263 let mut vendor_gen = VendorGenerator::new(company_seed);
9265 vendor_gen.set_country_pack(pack.clone());
9266 vendor_gen.set_coa_framework(coa_framework);
9267 vendor_gen.set_counter_offset(i * vendors_per_company);
9268 if self.config.vendor_network.enabled {
9270 let vn = &self.config.vendor_network;
9271 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9272 enabled: true,
9273 depth: vn.depth,
9274 tier1_count: datasynth_generators::TierCountConfig::new(
9275 vn.tier1.min,
9276 vn.tier1.max,
9277 ),
9278 tier2_per_parent: datasynth_generators::TierCountConfig::new(
9279 vn.tier2_per_parent.min,
9280 vn.tier2_per_parent.max,
9281 ),
9282 tier3_per_parent: datasynth_generators::TierCountConfig::new(
9283 vn.tier3_per_parent.min,
9284 vn.tier3_per_parent.max,
9285 ),
9286 cluster_distribution: datasynth_generators::ClusterDistribution {
9287 reliable_strategic: vn.clusters.reliable_strategic,
9288 standard_operational: vn.clusters.standard_operational,
9289 transactional: vn.clusters.transactional,
9290 problematic: vn.clusters.problematic,
9291 },
9292 concentration_limits: datasynth_generators::ConcentrationLimits {
9293 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9294 max_top5: vn.dependencies.top_5_concentration,
9295 },
9296 ..datasynth_generators::VendorNetworkConfig::default()
9297 });
9298 }
9299 let vendor_pool =
9300 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9301
9302 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9304 customer_gen.set_country_pack(pack.clone());
9305 customer_gen.set_coa_framework(coa_framework);
9306 customer_gen.set_counter_offset(i * customers_per_company);
9307 if self.config.customer_segmentation.enabled {
9309 let cs = &self.config.customer_segmentation;
9310 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9311 enabled: true,
9312 segment_distribution: datasynth_generators::SegmentDistribution {
9313 enterprise: cs.value_segments.enterprise.customer_share,
9314 mid_market: cs.value_segments.mid_market.customer_share,
9315 smb: cs.value_segments.smb.customer_share,
9316 consumer: cs.value_segments.consumer.customer_share,
9317 },
9318 referral_config: datasynth_generators::ReferralConfig {
9319 enabled: cs.networks.referrals.enabled,
9320 referral_rate: cs.networks.referrals.referral_rate,
9321 ..Default::default()
9322 },
9323 hierarchy_config: datasynth_generators::HierarchyConfig {
9324 enabled: cs.networks.corporate_hierarchies.enabled,
9325 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9326 ..Default::default()
9327 },
9328 ..Default::default()
9329 };
9330 customer_gen.set_segmentation_config(seg_cfg);
9331 }
9332 let customer_pool = customer_gen.generate_customer_pool(
9333 customers_per_company,
9334 &company.code,
9335 start_date,
9336 );
9337
9338 let mut material_gen = MaterialGenerator::new(company_seed + 200);
9340 material_gen.set_country_pack(pack.clone());
9341 material_gen.set_counter_offset(i * materials_per_company);
9342 let material_pool = material_gen.generate_material_pool(
9343 materials_per_company,
9344 &company.code,
9345 start_date,
9346 );
9347
9348 let mut asset_gen = AssetGenerator::new(company_seed + 300);
9350 let asset_pool = asset_gen.generate_asset_pool(
9351 assets_per_company,
9352 &company.code,
9353 (start_date, end_date),
9354 );
9355
9356 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9358 employee_gen.set_country_pack(pack);
9359 let employee_pool =
9360 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9361
9362 let employee_change_history =
9364 employee_gen.generate_all_change_history(&employee_pool, end_date);
9365
9366 let employee_ids: Vec<String> = employee_pool
9368 .employees
9369 .iter()
9370 .map(|e| e.employee_id.clone())
9371 .collect();
9372 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9373 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9374
9375 (
9376 vendor_pool.vendors,
9377 customer_pool.customers,
9378 material_pool.materials,
9379 asset_pool.assets,
9380 employee_pool.employees,
9381 employee_change_history,
9382 cost_centers,
9383 )
9384 })
9385 .collect();
9386
9387 for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9389 per_company_results
9390 {
9391 self.master_data.vendors.extend(vendors);
9392 self.master_data.customers.extend(customers);
9393 self.master_data.materials.extend(materials);
9394 self.master_data.assets.extend(assets);
9395 self.master_data.employees.extend(employees);
9396 self.master_data.cost_centers.extend(cost_centers);
9397 self.master_data
9398 .employee_change_history
9399 .extend(change_history);
9400 }
9401
9402 if let Some(pb) = &pb {
9403 pb.inc(total);
9404 }
9405 if let Some(pb) = pb {
9406 pb.finish_with_message("Master data generation complete");
9407 }
9408
9409 Ok(())
9410 }
9411
9412 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9414 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9415 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9416
9417 let months = (self.config.global.period_months as usize).max(1);
9420 let p2p_count = self
9421 .phase_config
9422 .p2p_chains
9423 .min(self.master_data.vendors.len() * 2 * months);
9424 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9425
9426 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9428 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9429 p2p_gen.set_country_pack(self.primary_pack().clone());
9430
9431 for i in 0..p2p_count {
9432 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9433 let materials: Vec<&Material> = self
9434 .master_data
9435 .materials
9436 .iter()
9437 .skip(i % self.master_data.materials.len().max(1))
9438 .take(2.min(self.master_data.materials.len()))
9439 .collect();
9440
9441 if materials.is_empty() {
9442 continue;
9443 }
9444
9445 let company = &self.config.companies[i % self.config.companies.len()];
9446 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9447 let fiscal_period = po_date.month() as u8;
9448 let created_by = if self.master_data.employees.is_empty() {
9449 "SYSTEM"
9450 } else {
9451 self.master_data.employees[i % self.master_data.employees.len()]
9452 .user_id
9453 .as_str()
9454 };
9455
9456 let chain = p2p_gen.generate_chain(
9457 &company.code,
9458 vendor,
9459 &materials,
9460 po_date,
9461 start_date.year() as u16,
9462 fiscal_period,
9463 created_by,
9464 );
9465
9466 flows.purchase_orders.push(chain.purchase_order.clone());
9468 flows.goods_receipts.extend(chain.goods_receipts.clone());
9469 if let Some(vi) = &chain.vendor_invoice {
9470 flows.vendor_invoices.push(vi.clone());
9471 }
9472 if let Some(payment) = &chain.payment {
9473 flows.payments.push(payment.clone());
9474 }
9475 for remainder in &chain.remainder_payments {
9476 flows.payments.push(remainder.clone());
9477 }
9478 flows.p2p_chains.push(chain);
9479
9480 if let Some(pb) = &pb {
9481 pb.inc(1);
9482 }
9483 }
9484
9485 if let Some(pb) = pb {
9486 pb.finish_with_message("P2P document flows complete");
9487 }
9488
9489 let o2c_count = self
9492 .phase_config
9493 .o2c_chains
9494 .min(self.master_data.customers.len() * 2 * months);
9495 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9496
9497 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9499 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9500 o2c_gen.set_country_pack(self.primary_pack().clone());
9501
9502 for i in 0..o2c_count {
9503 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9504 let materials: Vec<&Material> = self
9505 .master_data
9506 .materials
9507 .iter()
9508 .skip(i % self.master_data.materials.len().max(1))
9509 .take(2.min(self.master_data.materials.len()))
9510 .collect();
9511
9512 if materials.is_empty() {
9513 continue;
9514 }
9515
9516 let company = &self.config.companies[i % self.config.companies.len()];
9517 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9518 let fiscal_period = so_date.month() as u8;
9519 let created_by = if self.master_data.employees.is_empty() {
9520 "SYSTEM"
9521 } else {
9522 self.master_data.employees[i % self.master_data.employees.len()]
9523 .user_id
9524 .as_str()
9525 };
9526
9527 let chain = o2c_gen.generate_chain(
9528 &company.code,
9529 customer,
9530 &materials,
9531 so_date,
9532 start_date.year() as u16,
9533 fiscal_period,
9534 created_by,
9535 );
9536
9537 flows.sales_orders.push(chain.sales_order.clone());
9539 flows.deliveries.extend(chain.deliveries.clone());
9540 if let Some(ci) = &chain.customer_invoice {
9541 flows.customer_invoices.push(ci.clone());
9542 }
9543 if let Some(receipt) = &chain.customer_receipt {
9544 flows.payments.push(receipt.clone());
9545 }
9546 for receipt in &chain.remainder_receipts {
9548 flows.payments.push(receipt.clone());
9549 }
9550 flows.o2c_chains.push(chain);
9551
9552 if let Some(pb) = &pb {
9553 pb.inc(1);
9554 }
9555 }
9556
9557 if let Some(pb) = pb {
9558 pb.finish_with_message("O2C document flows complete");
9559 }
9560
9561 {
9565 let mut refs = Vec::new();
9566 for doc in &flows.purchase_orders {
9567 refs.extend(doc.header.document_references.iter().cloned());
9568 }
9569 for doc in &flows.goods_receipts {
9570 refs.extend(doc.header.document_references.iter().cloned());
9571 }
9572 for doc in &flows.vendor_invoices {
9573 refs.extend(doc.header.document_references.iter().cloned());
9574 }
9575 for doc in &flows.sales_orders {
9576 refs.extend(doc.header.document_references.iter().cloned());
9577 }
9578 for doc in &flows.deliveries {
9579 refs.extend(doc.header.document_references.iter().cloned());
9580 }
9581 for doc in &flows.customer_invoices {
9582 refs.extend(doc.header.document_references.iter().cloned());
9583 }
9584 for doc in &flows.payments {
9585 refs.extend(doc.header.document_references.iter().cloned());
9586 }
9587 debug!(
9588 "Collected {} document cross-references from document headers",
9589 refs.len()
9590 );
9591 flows.document_references = refs;
9592 }
9593
9594 Ok(())
9595 }
9596
9597 fn generate_journal_entries(
9599 &mut self,
9600 coa: &Arc<ChartOfAccounts>,
9601 ) -> SynthResult<Vec<JournalEntry>> {
9602 use datasynth_core::traits::ParallelGenerator;
9603
9604 let total = self.calculate_total_transactions();
9605 let pb = self.create_progress_bar(total, "Generating Journal Entries");
9606
9607 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9608 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9609 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9610
9611 let company_codes: Vec<String> = self
9612 .config
9613 .companies
9614 .iter()
9615 .map(|c| c.code.clone())
9616 .collect();
9617
9618 let generator = JournalEntryGenerator::new_with_params(
9619 self.config.transactions.clone(),
9620 Arc::clone(coa),
9621 company_codes,
9622 start_date,
9623 end_date,
9624 self.seed,
9625 );
9626
9627 let je_pack = self.primary_pack();
9631
9632 let mut generator = generator
9633 .with_master_data(
9634 &self.master_data.vendors,
9635 &self.master_data.customers,
9636 &self.master_data.materials,
9637 )
9638 .with_country_pack_names(je_pack)
9639 .with_country_pack_temporal(
9640 self.config.temporal_patterns.clone(),
9641 self.seed + 200,
9642 je_pack,
9643 )
9644 .with_persona_errors(true)
9645 .with_fraud_config(self.config.fraud.clone());
9646
9647 if self.config.temporal.enabled {
9649 let drift_config = self.config.temporal.to_core_config();
9650 generator = generator.with_drift_config(drift_config, self.seed + 100);
9651 }
9652
9653 self.check_memory_limit()?;
9655
9656 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9658
9659 let entries = if total >= 10_000 && num_threads > 1 {
9663 let sub_generators = generator.split(num_threads);
9666 let entries_per_thread = total as usize / num_threads;
9667 let remainder = total as usize % num_threads;
9668
9669 let batches: Vec<Vec<JournalEntry>> = sub_generators
9670 .into_par_iter()
9671 .enumerate()
9672 .map(|(i, mut gen)| {
9673 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9674 gen.generate_batch(count)
9675 })
9676 .collect();
9677
9678 let entries = JournalEntryGenerator::merge_results(batches);
9680
9681 if let Some(pb) = &pb {
9682 pb.inc(total);
9683 }
9684 entries
9685 } else {
9686 let mut entries = Vec::with_capacity(total as usize);
9688 for _ in 0..total {
9689 let entry = generator.generate();
9690 entries.push(entry);
9691 if let Some(pb) = &pb {
9692 pb.inc(1);
9693 }
9694 }
9695 entries
9696 };
9697
9698 if let Some(pb) = pb {
9699 pb.finish_with_message("Journal entries complete");
9700 }
9701
9702 Ok(entries)
9703 }
9704
9705 fn generate_jes_from_document_flows(
9710 &mut self,
9711 flows: &DocumentFlowSnapshot,
9712 ) -> SynthResult<Vec<JournalEntry>> {
9713 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9714 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9715
9716 let je_config = match self.resolve_coa_framework() {
9717 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9718 CoAFramework::GermanSkr04 => {
9719 let fa = datasynth_core::FrameworkAccounts::german_gaap();
9720 DocumentFlowJeConfig::from(&fa)
9721 }
9722 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9723 };
9724
9725 let populate_fec = je_config.populate_fec_fields;
9726 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9727
9728 if populate_fec {
9732 let mut aux_lookup = std::collections::HashMap::new();
9733 for vendor in &self.master_data.vendors {
9734 if let Some(ref aux) = vendor.auxiliary_gl_account {
9735 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9736 }
9737 }
9738 for customer in &self.master_data.customers {
9739 if let Some(ref aux) = customer.auxiliary_gl_account {
9740 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9741 }
9742 }
9743 if !aux_lookup.is_empty() {
9744 generator.set_auxiliary_account_lookup(aux_lookup);
9745 }
9746 }
9747
9748 let mut entries = Vec::new();
9749
9750 for chain in &flows.p2p_chains {
9752 let chain_entries = generator.generate_from_p2p_chain(chain);
9753 entries.extend(chain_entries);
9754 if let Some(pb) = &pb {
9755 pb.inc(1);
9756 }
9757 }
9758
9759 for chain in &flows.o2c_chains {
9761 let chain_entries = generator.generate_from_o2c_chain(chain);
9762 entries.extend(chain_entries);
9763 if let Some(pb) = &pb {
9764 pb.inc(1);
9765 }
9766 }
9767
9768 if let Some(pb) = pb {
9769 pb.finish_with_message(format!(
9770 "Generated {} JEs from document flows",
9771 entries.len()
9772 ));
9773 }
9774
9775 Ok(entries)
9776 }
9777
9778 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9784 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9785
9786 let mut jes = Vec::with_capacity(payroll_runs.len());
9787
9788 for run in payroll_runs {
9789 let mut je = JournalEntry::new_simple(
9790 format!("JE-PAYROLL-{}", run.payroll_id),
9791 run.company_code.clone(),
9792 run.run_date,
9793 format!("Payroll {}", run.payroll_id),
9794 );
9795
9796 je.add_line(JournalEntryLine {
9798 line_number: 1,
9799 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9800 debit_amount: run.total_gross,
9801 reference: Some(run.payroll_id.clone()),
9802 text: Some(format!(
9803 "Payroll {} ({} employees)",
9804 run.payroll_id, run.employee_count
9805 )),
9806 ..Default::default()
9807 });
9808
9809 je.add_line(JournalEntryLine {
9811 line_number: 2,
9812 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9813 credit_amount: run.total_gross,
9814 reference: Some(run.payroll_id.clone()),
9815 ..Default::default()
9816 });
9817
9818 jes.push(je);
9819 }
9820
9821 jes
9822 }
9823
9824 fn link_document_flows_to_subledgers(
9829 &mut self,
9830 flows: &DocumentFlowSnapshot,
9831 ) -> SynthResult<SubledgerSnapshot> {
9832 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9833 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9834
9835 let vendor_names: std::collections::HashMap<String, String> = self
9837 .master_data
9838 .vendors
9839 .iter()
9840 .map(|v| (v.vendor_id.clone(), v.name.clone()))
9841 .collect();
9842 let customer_names: std::collections::HashMap<String, String> = self
9843 .master_data
9844 .customers
9845 .iter()
9846 .map(|c| (c.customer_id.clone(), c.name.clone()))
9847 .collect();
9848
9849 let mut linker = DocumentFlowLinker::new()
9850 .with_vendor_names(vendor_names)
9851 .with_customer_names(customer_names);
9852
9853 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9855 if let Some(pb) = &pb {
9856 pb.inc(flows.vendor_invoices.len() as u64);
9857 }
9858
9859 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9861 if let Some(pb) = &pb {
9862 pb.inc(flows.customer_invoices.len() as u64);
9863 }
9864
9865 if let Some(pb) = pb {
9866 pb.finish_with_message(format!(
9867 "Linked {} AP and {} AR invoices",
9868 ap_invoices.len(),
9869 ar_invoices.len()
9870 ));
9871 }
9872
9873 Ok(SubledgerSnapshot {
9874 ap_invoices,
9875 ar_invoices,
9876 fa_records: Vec::new(),
9877 inventory_positions: Vec::new(),
9878 inventory_movements: Vec::new(),
9879 ar_aging_reports: Vec::new(),
9881 ap_aging_reports: Vec::new(),
9882 depreciation_runs: Vec::new(),
9884 inventory_valuations: Vec::new(),
9885 dunning_runs: Vec::new(),
9887 dunning_letters: Vec::new(),
9888 })
9889 }
9890
9891 #[allow(clippy::too_many_arguments)]
9896 fn generate_ocpm_events(
9897 &mut self,
9898 flows: &DocumentFlowSnapshot,
9899 sourcing: &SourcingSnapshot,
9900 hr: &HrSnapshot,
9901 manufacturing: &ManufacturingSnapshot,
9902 banking: &BankingSnapshot,
9903 audit: &AuditSnapshot,
9904 financial_reporting: &FinancialReportingSnapshot,
9905 ) -> SynthResult<OcpmSnapshot> {
9906 let total_chains = flows.p2p_chains.len()
9907 + flows.o2c_chains.len()
9908 + sourcing.sourcing_projects.len()
9909 + hr.payroll_runs.len()
9910 + manufacturing.production_orders.len()
9911 + banking.customers.len()
9912 + audit.engagements.len()
9913 + financial_reporting.bank_reconciliations.len();
9914 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9915
9916 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9918 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9919
9920 let ocpm_config = OcpmGeneratorConfig {
9922 generate_p2p: true,
9923 generate_o2c: true,
9924 generate_s2c: !sourcing.sourcing_projects.is_empty(),
9925 generate_h2r: !hr.payroll_runs.is_empty(),
9926 generate_mfg: !manufacturing.production_orders.is_empty(),
9927 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9928 generate_bank: !banking.customers.is_empty(),
9929 generate_audit: !audit.engagements.is_empty(),
9930 happy_path_rate: 0.75,
9931 exception_path_rate: 0.20,
9932 error_path_rate: 0.05,
9933 add_duration_variability: true,
9934 duration_std_dev_factor: 0.3,
9935 };
9936 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9937 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9938
9939 let available_users: Vec<String> = self
9941 .master_data
9942 .employees
9943 .iter()
9944 .take(20)
9945 .map(|e| e.user_id.clone())
9946 .collect();
9947
9948 let fallback_date =
9950 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9951 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9952 .unwrap_or(fallback_date);
9953 let base_midnight = base_date
9954 .and_hms_opt(0, 0, 0)
9955 .expect("midnight is always valid");
9956 let base_datetime =
9957 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9958
9959 let add_result = |event_log: &mut OcpmEventLog,
9961 result: datasynth_ocpm::CaseGenerationResult| {
9962 for event in result.events {
9963 event_log.add_event(event);
9964 }
9965 for object in result.objects {
9966 event_log.add_object(object);
9967 }
9968 for relationship in result.relationships {
9969 event_log.add_relationship(relationship);
9970 }
9971 for corr in result.correlation_events {
9972 event_log.add_correlation_event(corr);
9973 }
9974 event_log.add_case(result.case_trace);
9975 };
9976
9977 for chain in &flows.p2p_chains {
9979 let po = &chain.purchase_order;
9980 let documents = P2pDocuments::new(
9981 &po.header.document_id,
9982 &po.vendor_id,
9983 &po.header.company_code,
9984 po.total_net_amount,
9985 &po.header.currency,
9986 &ocpm_uuid_factory,
9987 )
9988 .with_goods_receipt(
9989 chain
9990 .goods_receipts
9991 .first()
9992 .map(|gr| gr.header.document_id.as_str())
9993 .unwrap_or(""),
9994 &ocpm_uuid_factory,
9995 )
9996 .with_invoice(
9997 chain
9998 .vendor_invoice
9999 .as_ref()
10000 .map(|vi| vi.header.document_id.as_str())
10001 .unwrap_or(""),
10002 &ocpm_uuid_factory,
10003 )
10004 .with_payment(
10005 chain
10006 .payment
10007 .as_ref()
10008 .map(|p| p.header.document_id.as_str())
10009 .unwrap_or(""),
10010 &ocpm_uuid_factory,
10011 );
10012
10013 let start_time =
10014 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10015 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10016 add_result(&mut event_log, result);
10017
10018 if let Some(pb) = &pb {
10019 pb.inc(1);
10020 }
10021 }
10022
10023 for chain in &flows.o2c_chains {
10025 let so = &chain.sales_order;
10026 let documents = O2cDocuments::new(
10027 &so.header.document_id,
10028 &so.customer_id,
10029 &so.header.company_code,
10030 so.total_net_amount,
10031 &so.header.currency,
10032 &ocpm_uuid_factory,
10033 )
10034 .with_delivery(
10035 chain
10036 .deliveries
10037 .first()
10038 .map(|d| d.header.document_id.as_str())
10039 .unwrap_or(""),
10040 &ocpm_uuid_factory,
10041 )
10042 .with_invoice(
10043 chain
10044 .customer_invoice
10045 .as_ref()
10046 .map(|ci| ci.header.document_id.as_str())
10047 .unwrap_or(""),
10048 &ocpm_uuid_factory,
10049 )
10050 .with_receipt(
10051 chain
10052 .customer_receipt
10053 .as_ref()
10054 .map(|r| r.header.document_id.as_str())
10055 .unwrap_or(""),
10056 &ocpm_uuid_factory,
10057 );
10058
10059 let start_time =
10060 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10061 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10062 add_result(&mut event_log, result);
10063
10064 if let Some(pb) = &pb {
10065 pb.inc(1);
10066 }
10067 }
10068
10069 for project in &sourcing.sourcing_projects {
10071 let vendor_id = sourcing
10073 .contracts
10074 .iter()
10075 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10076 .map(|c| c.vendor_id.clone())
10077 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10078 .or_else(|| {
10079 self.master_data
10080 .vendors
10081 .first()
10082 .map(|v| v.vendor_id.clone())
10083 })
10084 .unwrap_or_else(|| "V000".to_string());
10085 let mut docs = S2cDocuments::new(
10086 &project.project_id,
10087 &vendor_id,
10088 &project.company_code,
10089 project.estimated_annual_spend,
10090 &ocpm_uuid_factory,
10091 );
10092 if let Some(rfx) = sourcing
10094 .rfx_events
10095 .iter()
10096 .find(|r| r.sourcing_project_id == project.project_id)
10097 {
10098 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10099 if let Some(bid) = sourcing.bids.iter().find(|b| {
10101 b.rfx_id == rfx.rfx_id
10102 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10103 }) {
10104 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10105 }
10106 }
10107 if let Some(contract) = sourcing
10109 .contracts
10110 .iter()
10111 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10112 {
10113 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10114 }
10115 let start_time = base_datetime - chrono::Duration::days(90);
10116 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10117 add_result(&mut event_log, result);
10118
10119 if let Some(pb) = &pb {
10120 pb.inc(1);
10121 }
10122 }
10123
10124 for run in &hr.payroll_runs {
10126 let employee_id = hr
10128 .payroll_line_items
10129 .iter()
10130 .find(|li| li.payroll_id == run.payroll_id)
10131 .map(|li| li.employee_id.as_str())
10132 .unwrap_or("EMP000");
10133 let docs = H2rDocuments::new(
10134 &run.payroll_id,
10135 employee_id,
10136 &run.company_code,
10137 run.total_gross,
10138 &ocpm_uuid_factory,
10139 )
10140 .with_time_entries(
10141 hr.time_entries
10142 .iter()
10143 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10144 .take(5)
10145 .map(|t| t.entry_id.as_str())
10146 .collect(),
10147 );
10148 let start_time = base_datetime - chrono::Duration::days(30);
10149 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10150 add_result(&mut event_log, result);
10151
10152 if let Some(pb) = &pb {
10153 pb.inc(1);
10154 }
10155 }
10156
10157 for order in &manufacturing.production_orders {
10159 let mut docs = MfgDocuments::new(
10160 &order.order_id,
10161 &order.material_id,
10162 &order.company_code,
10163 order.planned_quantity,
10164 &ocpm_uuid_factory,
10165 )
10166 .with_operations(
10167 order
10168 .operations
10169 .iter()
10170 .map(|o| format!("OP-{:04}", o.operation_number))
10171 .collect::<Vec<_>>()
10172 .iter()
10173 .map(std::string::String::as_str)
10174 .collect(),
10175 );
10176 if let Some(insp) = manufacturing
10178 .quality_inspections
10179 .iter()
10180 .find(|i| i.reference_id == order.order_id)
10181 {
10182 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10183 }
10184 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10186 cc.items
10187 .iter()
10188 .any(|item| item.material_id == order.material_id)
10189 }) {
10190 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10191 }
10192 let start_time = base_datetime - chrono::Duration::days(60);
10193 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10194 add_result(&mut event_log, result);
10195
10196 if let Some(pb) = &pb {
10197 pb.inc(1);
10198 }
10199 }
10200
10201 for customer in &banking.customers {
10203 let customer_id_str = customer.customer_id.to_string();
10204 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10205 if let Some(account) = banking
10207 .accounts
10208 .iter()
10209 .find(|a| a.primary_owner_id == customer.customer_id)
10210 {
10211 let account_id_str = account.account_id.to_string();
10212 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10213 let txn_strs: Vec<String> = banking
10215 .transactions
10216 .iter()
10217 .filter(|t| t.account_id == account.account_id)
10218 .take(10)
10219 .map(|t| t.transaction_id.to_string())
10220 .collect();
10221 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10222 let txn_amounts: Vec<rust_decimal::Decimal> = banking
10223 .transactions
10224 .iter()
10225 .filter(|t| t.account_id == account.account_id)
10226 .take(10)
10227 .map(|t| t.amount)
10228 .collect();
10229 if !txn_ids.is_empty() {
10230 docs = docs.with_transactions(txn_ids, txn_amounts);
10231 }
10232 }
10233 let start_time = base_datetime - chrono::Duration::days(180);
10234 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10235 add_result(&mut event_log, result);
10236
10237 if let Some(pb) = &pb {
10238 pb.inc(1);
10239 }
10240 }
10241
10242 for engagement in &audit.engagements {
10244 let engagement_id_str = engagement.engagement_id.to_string();
10245 let docs = AuditDocuments::new(
10246 &engagement_id_str,
10247 &engagement.client_entity_id,
10248 &ocpm_uuid_factory,
10249 )
10250 .with_workpapers(
10251 audit
10252 .workpapers
10253 .iter()
10254 .filter(|w| w.engagement_id == engagement.engagement_id)
10255 .take(10)
10256 .map(|w| w.workpaper_id.to_string())
10257 .collect::<Vec<_>>()
10258 .iter()
10259 .map(std::string::String::as_str)
10260 .collect(),
10261 )
10262 .with_evidence(
10263 audit
10264 .evidence
10265 .iter()
10266 .filter(|e| e.engagement_id == engagement.engagement_id)
10267 .take(10)
10268 .map(|e| e.evidence_id.to_string())
10269 .collect::<Vec<_>>()
10270 .iter()
10271 .map(std::string::String::as_str)
10272 .collect(),
10273 )
10274 .with_risks(
10275 audit
10276 .risk_assessments
10277 .iter()
10278 .filter(|r| r.engagement_id == engagement.engagement_id)
10279 .take(5)
10280 .map(|r| r.risk_id.to_string())
10281 .collect::<Vec<_>>()
10282 .iter()
10283 .map(std::string::String::as_str)
10284 .collect(),
10285 )
10286 .with_findings(
10287 audit
10288 .findings
10289 .iter()
10290 .filter(|f| f.engagement_id == engagement.engagement_id)
10291 .take(5)
10292 .map(|f| f.finding_id.to_string())
10293 .collect::<Vec<_>>()
10294 .iter()
10295 .map(std::string::String::as_str)
10296 .collect(),
10297 )
10298 .with_judgments(
10299 audit
10300 .judgments
10301 .iter()
10302 .filter(|j| j.engagement_id == engagement.engagement_id)
10303 .take(5)
10304 .map(|j| j.judgment_id.to_string())
10305 .collect::<Vec<_>>()
10306 .iter()
10307 .map(std::string::String::as_str)
10308 .collect(),
10309 );
10310 let start_time = base_datetime - chrono::Duration::days(120);
10311 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10312 add_result(&mut event_log, result);
10313
10314 if let Some(pb) = &pb {
10315 pb.inc(1);
10316 }
10317 }
10318
10319 for recon in &financial_reporting.bank_reconciliations {
10321 let docs = BankReconDocuments::new(
10322 &recon.reconciliation_id,
10323 &recon.bank_account_id,
10324 &recon.company_code,
10325 recon.bank_ending_balance,
10326 &ocpm_uuid_factory,
10327 )
10328 .with_statement_lines(
10329 recon
10330 .statement_lines
10331 .iter()
10332 .take(20)
10333 .map(|l| l.line_id.as_str())
10334 .collect(),
10335 )
10336 .with_reconciling_items(
10337 recon
10338 .reconciling_items
10339 .iter()
10340 .take(10)
10341 .map(|i| i.item_id.as_str())
10342 .collect(),
10343 );
10344 let start_time = base_datetime - chrono::Duration::days(30);
10345 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10346 add_result(&mut event_log, result);
10347
10348 if let Some(pb) = &pb {
10349 pb.inc(1);
10350 }
10351 }
10352
10353 event_log.compute_variants();
10355
10356 let summary = event_log.summary();
10357
10358 if let Some(pb) = pb {
10359 pb.finish_with_message(format!(
10360 "Generated {} OCPM events, {} objects",
10361 summary.event_count, summary.object_count
10362 ));
10363 }
10364
10365 Ok(OcpmSnapshot {
10366 event_count: summary.event_count,
10367 object_count: summary.object_count,
10368 case_count: summary.case_count,
10369 event_log: Some(event_log),
10370 })
10371 }
10372
10373 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10375 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10376
10377 let total_rate = if self.config.anomaly_injection.enabled {
10380 self.config.anomaly_injection.rates.total_rate
10381 } else if self.config.fraud.enabled {
10382 self.config.fraud.fraud_rate
10383 } else {
10384 0.02
10385 };
10386
10387 let fraud_rate = if self.config.anomaly_injection.enabled {
10388 self.config.anomaly_injection.rates.fraud_rate
10389 } else {
10390 AnomalyRateConfig::default().fraud_rate
10391 };
10392
10393 let error_rate = if self.config.anomaly_injection.enabled {
10394 self.config.anomaly_injection.rates.error_rate
10395 } else {
10396 AnomalyRateConfig::default().error_rate
10397 };
10398
10399 let process_issue_rate = if self.config.anomaly_injection.enabled {
10400 self.config.anomaly_injection.rates.process_rate
10401 } else {
10402 AnomalyRateConfig::default().process_issue_rate
10403 };
10404
10405 let anomaly_config = AnomalyInjectorConfig {
10406 rates: AnomalyRateConfig {
10407 total_rate,
10408 fraud_rate,
10409 error_rate,
10410 process_issue_rate,
10411 ..Default::default()
10412 },
10413 seed: self.seed + 5000,
10414 ..Default::default()
10415 };
10416
10417 let mut injector = AnomalyInjector::new(anomaly_config);
10418 let result = injector.process_entries(entries);
10419
10420 if let Some(pb) = &pb {
10421 pb.inc(entries.len() as u64);
10422 pb.finish_with_message("Anomaly injection complete");
10423 }
10424
10425 let mut by_type = HashMap::new();
10426 for label in &result.labels {
10427 *by_type
10428 .entry(format!("{:?}", label.anomaly_type))
10429 .or_insert(0) += 1;
10430 }
10431
10432 Ok(AnomalyLabels {
10433 labels: result.labels,
10434 summary: Some(result.summary),
10435 by_type,
10436 })
10437 }
10438
10439 fn validate_journal_entries(
10448 &mut self,
10449 entries: &[JournalEntry],
10450 ) -> SynthResult<BalanceValidationResult> {
10451 let clean_entries: Vec<&JournalEntry> = entries
10453 .iter()
10454 .filter(|e| {
10455 e.header
10456 .header_text
10457 .as_ref()
10458 .map(|t| !t.contains("[HUMAN_ERROR:"))
10459 .unwrap_or(true)
10460 })
10461 .collect();
10462
10463 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10464
10465 let config = BalanceTrackerConfig {
10467 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
10471 };
10472 let validation_currency = self
10473 .config
10474 .companies
10475 .first()
10476 .map(|c| c.currency.clone())
10477 .unwrap_or_else(|| "USD".to_string());
10478
10479 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10480
10481 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10483 let errors = tracker.apply_entries(&clean_refs);
10484
10485 if let Some(pb) = &pb {
10486 pb.inc(entries.len() as u64);
10487 }
10488
10489 let has_unbalanced = tracker
10492 .get_validation_errors()
10493 .iter()
10494 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10495
10496 let mut all_errors = errors;
10499 all_errors.extend(tracker.get_validation_errors().iter().cloned());
10500 let company_codes: Vec<String> = self
10501 .config
10502 .companies
10503 .iter()
10504 .map(|c| c.code.clone())
10505 .collect();
10506
10507 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10508 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10509 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10510
10511 for company_code in &company_codes {
10512 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10513 all_errors.push(e);
10514 }
10515 }
10516
10517 let stats = tracker.get_statistics();
10519
10520 let is_balanced = all_errors.is_empty();
10522
10523 if let Some(pb) = pb {
10524 let msg = if is_balanced {
10525 "Balance validation passed"
10526 } else {
10527 "Balance validation completed with errors"
10528 };
10529 pb.finish_with_message(msg);
10530 }
10531
10532 Ok(BalanceValidationResult {
10533 validated: true,
10534 is_balanced,
10535 entries_processed: stats.entries_processed,
10536 total_debits: stats.total_debits,
10537 total_credits: stats.total_credits,
10538 accounts_tracked: stats.accounts_tracked,
10539 companies_tracked: stats.companies_tracked,
10540 validation_errors: all_errors,
10541 has_unbalanced_entries: has_unbalanced,
10542 })
10543 }
10544
10545 fn inject_data_quality(
10550 &mut self,
10551 entries: &mut [JournalEntry],
10552 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10553 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10554
10555 let config = if self.config.data_quality.enabled {
10558 let dq = &self.config.data_quality;
10559 DataQualityConfig {
10560 enable_missing_values: dq.missing_values.enabled,
10561 missing_values: datasynth_generators::MissingValueConfig {
10562 global_rate: dq.effective_missing_rate(),
10563 ..Default::default()
10564 },
10565 enable_format_variations: dq.format_variations.enabled,
10566 format_variations: datasynth_generators::FormatVariationConfig {
10567 date_variation_rate: dq.format_variations.dates.rate,
10568 amount_variation_rate: dq.format_variations.amounts.rate,
10569 identifier_variation_rate: dq.format_variations.identifiers.rate,
10570 ..Default::default()
10571 },
10572 enable_duplicates: dq.duplicates.enabled,
10573 duplicates: datasynth_generators::DuplicateConfig {
10574 duplicate_rate: dq.effective_duplicate_rate(),
10575 ..Default::default()
10576 },
10577 enable_typos: dq.typos.enabled,
10578 typos: datasynth_generators::TypoConfig {
10579 char_error_rate: dq.effective_typo_rate(),
10580 ..Default::default()
10581 },
10582 enable_encoding_issues: dq.encoding_issues.enabled,
10583 encoding_issue_rate: dq.encoding_issues.rate,
10584 seed: self.seed.wrapping_add(77), track_statistics: true,
10586 }
10587 } else {
10588 DataQualityConfig::minimal()
10589 };
10590 let mut injector = DataQualityInjector::new(config);
10591
10592 injector.set_country_pack(self.primary_pack().clone());
10594
10595 let context = HashMap::new();
10597
10598 for entry in entries.iter_mut() {
10599 if let Some(text) = &entry.header.header_text {
10601 let processed = injector.process_text_field(
10602 "header_text",
10603 text,
10604 &entry.header.document_id.to_string(),
10605 &context,
10606 );
10607 match processed {
10608 Some(new_text) if new_text != *text => {
10609 entry.header.header_text = Some(new_text);
10610 }
10611 None => {
10612 entry.header.header_text = None; }
10614 _ => {}
10615 }
10616 }
10617
10618 if let Some(ref_text) = &entry.header.reference {
10620 let processed = injector.process_text_field(
10621 "reference",
10622 ref_text,
10623 &entry.header.document_id.to_string(),
10624 &context,
10625 );
10626 match processed {
10627 Some(new_text) if new_text != *ref_text => {
10628 entry.header.reference = Some(new_text);
10629 }
10630 None => {
10631 entry.header.reference = None;
10632 }
10633 _ => {}
10634 }
10635 }
10636
10637 let user_persona = entry.header.user_persona.clone();
10639 if let Some(processed) = injector.process_text_field(
10640 "user_persona",
10641 &user_persona,
10642 &entry.header.document_id.to_string(),
10643 &context,
10644 ) {
10645 if processed != user_persona {
10646 entry.header.user_persona = processed;
10647 }
10648 }
10649
10650 for line in &mut entry.lines {
10652 if let Some(ref text) = line.line_text {
10654 let processed = injector.process_text_field(
10655 "line_text",
10656 text,
10657 &entry.header.document_id.to_string(),
10658 &context,
10659 );
10660 match processed {
10661 Some(new_text) if new_text != *text => {
10662 line.line_text = Some(new_text);
10663 }
10664 None => {
10665 line.line_text = None;
10666 }
10667 _ => {}
10668 }
10669 }
10670
10671 if let Some(cc) = &line.cost_center {
10673 let processed = injector.process_text_field(
10674 "cost_center",
10675 cc,
10676 &entry.header.document_id.to_string(),
10677 &context,
10678 );
10679 match processed {
10680 Some(new_cc) if new_cc != *cc => {
10681 line.cost_center = Some(new_cc);
10682 }
10683 None => {
10684 line.cost_center = None;
10685 }
10686 _ => {}
10687 }
10688 }
10689 }
10690
10691 if let Some(pb) = &pb {
10692 pb.inc(1);
10693 }
10694 }
10695
10696 if let Some(pb) = pb {
10697 pb.finish_with_message("Data quality injection complete");
10698 }
10699
10700 let quality_issues = injector.issues().to_vec();
10701 Ok((injector.stats().clone(), quality_issues))
10702 }
10703
10704 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10715 let use_fsm = self
10717 .config
10718 .audit
10719 .fsm
10720 .as_ref()
10721 .map(|f| f.enabled)
10722 .unwrap_or(false);
10723
10724 if use_fsm {
10725 return self.generate_audit_data_with_fsm(entries);
10726 }
10727
10728 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10730 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10731 let fiscal_year = start_date.year() as u16;
10732 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10733
10734 let total_revenue: rust_decimal::Decimal = entries
10736 .iter()
10737 .flat_map(|e| e.lines.iter())
10738 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10739 .map(|l| l.credit_amount)
10740 .sum();
10741
10742 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10744
10745 let mut snapshot = AuditSnapshot::default();
10746
10747 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10749 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10750 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10751 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10752 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10753 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10754 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10755 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10756 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10757 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10758 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10759 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10760
10761 let accounts: Vec<String> = self
10763 .coa
10764 .as_ref()
10765 .map(|coa| {
10766 coa.get_postable_accounts()
10767 .iter()
10768 .map(|acc| acc.account_code().to_string())
10769 .collect()
10770 })
10771 .unwrap_or_default();
10772
10773 for (i, company) in self.config.companies.iter().enumerate() {
10775 let company_revenue = total_revenue
10777 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10778
10779 let engagements_for_company =
10781 self.phase_config.audit_engagements / self.config.companies.len().max(1);
10782 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10783 1
10784 } else {
10785 0
10786 };
10787
10788 for _eng_idx in 0..(engagements_for_company + extra) {
10789 let mut engagement = engagement_gen.generate_engagement(
10791 &company.code,
10792 &company.name,
10793 fiscal_year,
10794 period_end,
10795 company_revenue,
10796 None, );
10798
10799 if !self.master_data.employees.is_empty() {
10801 let emp_count = self.master_data.employees.len();
10802 let base = (i * 10 + _eng_idx) % emp_count;
10804 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10805 .employee_id
10806 .clone();
10807 engagement.engagement_manager_id = self.master_data.employees
10808 [(base + 1) % emp_count]
10809 .employee_id
10810 .clone();
10811 let real_team: Vec<String> = engagement
10812 .team_member_ids
10813 .iter()
10814 .enumerate()
10815 .map(|(j, _)| {
10816 self.master_data.employees[(base + 2 + j) % emp_count]
10817 .employee_id
10818 .clone()
10819 })
10820 .collect();
10821 engagement.team_member_ids = real_team;
10822 }
10823
10824 if let Some(pb) = &pb {
10825 pb.inc(1);
10826 }
10827
10828 let team_members: Vec<String> = engagement.team_member_ids.clone();
10830
10831 let workpapers =
10833 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10834
10835 for wp in &workpapers {
10836 if let Some(pb) = &pb {
10837 pb.inc(1);
10838 }
10839
10840 let evidence = evidence_gen.generate_evidence_for_workpaper(
10842 wp,
10843 &team_members,
10844 wp.preparer_date,
10845 );
10846
10847 for _ in &evidence {
10848 if let Some(pb) = &pb {
10849 pb.inc(1);
10850 }
10851 }
10852
10853 snapshot.evidence.extend(evidence);
10854 }
10855
10856 let risks =
10858 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10859
10860 for _ in &risks {
10861 if let Some(pb) = &pb {
10862 pb.inc(1);
10863 }
10864 }
10865 snapshot.risk_assessments.extend(risks);
10866
10867 let findings = finding_gen.generate_findings_for_engagement(
10869 &engagement,
10870 &workpapers,
10871 &team_members,
10872 );
10873
10874 for _ in &findings {
10875 if let Some(pb) = &pb {
10876 pb.inc(1);
10877 }
10878 }
10879 snapshot.findings.extend(findings);
10880
10881 let judgments =
10883 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10884
10885 for _ in &judgments {
10886 if let Some(pb) = &pb {
10887 pb.inc(1);
10888 }
10889 }
10890 snapshot.judgments.extend(judgments);
10891
10892 let (confs, resps) =
10894 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10895 snapshot.confirmations.extend(confs);
10896 snapshot.confirmation_responses.extend(resps);
10897
10898 let team_pairs: Vec<(String, String)> = team_members
10900 .iter()
10901 .map(|id| {
10902 let name = self
10903 .master_data
10904 .employees
10905 .iter()
10906 .find(|e| e.employee_id == *id)
10907 .map(|e| e.display_name.clone())
10908 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10909 (id.clone(), name)
10910 })
10911 .collect();
10912 for wp in &workpapers {
10913 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10914 snapshot.procedure_steps.extend(steps);
10915 }
10916
10917 for wp in &workpapers {
10919 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10920 snapshot.samples.push(sample);
10921 }
10922 }
10923
10924 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10926 snapshot.analytical_results.extend(analytical);
10927
10928 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10930 snapshot.ia_functions.push(ia_func);
10931 snapshot.ia_reports.extend(ia_reports);
10932
10933 let vendor_names: Vec<String> = self
10935 .master_data
10936 .vendors
10937 .iter()
10938 .map(|v| v.name.clone())
10939 .collect();
10940 let customer_names: Vec<String> = self
10941 .master_data
10942 .customers
10943 .iter()
10944 .map(|c| c.name.clone())
10945 .collect();
10946 let (parties, rp_txns) =
10947 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10948 snapshot.related_parties.extend(parties);
10949 snapshot.related_party_transactions.extend(rp_txns);
10950
10951 snapshot.workpapers.extend(workpapers);
10953
10954 {
10956 let scope_id = format!(
10957 "SCOPE-{}-{}",
10958 engagement.engagement_id.simple(),
10959 &engagement.client_entity_id
10960 );
10961 let scope = datasynth_core::models::audit::AuditScope::new(
10962 scope_id.clone(),
10963 engagement.engagement_id.to_string(),
10964 engagement.client_entity_id.clone(),
10965 engagement.materiality,
10966 );
10967 let mut eng = engagement;
10969 eng.scope_id = Some(scope_id);
10970 snapshot.audit_scopes.push(scope);
10971 snapshot.engagements.push(eng);
10972 }
10973 }
10974 }
10975
10976 if self.config.companies.len() > 1 {
10980 let group_materiality = snapshot
10983 .engagements
10984 .first()
10985 .map(|e| e.materiality)
10986 .unwrap_or_else(|| {
10987 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10988 total_revenue * pct
10989 });
10990
10991 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10992 let group_engagement_id = snapshot
10993 .engagements
10994 .first()
10995 .map(|e| e.engagement_id.to_string())
10996 .unwrap_or_else(|| "GROUP-ENG".to_string());
10997
10998 let component_snapshot = component_gen.generate(
10999 &self.config.companies,
11000 group_materiality,
11001 &group_engagement_id,
11002 period_end,
11003 );
11004
11005 snapshot.component_auditors = component_snapshot.component_auditors;
11006 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11007 snapshot.component_instructions = component_snapshot.component_instructions;
11008 snapshot.component_reports = component_snapshot.component_reports;
11009
11010 info!(
11011 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11012 snapshot.component_auditors.len(),
11013 snapshot.component_instructions.len(),
11014 snapshot.component_reports.len(),
11015 );
11016 }
11017
11018 {
11022 let applicable_framework = self
11023 .config
11024 .accounting_standards
11025 .framework
11026 .as_ref()
11027 .map(|f| format!("{f:?}"))
11028 .unwrap_or_else(|| "IFRS".to_string());
11029
11030 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11031 let entity_count = self.config.companies.len();
11032
11033 for engagement in &snapshot.engagements {
11034 let company = self
11035 .config
11036 .companies
11037 .iter()
11038 .find(|c| c.code == engagement.client_entity_id);
11039 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11040 let letter_date = engagement.planning_start;
11041 let letter = letter_gen.generate(
11042 &engagement.engagement_id.to_string(),
11043 &engagement.client_name,
11044 entity_count,
11045 engagement.period_end_date,
11046 currency,
11047 &applicable_framework,
11048 letter_date,
11049 );
11050 snapshot.engagement_letters.push(letter);
11051 }
11052
11053 info!(
11054 "ISA 210 engagement letters: {} generated",
11055 snapshot.engagement_letters.len()
11056 );
11057 }
11058
11059 {
11063 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11064 let entity_codes: Vec<String> = self
11065 .config
11066 .companies
11067 .iter()
11068 .map(|c| c.code.clone())
11069 .collect();
11070 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11071 info!(
11072 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11073 subsequent.len(),
11074 subsequent
11075 .iter()
11076 .filter(|e| matches!(
11077 e.classification,
11078 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11079 ))
11080 .count(),
11081 subsequent
11082 .iter()
11083 .filter(|e| matches!(
11084 e.classification,
11085 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11086 ))
11087 .count(),
11088 );
11089 snapshot.subsequent_events = subsequent;
11090 }
11091
11092 {
11096 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11097 let entity_codes: Vec<String> = self
11098 .config
11099 .companies
11100 .iter()
11101 .map(|c| c.code.clone())
11102 .collect();
11103 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11104 info!(
11105 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11106 soc_snapshot.service_organizations.len(),
11107 soc_snapshot.soc_reports.len(),
11108 soc_snapshot.user_entity_controls.len(),
11109 );
11110 snapshot.service_organizations = soc_snapshot.service_organizations;
11111 snapshot.soc_reports = soc_snapshot.soc_reports;
11112 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11113 }
11114
11115 {
11119 use datasynth_generators::audit::going_concern_generator::{
11120 GoingConcernGenerator, GoingConcernInput,
11121 };
11122 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11123 let entity_codes: Vec<String> = self
11124 .config
11125 .companies
11126 .iter()
11127 .map(|c| c.code.clone())
11128 .collect();
11129 let assessment_date = period_end + chrono::Duration::days(75);
11131 let period_label = format!("FY{}", period_end.year());
11132
11133 let gc_inputs: Vec<GoingConcernInput> = self
11144 .config
11145 .companies
11146 .iter()
11147 .map(|company| {
11148 let code = &company.code;
11149 let mut revenue = rust_decimal::Decimal::ZERO;
11150 let mut expenses = rust_decimal::Decimal::ZERO;
11151 let mut current_assets = rust_decimal::Decimal::ZERO;
11152 let mut current_liabs = rust_decimal::Decimal::ZERO;
11153 let mut total_debt = rust_decimal::Decimal::ZERO;
11154
11155 for je in entries.iter().filter(|je| &je.header.company_code == code) {
11156 for line in &je.lines {
11157 let acct = line.gl_account.as_str();
11158 let net = line.debit_amount - line.credit_amount;
11159 if acct.starts_with('4') {
11160 revenue -= net;
11162 } else if acct.starts_with('6') {
11163 expenses += net;
11165 }
11166 if acct.starts_with('1') {
11168 if let Ok(n) = acct.parse::<u32>() {
11170 if (1000..=1499).contains(&n) {
11171 current_assets += net;
11172 }
11173 }
11174 } else if acct.starts_with('2') {
11175 if let Ok(n) = acct.parse::<u32>() {
11176 if (2000..=2499).contains(&n) {
11177 current_liabs -= net; } else if (2500..=2999).contains(&n) {
11180 total_debt -= net;
11182 }
11183 }
11184 }
11185 }
11186 }
11187
11188 let net_income = revenue - expenses;
11189 let working_capital = current_assets - current_liabs;
11190 let operating_cash_flow = net_income;
11193
11194 GoingConcernInput {
11195 entity_code: code.clone(),
11196 net_income,
11197 working_capital,
11198 operating_cash_flow,
11199 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11200 assessment_date,
11201 }
11202 })
11203 .collect();
11204
11205 let assessments = if gc_inputs.is_empty() {
11206 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11207 } else {
11208 gc_gen.generate_for_entities_with_inputs(
11209 &entity_codes,
11210 &gc_inputs,
11211 assessment_date,
11212 &period_label,
11213 )
11214 };
11215 info!(
11216 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11217 assessments.len(),
11218 assessments.iter().filter(|a| matches!(
11219 a.auditor_conclusion,
11220 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11221 )).count(),
11222 assessments.iter().filter(|a| matches!(
11223 a.auditor_conclusion,
11224 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11225 )).count(),
11226 assessments.iter().filter(|a| matches!(
11227 a.auditor_conclusion,
11228 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11229 )).count(),
11230 );
11231 snapshot.going_concern_assessments = assessments;
11232 }
11233
11234 {
11238 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11239 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11240 let entity_codes: Vec<String> = self
11241 .config
11242 .companies
11243 .iter()
11244 .map(|c| c.code.clone())
11245 .collect();
11246 let estimates = est_gen.generate_for_entities(&entity_codes);
11247 info!(
11248 "ISA 540 accounting estimates: {} estimates across {} entities \
11249 ({} with retrospective reviews, {} with auditor point estimates)",
11250 estimates.len(),
11251 entity_codes.len(),
11252 estimates
11253 .iter()
11254 .filter(|e| e.retrospective_review.is_some())
11255 .count(),
11256 estimates
11257 .iter()
11258 .filter(|e| e.auditor_point_estimate.is_some())
11259 .count(),
11260 );
11261 snapshot.accounting_estimates = estimates;
11262 }
11263
11264 {
11268 use datasynth_generators::audit::audit_opinion_generator::{
11269 AuditOpinionGenerator, AuditOpinionInput,
11270 };
11271
11272 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11273
11274 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11276 .engagements
11277 .iter()
11278 .map(|eng| {
11279 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11281 .findings
11282 .iter()
11283 .filter(|f| f.engagement_id == eng.engagement_id)
11284 .cloned()
11285 .collect();
11286
11287 let gc = snapshot
11289 .going_concern_assessments
11290 .iter()
11291 .find(|g| g.entity_code == eng.client_entity_id)
11292 .cloned();
11293
11294 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11296 snapshot.component_reports.clone();
11297
11298 let auditor = self
11299 .master_data
11300 .employees
11301 .first()
11302 .map(|e| e.display_name.clone())
11303 .unwrap_or_else(|| "Global Audit LLP".into());
11304
11305 let partner = self
11306 .master_data
11307 .employees
11308 .get(1)
11309 .map(|e| e.display_name.clone())
11310 .unwrap_or_else(|| eng.engagement_partner_id.clone());
11311
11312 AuditOpinionInput {
11313 entity_code: eng.client_entity_id.clone(),
11314 entity_name: eng.client_name.clone(),
11315 engagement_id: eng.engagement_id,
11316 period_end: eng.period_end_date,
11317 findings: eng_findings,
11318 going_concern: gc,
11319 component_reports: comp_reports,
11320 is_us_listed: {
11322 let fw = &self.config.audit_standards.isa_compliance.framework;
11323 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11324 },
11325 auditor_name: auditor,
11326 engagement_partner: partner,
11327 }
11328 })
11329 .collect();
11330
11331 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11332
11333 for go in &generated_opinions {
11334 snapshot
11335 .key_audit_matters
11336 .extend(go.key_audit_matters.clone());
11337 }
11338 snapshot.audit_opinions = generated_opinions
11339 .into_iter()
11340 .map(|go| go.opinion)
11341 .collect();
11342
11343 info!(
11344 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11345 snapshot.audit_opinions.len(),
11346 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11347 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11348 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11349 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11350 );
11351 }
11352
11353 {
11357 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11358
11359 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11360
11361 for (i, company) in self.config.companies.iter().enumerate() {
11362 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11364 .engagements
11365 .iter()
11366 .filter(|e| e.client_entity_id == company.code)
11367 .map(|e| e.engagement_id)
11368 .collect();
11369
11370 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11371 .findings
11372 .iter()
11373 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11374 .cloned()
11375 .collect();
11376
11377 let emp_count = self.master_data.employees.len();
11379 let ceo_name = if emp_count > 0 {
11380 self.master_data.employees[i % emp_count]
11381 .display_name
11382 .clone()
11383 } else {
11384 format!("CEO of {}", company.name)
11385 };
11386 let cfo_name = if emp_count > 1 {
11387 self.master_data.employees[(i + 1) % emp_count]
11388 .display_name
11389 .clone()
11390 } else {
11391 format!("CFO of {}", company.name)
11392 };
11393
11394 let materiality = snapshot
11396 .engagements
11397 .iter()
11398 .find(|e| e.client_entity_id == company.code)
11399 .map(|e| e.materiality)
11400 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11401
11402 let input = SoxGeneratorInput {
11403 company_code: company.code.clone(),
11404 company_name: company.name.clone(),
11405 fiscal_year,
11406 period_end,
11407 findings: company_findings,
11408 ceo_name,
11409 cfo_name,
11410 materiality_threshold: materiality,
11411 revenue_percent: rust_decimal::Decimal::from(100),
11412 assets_percent: rust_decimal::Decimal::from(100),
11413 significant_accounts: vec![
11414 "Revenue".into(),
11415 "Accounts Receivable".into(),
11416 "Inventory".into(),
11417 "Fixed Assets".into(),
11418 "Accounts Payable".into(),
11419 ],
11420 };
11421
11422 let (certs, assessment) = sox_gen.generate(&input);
11423 snapshot.sox_302_certifications.extend(certs);
11424 snapshot.sox_404_assessments.push(assessment);
11425 }
11426
11427 info!(
11428 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11429 snapshot.sox_302_certifications.len(),
11430 snapshot.sox_404_assessments.len(),
11431 snapshot
11432 .sox_404_assessments
11433 .iter()
11434 .filter(|a| a.icfr_effective)
11435 .count(),
11436 snapshot
11437 .sox_404_assessments
11438 .iter()
11439 .filter(|a| !a.icfr_effective)
11440 .count(),
11441 );
11442 }
11443
11444 {
11448 use datasynth_generators::audit::materiality_generator::{
11449 MaterialityGenerator, MaterialityInput,
11450 };
11451
11452 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11453
11454 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11458
11459 for company in &self.config.companies {
11460 let company_code = company.code.clone();
11461
11462 let company_revenue: rust_decimal::Decimal = entries
11464 .iter()
11465 .filter(|e| e.company_code() == company_code)
11466 .flat_map(|e| e.lines.iter())
11467 .filter(|l| l.account_code.starts_with('4'))
11468 .map(|l| l.credit_amount)
11469 .sum();
11470
11471 let total_assets: rust_decimal::Decimal = entries
11473 .iter()
11474 .filter(|e| e.company_code() == company_code)
11475 .flat_map(|e| e.lines.iter())
11476 .filter(|l| l.account_code.starts_with('1'))
11477 .map(|l| l.debit_amount)
11478 .sum();
11479
11480 let total_expenses: rust_decimal::Decimal = entries
11482 .iter()
11483 .filter(|e| e.company_code() == company_code)
11484 .flat_map(|e| e.lines.iter())
11485 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11486 .map(|l| l.debit_amount)
11487 .sum();
11488
11489 let equity: rust_decimal::Decimal = entries
11491 .iter()
11492 .filter(|e| e.company_code() == company_code)
11493 .flat_map(|e| e.lines.iter())
11494 .filter(|l| l.account_code.starts_with('3'))
11495 .map(|l| l.credit_amount)
11496 .sum();
11497
11498 let pretax_income = company_revenue - total_expenses;
11499
11500 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11502 let w = rust_decimal::Decimal::try_from(company.volume_weight)
11503 .unwrap_or(rust_decimal::Decimal::ONE);
11504 (
11505 total_revenue * w,
11506 total_revenue * w * rust_decimal::Decimal::from(3),
11507 total_revenue * w * rust_decimal::Decimal::new(1, 1),
11508 total_revenue * w * rust_decimal::Decimal::from(2),
11509 )
11510 } else {
11511 (company_revenue, total_assets, pretax_income, equity)
11512 };
11513
11514 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
11517 entity_code: company_code,
11518 period: format!("FY{}", fiscal_year),
11519 revenue: rev,
11520 pretax_income: pti,
11521 total_assets: assets,
11522 equity: eq,
11523 gross_profit,
11524 });
11525 }
11526
11527 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11528
11529 info!(
11530 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11531 {} total assets, {} equity benchmarks)",
11532 snapshot.materiality_calculations.len(),
11533 snapshot
11534 .materiality_calculations
11535 .iter()
11536 .filter(|m| matches!(
11537 m.benchmark,
11538 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11539 ))
11540 .count(),
11541 snapshot
11542 .materiality_calculations
11543 .iter()
11544 .filter(|m| matches!(
11545 m.benchmark,
11546 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11547 ))
11548 .count(),
11549 snapshot
11550 .materiality_calculations
11551 .iter()
11552 .filter(|m| matches!(
11553 m.benchmark,
11554 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11555 ))
11556 .count(),
11557 snapshot
11558 .materiality_calculations
11559 .iter()
11560 .filter(|m| matches!(
11561 m.benchmark,
11562 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11563 ))
11564 .count(),
11565 );
11566 }
11567
11568 {
11572 use datasynth_generators::audit::cra_generator::CraGenerator;
11573
11574 let mut cra_gen = CraGenerator::new(self.seed + 8315);
11575
11576 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11578 .audit_scopes
11579 .iter()
11580 .map(|s| (s.entity_code.clone(), s.id.clone()))
11581 .collect();
11582
11583 for company in &self.config.companies {
11584 let cras = cra_gen.generate_for_entity(&company.code, None);
11585 let scope_id = entity_scope_map.get(&company.code).cloned();
11586 let cras_with_scope: Vec<_> = cras
11587 .into_iter()
11588 .map(|mut cra| {
11589 cra.scope_id = scope_id.clone();
11590 cra
11591 })
11592 .collect();
11593 snapshot.combined_risk_assessments.extend(cras_with_scope);
11594 }
11595
11596 let significant_count = snapshot
11597 .combined_risk_assessments
11598 .iter()
11599 .filter(|c| c.significant_risk)
11600 .count();
11601 let high_cra_count = snapshot
11602 .combined_risk_assessments
11603 .iter()
11604 .filter(|c| {
11605 matches!(
11606 c.combined_risk,
11607 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11608 )
11609 })
11610 .count();
11611
11612 info!(
11613 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11614 snapshot.combined_risk_assessments.len(),
11615 significant_count,
11616 high_cra_count,
11617 );
11618 }
11619
11620 {
11624 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11625
11626 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11627
11628 for company in &self.config.companies {
11630 let entity_code = company.code.clone();
11631
11632 let tolerable_error = snapshot
11634 .materiality_calculations
11635 .iter()
11636 .find(|m| m.entity_code == entity_code)
11637 .map(|m| m.tolerable_error);
11638
11639 let entity_cras: Vec<_> = snapshot
11641 .combined_risk_assessments
11642 .iter()
11643 .filter(|c| c.entity_code == entity_code)
11644 .cloned()
11645 .collect();
11646
11647 if !entity_cras.is_empty() {
11648 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11649 snapshot.sampling_plans.extend(plans);
11650 snapshot.sampled_items.extend(items);
11651 }
11652 }
11653
11654 let misstatement_count = snapshot
11655 .sampled_items
11656 .iter()
11657 .filter(|i| i.misstatement_found)
11658 .count();
11659
11660 info!(
11661 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11662 snapshot.sampling_plans.len(),
11663 snapshot.sampled_items.len(),
11664 misstatement_count,
11665 );
11666 }
11667
11668 {
11672 use datasynth_generators::audit::scots_generator::{
11673 ScotsGenerator, ScotsGeneratorConfig,
11674 };
11675
11676 let ic_enabled = self.config.intercompany.enabled;
11677
11678 let config = ScotsGeneratorConfig {
11679 intercompany_enabled: ic_enabled,
11680 ..ScotsGeneratorConfig::default()
11681 };
11682 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11683
11684 for company in &self.config.companies {
11685 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11686 snapshot
11687 .significant_transaction_classes
11688 .extend(entity_scots);
11689 }
11690
11691 let estimation_count = snapshot
11692 .significant_transaction_classes
11693 .iter()
11694 .filter(|s| {
11695 matches!(
11696 s.transaction_type,
11697 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11698 )
11699 })
11700 .count();
11701
11702 info!(
11703 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11704 snapshot.significant_transaction_classes.len(),
11705 estimation_count,
11706 );
11707 }
11708
11709 {
11713 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11714
11715 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11716 let entity_codes: Vec<String> = self
11717 .config
11718 .companies
11719 .iter()
11720 .map(|c| c.code.clone())
11721 .collect();
11722 let unusual_flags =
11723 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11724 info!(
11725 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11726 unusual_flags.len(),
11727 unusual_flags
11728 .iter()
11729 .filter(|f| matches!(
11730 f.severity,
11731 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11732 ))
11733 .count(),
11734 unusual_flags
11735 .iter()
11736 .filter(|f| matches!(
11737 f.severity,
11738 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11739 ))
11740 .count(),
11741 unusual_flags
11742 .iter()
11743 .filter(|f| matches!(
11744 f.severity,
11745 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11746 ))
11747 .count(),
11748 );
11749 snapshot.unusual_items = unusual_flags;
11750 }
11751
11752 {
11756 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11757
11758 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11759 let entity_codes: Vec<String> = self
11760 .config
11761 .companies
11762 .iter()
11763 .map(|c| c.code.clone())
11764 .collect();
11765 let current_period_label = format!("FY{fiscal_year}");
11766 let prior_period_label = format!("FY{}", fiscal_year - 1);
11767 let analytical_rels = ar_gen.generate_for_entities(
11768 &entity_codes,
11769 entries,
11770 ¤t_period_label,
11771 &prior_period_label,
11772 );
11773 let out_of_range = analytical_rels
11774 .iter()
11775 .filter(|r| !r.within_expected_range)
11776 .count();
11777 info!(
11778 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11779 analytical_rels.len(),
11780 out_of_range,
11781 );
11782 snapshot.analytical_relationships = analytical_rels;
11783 }
11784
11785 if let Some(pb) = pb {
11786 pb.finish_with_message(format!(
11787 "Audit data: {} engagements, {} workpapers, {} evidence, \
11788 {} confirmations, {} procedure steps, {} samples, \
11789 {} analytical, {} IA funcs, {} related parties, \
11790 {} component auditors, {} letters, {} subsequent events, \
11791 {} service orgs, {} going concern, {} accounting estimates, \
11792 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11793 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11794 {} unusual items, {} analytical relationships",
11795 snapshot.engagements.len(),
11796 snapshot.workpapers.len(),
11797 snapshot.evidence.len(),
11798 snapshot.confirmations.len(),
11799 snapshot.procedure_steps.len(),
11800 snapshot.samples.len(),
11801 snapshot.analytical_results.len(),
11802 snapshot.ia_functions.len(),
11803 snapshot.related_parties.len(),
11804 snapshot.component_auditors.len(),
11805 snapshot.engagement_letters.len(),
11806 snapshot.subsequent_events.len(),
11807 snapshot.service_organizations.len(),
11808 snapshot.going_concern_assessments.len(),
11809 snapshot.accounting_estimates.len(),
11810 snapshot.audit_opinions.len(),
11811 snapshot.key_audit_matters.len(),
11812 snapshot.sox_302_certifications.len(),
11813 snapshot.sox_404_assessments.len(),
11814 snapshot.materiality_calculations.len(),
11815 snapshot.combined_risk_assessments.len(),
11816 snapshot.sampling_plans.len(),
11817 snapshot.significant_transaction_classes.len(),
11818 snapshot.unusual_items.len(),
11819 snapshot.analytical_relationships.len(),
11820 ));
11821 }
11822
11823 {
11830 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11831 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11832 debug!(
11833 "PCAOB-ISA mappings generated: {} mappings",
11834 snapshot.isa_pcaob_mappings.len()
11835 );
11836 }
11837
11838 {
11845 use datasynth_standards::audit::isa_reference::IsaStandard;
11846 snapshot.isa_mappings = IsaStandard::standard_entries();
11847 debug!(
11848 "ISA standard entries generated: {} standards",
11849 snapshot.isa_mappings.len()
11850 );
11851 }
11852
11853 {
11856 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11857 .engagements
11858 .iter()
11859 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11860 .collect();
11861
11862 for rpt in &mut snapshot.related_party_transactions {
11863 if rpt.journal_entry_id.is_some() {
11864 continue; }
11866 let entity = engagement_by_id
11867 .get(&rpt.engagement_id.to_string())
11868 .copied()
11869 .unwrap_or("");
11870
11871 let best_je = entries
11873 .iter()
11874 .filter(|je| je.header.company_code == entity)
11875 .min_by_key(|je| {
11876 (je.header.posting_date - rpt.transaction_date)
11877 .num_days()
11878 .abs()
11879 });
11880
11881 if let Some(je) = best_je {
11882 rpt.journal_entry_id = Some(je.header.document_id.to_string());
11883 }
11884 }
11885
11886 let linked = snapshot
11887 .related_party_transactions
11888 .iter()
11889 .filter(|t| t.journal_entry_id.is_some())
11890 .count();
11891 debug!(
11892 "Linked {}/{} related party transactions to journal entries",
11893 linked,
11894 snapshot.related_party_transactions.len()
11895 );
11896 }
11897
11898 Ok(snapshot)
11899 }
11900
11901 fn generate_audit_data_with_fsm(
11908 &mut self,
11909 entries: &[JournalEntry],
11910 ) -> SynthResult<AuditSnapshot> {
11911 use datasynth_audit_fsm::{
11912 context::EngagementContext,
11913 engine::AuditFsmEngine,
11914 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11915 };
11916 use rand::SeedableRng;
11917 use rand_chacha::ChaCha8Rng;
11918
11919 info!("Audit FSM: generating audit data via FSM engine");
11920
11921 let fsm_config = self
11922 .config
11923 .audit
11924 .fsm
11925 .as_ref()
11926 .expect("FSM config must be present when FSM is enabled");
11927
11928 let bwp = match fsm_config.blueprint.as_str() {
11930 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11931 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11932 _ => {
11933 warn!(
11934 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11935 fsm_config.blueprint
11936 );
11937 BlueprintWithPreconditions::load_builtin_fsa()
11938 }
11939 }
11940 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11941
11942 let overlay = match fsm_config.overlay.as_str() {
11944 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11945 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11946 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11947 _ => {
11948 warn!(
11949 "Unknown FSM overlay '{}', falling back to builtin:default",
11950 fsm_config.overlay
11951 );
11952 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11953 }
11954 }
11955 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11956
11957 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11959 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11960 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11961
11962 let company = self.config.companies.first();
11964 let company_code = company
11965 .map(|c| c.code.clone())
11966 .unwrap_or_else(|| "UNKNOWN".to_string());
11967 let company_name = company
11968 .map(|c| c.name.clone())
11969 .unwrap_or_else(|| "Unknown Company".to_string());
11970 let currency = company
11971 .map(|c| c.currency.clone())
11972 .unwrap_or_else(|| "USD".to_string());
11973
11974 let entity_entries: Vec<_> = entries
11976 .iter()
11977 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11978 .cloned()
11979 .collect();
11980 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
11984 .iter()
11985 .flat_map(|e| e.lines.iter())
11986 .filter(|l| l.account_code.starts_with('4'))
11987 .map(|l| l.credit_amount - l.debit_amount)
11988 .sum();
11989
11990 let total_assets: rust_decimal::Decimal = entries
11991 .iter()
11992 .flat_map(|e| e.lines.iter())
11993 .filter(|l| l.account_code.starts_with('1'))
11994 .map(|l| l.debit_amount - l.credit_amount)
11995 .sum();
11996
11997 let total_expenses: rust_decimal::Decimal = entries
11998 .iter()
11999 .flat_map(|e| e.lines.iter())
12000 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12001 .map(|l| l.debit_amount)
12002 .sum();
12003
12004 let equity: rust_decimal::Decimal = entries
12005 .iter()
12006 .flat_map(|e| e.lines.iter())
12007 .filter(|l| l.account_code.starts_with('3'))
12008 .map(|l| l.credit_amount - l.debit_amount)
12009 .sum();
12010
12011 let total_debt: rust_decimal::Decimal = entries
12012 .iter()
12013 .flat_map(|e| e.lines.iter())
12014 .filter(|l| l.account_code.starts_with('2'))
12015 .map(|l| l.credit_amount - l.debit_amount)
12016 .sum();
12017
12018 let pretax_income = total_revenue - total_expenses;
12019
12020 let cogs: rust_decimal::Decimal = entries
12021 .iter()
12022 .flat_map(|e| e.lines.iter())
12023 .filter(|l| l.account_code.starts_with('5'))
12024 .map(|l| l.debit_amount)
12025 .sum();
12026 let gross_profit = total_revenue - cogs;
12027
12028 let current_assets: rust_decimal::Decimal = entries
12029 .iter()
12030 .flat_map(|e| e.lines.iter())
12031 .filter(|l| {
12032 l.account_code.starts_with("10")
12033 || l.account_code.starts_with("11")
12034 || l.account_code.starts_with("12")
12035 || l.account_code.starts_with("13")
12036 })
12037 .map(|l| l.debit_amount - l.credit_amount)
12038 .sum();
12039 let current_liabilities: rust_decimal::Decimal = entries
12040 .iter()
12041 .flat_map(|e| e.lines.iter())
12042 .filter(|l| {
12043 l.account_code.starts_with("20")
12044 || l.account_code.starts_with("21")
12045 || l.account_code.starts_with("22")
12046 })
12047 .map(|l| l.credit_amount - l.debit_amount)
12048 .sum();
12049 let working_capital = current_assets - current_liabilities;
12050
12051 let depreciation: rust_decimal::Decimal = entries
12052 .iter()
12053 .flat_map(|e| e.lines.iter())
12054 .filter(|l| l.account_code.starts_with("60"))
12055 .map(|l| l.debit_amount)
12056 .sum();
12057 let operating_cash_flow = pretax_income + depreciation;
12058
12059 let accounts: Vec<String> = self
12061 .coa
12062 .as_ref()
12063 .map(|coa| {
12064 coa.get_postable_accounts()
12065 .iter()
12066 .map(|acc| acc.account_code().to_string())
12067 .collect()
12068 })
12069 .unwrap_or_default();
12070
12071 let team_member_ids: Vec<String> = self
12073 .master_data
12074 .employees
12075 .iter()
12076 .take(8) .map(|e| e.employee_id.clone())
12078 .collect();
12079 let team_member_pairs: Vec<(String, String)> = self
12080 .master_data
12081 .employees
12082 .iter()
12083 .take(8)
12084 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12085 .collect();
12086
12087 let vendor_names: Vec<String> = self
12088 .master_data
12089 .vendors
12090 .iter()
12091 .map(|v| v.name.clone())
12092 .collect();
12093 let customer_names: Vec<String> = self
12094 .master_data
12095 .customers
12096 .iter()
12097 .map(|c| c.name.clone())
12098 .collect();
12099
12100 let entity_codes: Vec<String> = self
12101 .config
12102 .companies
12103 .iter()
12104 .map(|c| c.code.clone())
12105 .collect();
12106
12107 let journal_entry_ids: Vec<String> = entries
12109 .iter()
12110 .take(50)
12111 .map(|e| e.header.document_id.to_string())
12112 .collect();
12113
12114 let mut account_balances = std::collections::HashMap::<String, f64>::new();
12116 for entry in entries {
12117 for line in &entry.lines {
12118 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12119 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12120 *account_balances
12121 .entry(line.account_code.clone())
12122 .or_insert(0.0) += debit_f64 - credit_f64;
12123 }
12124 }
12125
12126 let control_ids: Vec<String> = Vec::new();
12131 let anomaly_refs: Vec<String> = Vec::new();
12132
12133 let mut context = EngagementContext {
12134 company_code,
12135 company_name,
12136 fiscal_year: start_date.year(),
12137 currency,
12138 total_revenue,
12139 total_assets,
12140 engagement_start: start_date,
12141 report_date: period_end,
12142 pretax_income,
12143 equity,
12144 gross_profit,
12145 working_capital,
12146 operating_cash_flow,
12147 total_debt,
12148 team_member_ids,
12149 team_member_pairs,
12150 accounts,
12151 vendor_names,
12152 customer_names,
12153 journal_entry_ids,
12154 account_balances,
12155 control_ids,
12156 anomaly_refs,
12157 journal_entries: entries.to_vec(),
12158 is_us_listed: false,
12159 entity_codes,
12160 auditor_firm_name: "DataSynth Audit LLP".into(),
12161 accounting_framework: self
12162 .config
12163 .accounting_standards
12164 .framework
12165 .map(|f| match f {
12166 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12167 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12168 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12169 "French GAAP"
12170 }
12171 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12172 "German GAAP"
12173 }
12174 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12175 "Dual Reporting"
12176 }
12177 })
12178 .unwrap_or("IFRS")
12179 .into(),
12180 };
12181
12182 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12184 let rng = ChaCha8Rng::seed_from_u64(seed);
12185 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12186
12187 let mut result = engine
12188 .run_engagement(&context)
12189 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12190
12191 info!(
12192 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12193 {} phases completed, duration {:.1}h",
12194 result.event_log.len(),
12195 result.artifacts.total_artifacts(),
12196 result.anomalies.len(),
12197 result.phases_completed.len(),
12198 result.total_duration_hours,
12199 );
12200
12201 let tb_entity = context.company_code.clone();
12203 let tb_fy = context.fiscal_year;
12204 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12205 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12206 entries,
12207 &tb_entity,
12208 tb_fy,
12209 self.coa.as_ref().map(|c| c.as_ref()),
12210 );
12211
12212 let bag = result.artifacts;
12214 let mut snapshot = AuditSnapshot {
12215 engagements: bag.engagements,
12216 engagement_letters: bag.engagement_letters,
12217 materiality_calculations: bag.materiality_calculations,
12218 risk_assessments: bag.risk_assessments,
12219 combined_risk_assessments: bag.combined_risk_assessments,
12220 workpapers: bag.workpapers,
12221 evidence: bag.evidence,
12222 findings: bag.findings,
12223 judgments: bag.judgments,
12224 sampling_plans: bag.sampling_plans,
12225 sampled_items: bag.sampled_items,
12226 analytical_results: bag.analytical_results,
12227 going_concern_assessments: bag.going_concern_assessments,
12228 subsequent_events: bag.subsequent_events,
12229 audit_opinions: bag.audit_opinions,
12230 key_audit_matters: bag.key_audit_matters,
12231 procedure_steps: bag.procedure_steps,
12232 samples: bag.samples,
12233 confirmations: bag.confirmations,
12234 confirmation_responses: bag.confirmation_responses,
12235 fsm_event_trail: Some(result.event_log),
12237 ..Default::default()
12239 };
12240
12241 {
12243 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12244 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12245 }
12246 {
12247 use datasynth_standards::audit::isa_reference::IsaStandard;
12248 snapshot.isa_mappings = IsaStandard::standard_entries();
12249 }
12250
12251 info!(
12252 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12253 {} risk assessments, {} findings, {} materiality calcs",
12254 snapshot.engagements.len(),
12255 snapshot.workpapers.len(),
12256 snapshot.evidence.len(),
12257 snapshot.risk_assessments.len(),
12258 snapshot.findings.len(),
12259 snapshot.materiality_calculations.len(),
12260 );
12261
12262 Ok(snapshot)
12263 }
12264
12265 fn export_graphs(
12272 &mut self,
12273 entries: &[JournalEntry],
12274 _coa: &Arc<ChartOfAccounts>,
12275 stats: &mut EnhancedGenerationStatistics,
12276 ) -> SynthResult<GraphExportSnapshot> {
12277 let pb = self.create_progress_bar(100, "Exporting Graphs");
12278
12279 let mut snapshot = GraphExportSnapshot::default();
12280
12281 let output_dir = self
12283 .output_path
12284 .clone()
12285 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12286 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12287
12288 for graph_type in &self.config.graph_export.graph_types {
12290 if let Some(pb) = &pb {
12291 pb.inc(10);
12292 }
12293
12294 let graph_config = TransactionGraphConfig {
12296 include_vendors: false,
12297 include_customers: false,
12298 create_debit_credit_edges: true,
12299 include_document_nodes: graph_type.include_document_nodes,
12300 min_edge_weight: graph_type.min_edge_weight,
12301 aggregate_parallel_edges: graph_type.aggregate_edges,
12302 framework: None,
12303 };
12304
12305 let mut builder = TransactionGraphBuilder::new(graph_config);
12306 builder.add_journal_entries(entries);
12307 let graph = builder.build();
12308
12309 stats.graph_node_count += graph.node_count();
12311 stats.graph_edge_count += graph.edge_count();
12312
12313 if let Some(pb) = &pb {
12314 pb.inc(40);
12315 }
12316
12317 for format in &self.config.graph_export.formats {
12319 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12320
12321 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12323 warn!("Failed to create graph output directory: {}", e);
12324 continue;
12325 }
12326
12327 match format {
12328 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12329 let pyg_config = PyGExportConfig {
12330 common: datasynth_graph::CommonExportConfig {
12331 export_node_features: true,
12332 export_edge_features: true,
12333 export_node_labels: true,
12334 export_edge_labels: true,
12335 export_masks: true,
12336 train_ratio: self.config.graph_export.train_ratio,
12337 val_ratio: self.config.graph_export.validation_ratio,
12338 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12339 },
12340 one_hot_categoricals: false,
12341 };
12342
12343 let exporter = PyGExporter::new(pyg_config);
12344 match exporter.export(&graph, &format_dir) {
12345 Ok(metadata) => {
12346 snapshot.exports.insert(
12347 format!("{}_{}", graph_type.name, "pytorch_geometric"),
12348 GraphExportInfo {
12349 name: graph_type.name.clone(),
12350 format: "pytorch_geometric".to_string(),
12351 output_path: format_dir.clone(),
12352 node_count: metadata.num_nodes,
12353 edge_count: metadata.num_edges,
12354 },
12355 );
12356 snapshot.graph_count += 1;
12357 }
12358 Err(e) => {
12359 warn!("Failed to export PyTorch Geometric graph: {}", e);
12360 }
12361 }
12362 }
12363 datasynth_config::schema::GraphExportFormat::Neo4j => {
12364 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12365
12366 let neo4j_config = Neo4jExportConfig {
12367 export_node_properties: true,
12368 export_edge_properties: true,
12369 export_features: true,
12370 generate_cypher: true,
12371 generate_admin_import: true,
12372 database_name: "synth".to_string(),
12373 cypher_batch_size: 1000,
12374 };
12375
12376 let exporter = Neo4jExporter::new(neo4j_config);
12377 match exporter.export(&graph, &format_dir) {
12378 Ok(metadata) => {
12379 snapshot.exports.insert(
12380 format!("{}_{}", graph_type.name, "neo4j"),
12381 GraphExportInfo {
12382 name: graph_type.name.clone(),
12383 format: "neo4j".to_string(),
12384 output_path: format_dir.clone(),
12385 node_count: metadata.num_nodes,
12386 edge_count: metadata.num_edges,
12387 },
12388 );
12389 snapshot.graph_count += 1;
12390 }
12391 Err(e) => {
12392 warn!("Failed to export Neo4j graph: {}", e);
12393 }
12394 }
12395 }
12396 datasynth_config::schema::GraphExportFormat::Dgl => {
12397 use datasynth_graph::{DGLExportConfig, DGLExporter};
12398
12399 let dgl_config = DGLExportConfig {
12400 common: datasynth_graph::CommonExportConfig {
12401 export_node_features: true,
12402 export_edge_features: true,
12403 export_node_labels: true,
12404 export_edge_labels: true,
12405 export_masks: true,
12406 train_ratio: self.config.graph_export.train_ratio,
12407 val_ratio: self.config.graph_export.validation_ratio,
12408 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12409 },
12410 heterogeneous: self.config.graph_export.dgl.heterogeneous,
12411 include_pickle_script: true, };
12413
12414 let exporter = DGLExporter::new(dgl_config);
12415 match exporter.export(&graph, &format_dir) {
12416 Ok(metadata) => {
12417 snapshot.exports.insert(
12418 format!("{}_{}", graph_type.name, "dgl"),
12419 GraphExportInfo {
12420 name: graph_type.name.clone(),
12421 format: "dgl".to_string(),
12422 output_path: format_dir.clone(),
12423 node_count: metadata.common.num_nodes,
12424 edge_count: metadata.common.num_edges,
12425 },
12426 );
12427 snapshot.graph_count += 1;
12428 }
12429 Err(e) => {
12430 warn!("Failed to export DGL graph: {}", e);
12431 }
12432 }
12433 }
12434 datasynth_config::schema::GraphExportFormat::RustGraph => {
12435 use datasynth_graph::{
12436 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12437 };
12438
12439 let rustgraph_config = RustGraphExportConfig {
12440 include_features: true,
12441 include_temporal: true,
12442 include_labels: true,
12443 source_name: "datasynth".to_string(),
12444 batch_id: None,
12445 output_format: RustGraphOutputFormat::JsonLines,
12446 export_node_properties: true,
12447 export_edge_properties: true,
12448 pretty_print: false,
12449 };
12450
12451 let exporter = RustGraphExporter::new(rustgraph_config);
12452 match exporter.export(&graph, &format_dir) {
12453 Ok(metadata) => {
12454 snapshot.exports.insert(
12455 format!("{}_{}", graph_type.name, "rustgraph"),
12456 GraphExportInfo {
12457 name: graph_type.name.clone(),
12458 format: "rustgraph".to_string(),
12459 output_path: format_dir.clone(),
12460 node_count: metadata.num_nodes,
12461 edge_count: metadata.num_edges,
12462 },
12463 );
12464 snapshot.graph_count += 1;
12465 }
12466 Err(e) => {
12467 warn!("Failed to export RustGraph: {}", e);
12468 }
12469 }
12470 }
12471 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12472 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12474 }
12475 }
12476 }
12477
12478 if let Some(pb) = &pb {
12479 pb.inc(40);
12480 }
12481 }
12482
12483 stats.graph_export_count = snapshot.graph_count;
12484 snapshot.exported = snapshot.graph_count > 0;
12485
12486 if let Some(pb) = pb {
12487 pb.finish_with_message(format!(
12488 "Graphs exported: {} graphs ({} nodes, {} edges)",
12489 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12490 ));
12491 }
12492
12493 Ok(snapshot)
12494 }
12495
12496 fn build_additional_graphs(
12501 &self,
12502 banking: &BankingSnapshot,
12503 intercompany: &IntercompanySnapshot,
12504 entries: &[JournalEntry],
12505 stats: &mut EnhancedGenerationStatistics,
12506 ) {
12507 let output_dir = self
12508 .output_path
12509 .clone()
12510 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12511 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12512
12513 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12515 info!("Phase 10c: Building banking network graph");
12516 let config = BankingGraphConfig::default();
12517 let mut builder = BankingGraphBuilder::new(config);
12518 builder.add_customers(&banking.customers);
12519 builder.add_accounts(&banking.accounts, &banking.customers);
12520 builder.add_transactions(&banking.transactions);
12521 let graph = builder.build();
12522
12523 let node_count = graph.node_count();
12524 let edge_count = graph.edge_count();
12525 stats.graph_node_count += node_count;
12526 stats.graph_edge_count += edge_count;
12527
12528 for format in &self.config.graph_export.formats {
12530 if matches!(
12531 format,
12532 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12533 ) {
12534 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12535 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12536 warn!("Failed to create banking graph output dir: {}", e);
12537 continue;
12538 }
12539 let pyg_config = PyGExportConfig::default();
12540 let exporter = PyGExporter::new(pyg_config);
12541 if let Err(e) = exporter.export(&graph, &format_dir) {
12542 warn!("Failed to export banking graph as PyG: {}", e);
12543 } else {
12544 info!(
12545 "Banking network graph exported: {} nodes, {} edges",
12546 node_count, edge_count
12547 );
12548 }
12549 }
12550 }
12551 }
12552
12553 let approval_entries: Vec<_> = entries
12555 .iter()
12556 .filter(|je| je.header.approval_workflow.is_some())
12557 .collect();
12558
12559 if !approval_entries.is_empty() {
12560 info!(
12561 "Phase 10c: Building approval network graph ({} entries with approvals)",
12562 approval_entries.len()
12563 );
12564 let config = ApprovalGraphConfig::default();
12565 let mut builder = ApprovalGraphBuilder::new(config);
12566
12567 for je in &approval_entries {
12568 if let Some(ref wf) = je.header.approval_workflow {
12569 for action in &wf.actions {
12570 let record = datasynth_core::models::ApprovalRecord {
12571 approval_id: format!(
12572 "APR-{}-{}",
12573 je.header.document_id, action.approval_level
12574 ),
12575 document_number: je.header.document_id.to_string(),
12576 document_type: "JE".to_string(),
12577 company_code: je.company_code().to_string(),
12578 requester_id: wf.preparer_id.clone(),
12579 requester_name: Some(wf.preparer_name.clone()),
12580 approver_id: action.actor_id.clone(),
12581 approver_name: action.actor_name.clone(),
12582 approval_date: je.posting_date(),
12583 action: format!("{:?}", action.action),
12584 amount: wf.amount,
12585 approval_limit: None,
12586 comments: action.comments.clone(),
12587 delegation_from: None,
12588 is_auto_approved: false,
12589 };
12590 builder.add_approval(&record);
12591 }
12592 }
12593 }
12594
12595 let graph = builder.build();
12596 let node_count = graph.node_count();
12597 let edge_count = graph.edge_count();
12598 stats.graph_node_count += node_count;
12599 stats.graph_edge_count += edge_count;
12600
12601 for format in &self.config.graph_export.formats {
12603 if matches!(
12604 format,
12605 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12606 ) {
12607 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12608 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12609 warn!("Failed to create approval graph output dir: {}", e);
12610 continue;
12611 }
12612 let pyg_config = PyGExportConfig::default();
12613 let exporter = PyGExporter::new(pyg_config);
12614 if let Err(e) = exporter.export(&graph, &format_dir) {
12615 warn!("Failed to export approval graph as PyG: {}", e);
12616 } else {
12617 info!(
12618 "Approval network graph exported: {} nodes, {} edges",
12619 node_count, edge_count
12620 );
12621 }
12622 }
12623 }
12624 }
12625
12626 if self.config.companies.len() >= 2 {
12628 info!(
12629 "Phase 10c: Building entity relationship graph ({} companies)",
12630 self.config.companies.len()
12631 );
12632
12633 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12634 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12635
12636 let parent_code = &self.config.companies[0].code;
12638 let mut companies: Vec<datasynth_core::models::Company> =
12639 Vec::with_capacity(self.config.companies.len());
12640
12641 let first = &self.config.companies[0];
12643 companies.push(datasynth_core::models::Company::parent(
12644 &first.code,
12645 &first.name,
12646 &first.country,
12647 &first.currency,
12648 ));
12649
12650 for cc in self.config.companies.iter().skip(1) {
12652 companies.push(datasynth_core::models::Company::subsidiary(
12653 &cc.code,
12654 &cc.name,
12655 &cc.country,
12656 &cc.currency,
12657 parent_code,
12658 rust_decimal::Decimal::from(100),
12659 ));
12660 }
12661
12662 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12664 self.config
12665 .companies
12666 .iter()
12667 .skip(1)
12668 .enumerate()
12669 .map(|(i, cc)| {
12670 let mut rel =
12671 datasynth_core::models::intercompany::IntercompanyRelationship::new(
12672 format!("REL{:03}", i + 1),
12673 parent_code.clone(),
12674 cc.code.clone(),
12675 rust_decimal::Decimal::from(100),
12676 start_date,
12677 );
12678 rel.functional_currency = cc.currency.clone();
12679 rel
12680 })
12681 .collect();
12682
12683 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12684 builder.add_companies(&companies);
12685 builder.add_ownership_relationships(&relationships);
12686
12687 for pair in &intercompany.matched_pairs {
12689 builder.add_intercompany_edge(
12690 &pair.seller_company,
12691 &pair.buyer_company,
12692 pair.amount,
12693 &format!("{:?}", pair.transaction_type),
12694 );
12695 }
12696
12697 let graph = builder.build();
12698 let node_count = graph.node_count();
12699 let edge_count = graph.edge_count();
12700 stats.graph_node_count += node_count;
12701 stats.graph_edge_count += edge_count;
12702
12703 for format in &self.config.graph_export.formats {
12705 if matches!(
12706 format,
12707 datasynth_config::schema::GraphExportFormat::PytorchGeometric
12708 ) {
12709 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12710 if let Err(e) = std::fs::create_dir_all(&format_dir) {
12711 warn!("Failed to create entity graph output dir: {}", e);
12712 continue;
12713 }
12714 let pyg_config = PyGExportConfig::default();
12715 let exporter = PyGExporter::new(pyg_config);
12716 if let Err(e) = exporter.export(&graph, &format_dir) {
12717 warn!("Failed to export entity graph as PyG: {}", e);
12718 } else {
12719 info!(
12720 "Entity relationship graph exported: {} nodes, {} edges",
12721 node_count, edge_count
12722 );
12723 }
12724 }
12725 }
12726 } else {
12727 debug!(
12728 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12729 self.config.companies.len()
12730 );
12731 }
12732 }
12733
12734 #[allow(clippy::too_many_arguments)]
12741 fn export_hypergraph(
12742 &self,
12743 coa: &Arc<ChartOfAccounts>,
12744 entries: &[JournalEntry],
12745 document_flows: &DocumentFlowSnapshot,
12746 sourcing: &SourcingSnapshot,
12747 hr: &HrSnapshot,
12748 manufacturing: &ManufacturingSnapshot,
12749 banking: &BankingSnapshot,
12750 audit: &AuditSnapshot,
12751 financial_reporting: &FinancialReportingSnapshot,
12752 ocpm: &OcpmSnapshot,
12753 compliance: &ComplianceRegulationsSnapshot,
12754 stats: &mut EnhancedGenerationStatistics,
12755 ) -> SynthResult<HypergraphExportInfo> {
12756 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12757 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12758 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12759 use datasynth_graph::models::hypergraph::AggregationStrategy;
12760
12761 let hg_settings = &self.config.graph_export.hypergraph;
12762
12763 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12765 "truncate" => AggregationStrategy::Truncate,
12766 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12767 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12768 "importance_sample" => AggregationStrategy::ImportanceSample,
12769 _ => AggregationStrategy::PoolByCounterparty,
12770 };
12771
12772 let builder_config = HypergraphConfig {
12773 max_nodes: hg_settings.max_nodes,
12774 aggregation_strategy,
12775 include_coso: hg_settings.governance_layer.include_coso,
12776 include_controls: hg_settings.governance_layer.include_controls,
12777 include_sox: hg_settings.governance_layer.include_sox,
12778 include_vendors: hg_settings.governance_layer.include_vendors,
12779 include_customers: hg_settings.governance_layer.include_customers,
12780 include_employees: hg_settings.governance_layer.include_employees,
12781 include_p2p: hg_settings.process_layer.include_p2p,
12782 include_o2c: hg_settings.process_layer.include_o2c,
12783 include_s2c: hg_settings.process_layer.include_s2c,
12784 include_h2r: hg_settings.process_layer.include_h2r,
12785 include_mfg: hg_settings.process_layer.include_mfg,
12786 include_bank: hg_settings.process_layer.include_bank,
12787 include_audit: hg_settings.process_layer.include_audit,
12788 include_r2r: hg_settings.process_layer.include_r2r,
12789 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12790 docs_per_counterparty_threshold: hg_settings
12791 .process_layer
12792 .docs_per_counterparty_threshold,
12793 include_accounts: hg_settings.accounting_layer.include_accounts,
12794 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12795 include_cross_layer_edges: hg_settings.cross_layer.enabled,
12796 include_compliance: self.config.compliance_regulations.enabled,
12797 include_tax: true,
12798 include_treasury: true,
12799 include_esg: true,
12800 include_project: true,
12801 include_intercompany: true,
12802 include_temporal_events: true,
12803 };
12804
12805 let mut builder = HypergraphBuilder::new(builder_config);
12806
12807 builder.add_coso_framework();
12809
12810 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12813 let controls = InternalControl::standard_controls();
12814 builder.add_controls(&controls);
12815 }
12816
12817 builder.add_vendors(&self.master_data.vendors);
12819 builder.add_customers(&self.master_data.customers);
12820 builder.add_employees(&self.master_data.employees);
12821
12822 builder.add_p2p_documents(
12824 &document_flows.purchase_orders,
12825 &document_flows.goods_receipts,
12826 &document_flows.vendor_invoices,
12827 &document_flows.payments,
12828 );
12829 builder.add_o2c_documents(
12830 &document_flows.sales_orders,
12831 &document_flows.deliveries,
12832 &document_flows.customer_invoices,
12833 );
12834 builder.add_s2c_documents(
12835 &sourcing.sourcing_projects,
12836 &sourcing.qualifications,
12837 &sourcing.rfx_events,
12838 &sourcing.bids,
12839 &sourcing.bid_evaluations,
12840 &sourcing.contracts,
12841 );
12842 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12843 builder.add_mfg_documents(
12844 &manufacturing.production_orders,
12845 &manufacturing.quality_inspections,
12846 &manufacturing.cycle_counts,
12847 );
12848 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12849 builder.add_audit_documents(
12850 &audit.engagements,
12851 &audit.workpapers,
12852 &audit.findings,
12853 &audit.evidence,
12854 &audit.risk_assessments,
12855 &audit.judgments,
12856 &audit.materiality_calculations,
12857 &audit.audit_opinions,
12858 &audit.going_concern_assessments,
12859 );
12860 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12861
12862 if let Some(ref event_log) = ocpm.event_log {
12864 builder.add_ocpm_events(event_log);
12865 }
12866
12867 if self.config.compliance_regulations.enabled
12869 && hg_settings.governance_layer.include_controls
12870 {
12871 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12873 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12874 .standard_records
12875 .iter()
12876 .filter_map(|r| {
12877 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12878 registry.get(&sid).cloned()
12879 })
12880 .collect();
12881
12882 builder.add_compliance_regulations(
12883 &standards,
12884 &compliance.findings,
12885 &compliance.filings,
12886 );
12887 }
12888
12889 builder.add_accounts(coa);
12891 builder.add_journal_entries_as_hyperedges(entries);
12892
12893 let hypergraph = builder.build();
12895
12896 let output_dir = self
12898 .output_path
12899 .clone()
12900 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12901 let hg_dir = output_dir
12902 .join(&self.config.graph_export.output_subdirectory)
12903 .join(&hg_settings.output_subdirectory);
12904
12905 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12907 "unified" => {
12908 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12909 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12910 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12911 })?;
12912 (
12913 metadata.num_nodes,
12914 metadata.num_edges,
12915 metadata.num_hyperedges,
12916 )
12917 }
12918 _ => {
12919 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12921 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12922 SynthError::generation(format!("Hypergraph export failed: {e}"))
12923 })?;
12924 (
12925 metadata.num_nodes,
12926 metadata.num_edges,
12927 metadata.num_hyperedges,
12928 )
12929 }
12930 };
12931
12932 #[cfg(feature = "streaming")]
12934 if let Some(ref target_url) = hg_settings.stream_target {
12935 use crate::stream_client::{StreamClient, StreamConfig};
12936 use std::io::Write as _;
12937
12938 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12939 let stream_config = StreamConfig {
12940 target_url: target_url.clone(),
12941 batch_size: hg_settings.stream_batch_size,
12942 api_key,
12943 ..StreamConfig::default()
12944 };
12945
12946 match StreamClient::new(stream_config) {
12947 Ok(mut client) => {
12948 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12949 match exporter.export_to_writer(&hypergraph, &mut client) {
12950 Ok(_) => {
12951 if let Err(e) = client.flush() {
12952 warn!("Failed to flush stream client: {}", e);
12953 } else {
12954 info!("Streamed {} records to {}", client.total_sent(), target_url);
12955 }
12956 }
12957 Err(e) => {
12958 warn!("Streaming export failed: {}", e);
12959 }
12960 }
12961 }
12962 Err(e) => {
12963 warn!("Failed to create stream client: {}", e);
12964 }
12965 }
12966 }
12967
12968 stats.graph_node_count += num_nodes;
12970 stats.graph_edge_count += num_edges;
12971 stats.graph_export_count += 1;
12972
12973 Ok(HypergraphExportInfo {
12974 node_count: num_nodes,
12975 edge_count: num_edges,
12976 hyperedge_count: num_hyperedges,
12977 output_path: hg_dir,
12978 })
12979 }
12980
12981 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12986 let pb = self.create_progress_bar(100, "Generating Banking Data");
12987
12988 let orchestrator = BankingOrchestratorBuilder::new()
12990 .config(self.config.banking.clone())
12991 .seed(self.seed + 9000)
12992 .country_pack(self.primary_pack().clone())
12993 .build();
12994
12995 if let Some(pb) = &pb {
12996 pb.inc(10);
12997 }
12998
12999 let result = orchestrator.generate();
13001
13002 if let Some(pb) = &pb {
13003 pb.inc(90);
13004 pb.finish_with_message(format!(
13005 "Banking: {} customers, {} transactions",
13006 result.customers.len(),
13007 result.transactions.len()
13008 ));
13009 }
13010
13011 let mut banking_customers = result.customers;
13016 let core_customers = &self.master_data.customers;
13017 if !core_customers.is_empty() {
13018 for (i, bc) in banking_customers.iter_mut().enumerate() {
13019 let core = &core_customers[i % core_customers.len()];
13020 bc.name = CustomerName::business(&core.name);
13021 bc.residence_country = core.country.clone();
13022 bc.enterprise_customer_id = Some(core.customer_id.clone());
13023 }
13024 debug!(
13025 "Cross-referenced {} banking customers with {} core customers",
13026 banking_customers.len(),
13027 core_customers.len()
13028 );
13029 }
13030
13031 Ok(BankingSnapshot {
13032 customers: banking_customers,
13033 accounts: result.accounts,
13034 transactions: result.transactions,
13035 transaction_labels: result.transaction_labels,
13036 customer_labels: result.customer_labels,
13037 account_labels: result.account_labels,
13038 relationship_labels: result.relationship_labels,
13039 narratives: result.narratives,
13040 suspicious_count: result.stats.suspicious_count,
13041 scenario_count: result.scenarios.len(),
13042 })
13043 }
13044
13045 fn calculate_total_transactions(&self) -> u64 {
13047 let months = self.config.global.period_months as f64;
13048 self.config
13049 .companies
13050 .iter()
13051 .map(|c| {
13052 let annual = c.annual_transaction_volume.count() as f64;
13053 let weighted = annual * c.volume_weight;
13054 (weighted * months / 12.0) as u64
13055 })
13056 .sum()
13057 }
13058
13059 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13061 if !self.phase_config.show_progress {
13062 return None;
13063 }
13064
13065 let pb = if let Some(mp) = &self.multi_progress {
13066 mp.add(ProgressBar::new(total))
13067 } else {
13068 ProgressBar::new(total)
13069 };
13070
13071 pb.set_style(
13072 ProgressStyle::default_bar()
13073 .template(&format!(
13074 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13075 ))
13076 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13077 .progress_chars("#>-"),
13078 );
13079
13080 Some(pb)
13081 }
13082
13083 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13085 self.coa.clone()
13086 }
13087
13088 pub fn get_master_data(&self) -> &MasterDataSnapshot {
13090 &self.master_data
13091 }
13092
13093 fn phase_compliance_regulations(
13095 &mut self,
13096 _stats: &mut EnhancedGenerationStatistics,
13097 ) -> SynthResult<ComplianceRegulationsSnapshot> {
13098 if !self.phase_config.generate_compliance_regulations {
13099 return Ok(ComplianceRegulationsSnapshot::default());
13100 }
13101
13102 info!("Phase: Generating Compliance Regulations Data");
13103
13104 let cr_config = &self.config.compliance_regulations;
13105
13106 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13108 self.config
13109 .companies
13110 .iter()
13111 .map(|c| c.country.clone())
13112 .collect::<std::collections::HashSet<_>>()
13113 .into_iter()
13114 .collect()
13115 } else {
13116 cr_config.jurisdictions.clone()
13117 };
13118
13119 let fallback_date =
13121 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13122 let reference_date = cr_config
13123 .reference_date
13124 .as_ref()
13125 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13126 .unwrap_or_else(|| {
13127 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13128 .unwrap_or(fallback_date)
13129 });
13130
13131 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13133 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13134 let cross_reference_records = reg_gen.generate_cross_reference_records();
13135 let jurisdiction_records =
13136 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13137
13138 info!(
13139 " Standards: {} records, {} cross-references, {} jurisdictions",
13140 standard_records.len(),
13141 cross_reference_records.len(),
13142 jurisdiction_records.len()
13143 );
13144
13145 let audit_procedures = if cr_config.audit_procedures.enabled {
13147 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13148 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13149 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13150 confidence_level: cr_config.audit_procedures.confidence_level,
13151 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13152 };
13153 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13154 self.seed + 9000,
13155 proc_config,
13156 );
13157 let registry = reg_gen.registry();
13158 let mut all_procs = Vec::new();
13159 for jurisdiction in &jurisdictions {
13160 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13161 all_procs.extend(procs);
13162 }
13163 info!(" Audit procedures: {}", all_procs.len());
13164 all_procs
13165 } else {
13166 Vec::new()
13167 };
13168
13169 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13171 let finding_config =
13172 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13173 finding_rate: cr_config.findings.finding_rate,
13174 material_weakness_rate: cr_config.findings.material_weakness_rate,
13175 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13176 generate_remediation: cr_config.findings.generate_remediation,
13177 };
13178 let mut finding_gen =
13179 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13180 self.seed + 9100,
13181 finding_config,
13182 );
13183 let mut all_findings = Vec::new();
13184 for company in &self.config.companies {
13185 let company_findings =
13186 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13187 all_findings.extend(company_findings);
13188 }
13189 info!(" Compliance findings: {}", all_findings.len());
13190 all_findings
13191 } else {
13192 Vec::new()
13193 };
13194
13195 let filings = if cr_config.filings.enabled {
13197 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13198 filing_types: cr_config.filings.filing_types.clone(),
13199 generate_status_progression: cr_config.filings.generate_status_progression,
13200 };
13201 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13202 self.seed + 9200,
13203 filing_config,
13204 );
13205 let company_codes: Vec<String> = self
13206 .config
13207 .companies
13208 .iter()
13209 .map(|c| c.code.clone())
13210 .collect();
13211 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13212 .unwrap_or(fallback_date);
13213 let filings = filing_gen.generate_filings(
13214 &company_codes,
13215 &jurisdictions,
13216 start_date,
13217 self.config.global.period_months,
13218 );
13219 info!(" Regulatory filings: {}", filings.len());
13220 filings
13221 } else {
13222 Vec::new()
13223 };
13224
13225 let compliance_graph = if cr_config.graph.enabled {
13227 let graph_config = datasynth_graph::ComplianceGraphConfig {
13228 include_standard_nodes: cr_config.graph.include_compliance_nodes,
13229 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13230 include_cross_references: cr_config.graph.include_cross_references,
13231 include_supersession_edges: cr_config.graph.include_supersession_edges,
13232 include_account_links: cr_config.graph.include_account_links,
13233 include_control_links: cr_config.graph.include_control_links,
13234 include_company_links: cr_config.graph.include_company_links,
13235 };
13236 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13237
13238 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13240 .iter()
13241 .map(|r| datasynth_graph::StandardNodeInput {
13242 standard_id: r.standard_id.clone(),
13243 title: r.title.clone(),
13244 category: r.category.clone(),
13245 domain: r.domain.clone(),
13246 is_active: r.is_active,
13247 features: vec![if r.is_active { 1.0 } else { 0.0 }],
13248 applicable_account_types: r.applicable_account_types.clone(),
13249 applicable_processes: r.applicable_processes.clone(),
13250 })
13251 .collect();
13252 builder.add_standards(&standard_inputs);
13253
13254 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13256 jurisdiction_records
13257 .iter()
13258 .map(|r| datasynth_graph::JurisdictionNodeInput {
13259 country_code: r.country_code.clone(),
13260 country_name: r.country_name.clone(),
13261 framework: r.accounting_framework.clone(),
13262 standard_count: r.standard_count,
13263 tax_rate: r.statutory_tax_rate,
13264 })
13265 .collect();
13266 builder.add_jurisdictions(&jurisdiction_inputs);
13267
13268 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13270 cross_reference_records
13271 .iter()
13272 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13273 from_standard: r.from_standard.clone(),
13274 to_standard: r.to_standard.clone(),
13275 relationship: r.relationship.clone(),
13276 convergence_level: r.convergence_level,
13277 })
13278 .collect();
13279 builder.add_cross_references(&xref_inputs);
13280
13281 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13283 .iter()
13284 .map(|r| datasynth_graph::JurisdictionMappingInput {
13285 country_code: r.jurisdiction.clone(),
13286 standard_id: r.standard_id.clone(),
13287 })
13288 .collect();
13289 builder.add_jurisdiction_mappings(&mapping_inputs);
13290
13291 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13293 .iter()
13294 .map(|p| datasynth_graph::ProcedureNodeInput {
13295 procedure_id: p.procedure_id.clone(),
13296 standard_id: p.standard_id.clone(),
13297 procedure_type: p.procedure_type.clone(),
13298 sample_size: p.sample_size,
13299 confidence_level: p.confidence_level,
13300 })
13301 .collect();
13302 builder.add_procedures(&proc_inputs);
13303
13304 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13306 .iter()
13307 .map(|f| datasynth_graph::FindingNodeInput {
13308 finding_id: f.finding_id.to_string(),
13309 standard_id: f
13310 .related_standards
13311 .first()
13312 .map(|s| s.as_str().to_string())
13313 .unwrap_or_default(),
13314 severity: f.severity.to_string(),
13315 deficiency_level: f.deficiency_level.to_string(),
13316 severity_score: f.deficiency_level.severity_score(),
13317 control_id: f.control_id.clone(),
13318 affected_accounts: f.affected_accounts.clone(),
13319 })
13320 .collect();
13321 builder.add_findings(&finding_inputs);
13322
13323 if cr_config.graph.include_account_links {
13325 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13326 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13327 for std_record in &standard_records {
13328 if let Some(std_obj) =
13329 registry.get(&datasynth_core::models::compliance::StandardId::parse(
13330 &std_record.standard_id,
13331 ))
13332 {
13333 for acct_type in &std_obj.applicable_account_types {
13334 account_links.push(datasynth_graph::AccountLinkInput {
13335 standard_id: std_record.standard_id.clone(),
13336 account_code: acct_type.clone(),
13337 account_name: acct_type.clone(),
13338 });
13339 }
13340 }
13341 }
13342 builder.add_account_links(&account_links);
13343 }
13344
13345 if cr_config.graph.include_control_links {
13347 let mut control_links = Vec::new();
13348 let sox_like_ids: Vec<String> = standard_records
13350 .iter()
13351 .filter(|r| {
13352 r.standard_id.starts_with("SOX")
13353 || r.standard_id.starts_with("PCAOB-AS-2201")
13354 })
13355 .map(|r| r.standard_id.clone())
13356 .collect();
13357 let control_ids = [
13359 ("C001", "Cash Controls"),
13360 ("C002", "Large Transaction Approval"),
13361 ("C010", "PO Approval"),
13362 ("C011", "Three-Way Match"),
13363 ("C020", "Revenue Recognition"),
13364 ("C021", "Credit Check"),
13365 ("C030", "Manual JE Approval"),
13366 ("C031", "Period Close Review"),
13367 ("C032", "Account Reconciliation"),
13368 ("C040", "Payroll Processing"),
13369 ("C050", "Fixed Asset Capitalization"),
13370 ("C060", "Intercompany Elimination"),
13371 ];
13372 for sox_id in &sox_like_ids {
13373 for (ctrl_id, ctrl_name) in &control_ids {
13374 control_links.push(datasynth_graph::ControlLinkInput {
13375 standard_id: sox_id.clone(),
13376 control_id: ctrl_id.to_string(),
13377 control_name: ctrl_name.to_string(),
13378 });
13379 }
13380 }
13381 builder.add_control_links(&control_links);
13382 }
13383
13384 if cr_config.graph.include_company_links {
13386 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13387 .iter()
13388 .enumerate()
13389 .map(|(i, f)| datasynth_graph::FilingNodeInput {
13390 filing_id: format!("F{:04}", i + 1),
13391 filing_type: f.filing_type.to_string(),
13392 company_code: f.company_code.clone(),
13393 jurisdiction: f.jurisdiction.clone(),
13394 status: format!("{:?}", f.status),
13395 })
13396 .collect();
13397 builder.add_filings(&filing_inputs);
13398 }
13399
13400 let graph = builder.build();
13401 info!(
13402 " Compliance graph: {} nodes, {} edges",
13403 graph.nodes.len(),
13404 graph.edges.len()
13405 );
13406 Some(graph)
13407 } else {
13408 None
13409 };
13410
13411 self.check_resources_with_log("post-compliance-regulations")?;
13412
13413 Ok(ComplianceRegulationsSnapshot {
13414 standard_records,
13415 cross_reference_records,
13416 jurisdiction_records,
13417 audit_procedures,
13418 findings,
13419 filings,
13420 compliance_graph,
13421 })
13422 }
13423
13424 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13426 use super::lineage::LineageGraphBuilder;
13427
13428 let mut builder = LineageGraphBuilder::new();
13429
13430 builder.add_config_section("config:global", "Global Config");
13432 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13433 builder.add_config_section("config:transactions", "Transaction Config");
13434
13435 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13437 builder.add_generator_phase("phase:je", "Journal Entry Generation");
13438
13439 builder.configured_by("phase:coa", "config:chart_of_accounts");
13441 builder.configured_by("phase:je", "config:transactions");
13442
13443 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13445 builder.produced_by("output:je", "phase:je");
13446
13447 if self.phase_config.generate_master_data {
13449 builder.add_config_section("config:master_data", "Master Data Config");
13450 builder.add_generator_phase("phase:master_data", "Master Data Generation");
13451 builder.configured_by("phase:master_data", "config:master_data");
13452 builder.input_to("phase:master_data", "phase:je");
13453 }
13454
13455 if self.phase_config.generate_document_flows {
13456 builder.add_config_section("config:document_flows", "Document Flow Config");
13457 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13458 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13459 builder.configured_by("phase:p2p", "config:document_flows");
13460 builder.configured_by("phase:o2c", "config:document_flows");
13461
13462 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13463 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13464 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13465 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13466 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13467
13468 builder.produced_by("output:po", "phase:p2p");
13469 builder.produced_by("output:gr", "phase:p2p");
13470 builder.produced_by("output:vi", "phase:p2p");
13471 builder.produced_by("output:so", "phase:o2c");
13472 builder.produced_by("output:ci", "phase:o2c");
13473 }
13474
13475 if self.phase_config.inject_anomalies {
13476 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13477 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13478 builder.configured_by("phase:anomaly", "config:fraud");
13479 builder.add_output_file(
13480 "output:labels",
13481 "Anomaly Labels",
13482 "labels/anomaly_labels.csv",
13483 );
13484 builder.produced_by("output:labels", "phase:anomaly");
13485 }
13486
13487 if self.phase_config.generate_audit {
13488 builder.add_config_section("config:audit", "Audit Config");
13489 builder.add_generator_phase("phase:audit", "Audit Data Generation");
13490 builder.configured_by("phase:audit", "config:audit");
13491 }
13492
13493 if self.phase_config.generate_banking {
13494 builder.add_config_section("config:banking", "Banking Config");
13495 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13496 builder.configured_by("phase:banking", "config:banking");
13497 }
13498
13499 if self.config.llm.enabled {
13500 builder.add_config_section("config:llm", "LLM Enrichment Config");
13501 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13502 builder.configured_by("phase:llm_enrichment", "config:llm");
13503 }
13504
13505 if self.config.diffusion.enabled {
13506 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13507 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13508 builder.configured_by("phase:diffusion", "config:diffusion");
13509 }
13510
13511 if self.config.causal.enabled {
13512 builder.add_config_section("config:causal", "Causal Generation Config");
13513 builder.add_generator_phase("phase:causal", "Causal Overlay");
13514 builder.configured_by("phase:causal", "config:causal");
13515 }
13516
13517 builder.build()
13518 }
13519
13520 fn compute_company_revenue(
13529 entries: &[JournalEntry],
13530 company_code: &str,
13531 ) -> rust_decimal::Decimal {
13532 use rust_decimal::Decimal;
13533 let mut revenue = Decimal::ZERO;
13534 for je in entries {
13535 if je.header.company_code != company_code {
13536 continue;
13537 }
13538 for line in &je.lines {
13539 if line.gl_account.starts_with('4') {
13540 revenue += line.credit_amount - line.debit_amount;
13542 }
13543 }
13544 }
13545 revenue.max(Decimal::ZERO)
13546 }
13547
13548 fn compute_entity_net_assets(
13552 entries: &[JournalEntry],
13553 entity_code: &str,
13554 ) -> rust_decimal::Decimal {
13555 use rust_decimal::Decimal;
13556 let mut asset_net = Decimal::ZERO;
13557 let mut liability_net = Decimal::ZERO;
13558 for je in entries {
13559 if je.header.company_code != entity_code {
13560 continue;
13561 }
13562 for line in &je.lines {
13563 if line.gl_account.starts_with('1') {
13564 asset_net += line.debit_amount - line.credit_amount;
13565 } else if line.gl_account.starts_with('2') {
13566 liability_net += line.credit_amount - line.debit_amount;
13567 }
13568 }
13569 }
13570 asset_net - liability_net
13571 }
13572}
13573
13574fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13576 match format {
13577 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13578 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13579 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13580 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13581 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13582 }
13583}
13584
13585fn compute_trial_balance_entries(
13590 entries: &[JournalEntry],
13591 entity_code: &str,
13592 fiscal_year: i32,
13593 coa: Option<&ChartOfAccounts>,
13594) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13595 use std::collections::BTreeMap;
13596
13597 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13598 BTreeMap::new();
13599
13600 for je in entries {
13601 for line in &je.lines {
13602 let entry = balances.entry(line.account_code.clone()).or_default();
13603 entry.0 += line.debit_amount;
13604 entry.1 += line.credit_amount;
13605 }
13606 }
13607
13608 balances
13609 .into_iter()
13610 .map(
13611 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13612 account_description: coa
13613 .and_then(|c| c.get_account(&account_code))
13614 .map(|a| a.description().to_string())
13615 .unwrap_or_else(|| account_code.clone()),
13616 account_code,
13617 debit_balance: debit,
13618 credit_balance: credit,
13619 net_balance: debit - credit,
13620 entity_code: entity_code.to_string(),
13621 period: format!("FY{}", fiscal_year),
13622 },
13623 )
13624 .collect()
13625}
13626
13627#[cfg(test)]
13628#[allow(clippy::unwrap_used)]
13629mod tests {
13630 use super::*;
13631 use datasynth_config::schema::*;
13632
13633 fn create_test_config() -> GeneratorConfig {
13634 GeneratorConfig {
13635 global: GlobalConfig {
13636 industry: IndustrySector::Manufacturing,
13637 start_date: "2024-01-01".to_string(),
13638 period_months: 1,
13639 seed: Some(42),
13640 parallel: false,
13641 group_currency: "USD".to_string(),
13642 presentation_currency: None,
13643 worker_threads: 0,
13644 memory_limit_mb: 0,
13645 fiscal_year_months: None,
13646 },
13647 companies: vec![CompanyConfig {
13648 code: "1000".to_string(),
13649 name: "Test Company".to_string(),
13650 currency: "USD".to_string(),
13651 functional_currency: None,
13652 country: "US".to_string(),
13653 annual_transaction_volume: TransactionVolume::TenK,
13654 volume_weight: 1.0,
13655 fiscal_year_variant: "K4".to_string(),
13656 }],
13657 chart_of_accounts: ChartOfAccountsConfig {
13658 complexity: CoAComplexity::Small,
13659 industry_specific: true,
13660 custom_accounts: None,
13661 min_hierarchy_depth: 2,
13662 max_hierarchy_depth: 4,
13663 },
13664 transactions: TransactionConfig::default(),
13665 output: OutputConfig::default(),
13666 fraud: FraudConfig::default(),
13667 internal_controls: InternalControlsConfig::default(),
13668 business_processes: BusinessProcessConfig::default(),
13669 user_personas: UserPersonaConfig::default(),
13670 templates: TemplateConfig::default(),
13671 approval: ApprovalConfig::default(),
13672 departments: DepartmentConfig::default(),
13673 master_data: MasterDataConfig::default(),
13674 document_flows: DocumentFlowConfig::default(),
13675 intercompany: IntercompanyConfig::default(),
13676 balance: BalanceConfig::default(),
13677 ocpm: OcpmConfig::default(),
13678 audit: AuditGenerationConfig::default(),
13679 banking: datasynth_banking::BankingConfig::default(),
13680 data_quality: DataQualitySchemaConfig::default(),
13681 scenario: ScenarioConfig::default(),
13682 temporal: TemporalDriftConfig::default(),
13683 graph_export: GraphExportConfig::default(),
13684 streaming: StreamingSchemaConfig::default(),
13685 rate_limit: RateLimitSchemaConfig::default(),
13686 temporal_attributes: TemporalAttributeSchemaConfig::default(),
13687 relationships: RelationshipSchemaConfig::default(),
13688 accounting_standards: AccountingStandardsConfig::default(),
13689 audit_standards: AuditStandardsConfig::default(),
13690 distributions: Default::default(),
13691 temporal_patterns: Default::default(),
13692 vendor_network: VendorNetworkSchemaConfig::default(),
13693 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13694 relationship_strength: RelationshipStrengthSchemaConfig::default(),
13695 cross_process_links: CrossProcessLinksSchemaConfig::default(),
13696 organizational_events: OrganizationalEventsSchemaConfig::default(),
13697 behavioral_drift: BehavioralDriftSchemaConfig::default(),
13698 market_drift: MarketDriftSchemaConfig::default(),
13699 drift_labeling: DriftLabelingSchemaConfig::default(),
13700 anomaly_injection: Default::default(),
13701 industry_specific: Default::default(),
13702 fingerprint_privacy: Default::default(),
13703 quality_gates: Default::default(),
13704 compliance: Default::default(),
13705 webhooks: Default::default(),
13706 llm: Default::default(),
13707 diffusion: Default::default(),
13708 causal: Default::default(),
13709 source_to_pay: Default::default(),
13710 financial_reporting: Default::default(),
13711 hr: Default::default(),
13712 manufacturing: Default::default(),
13713 sales_quotes: Default::default(),
13714 tax: Default::default(),
13715 treasury: Default::default(),
13716 project_accounting: Default::default(),
13717 esg: Default::default(),
13718 country_packs: None,
13719 scenarios: Default::default(),
13720 session: Default::default(),
13721 compliance_regulations: Default::default(),
13722 }
13723 }
13724
/// Constructing an orchestrator with default phases from the baseline test
/// config must succeed.
#[test]
fn test_enhanced_orchestrator_creation() {
    let config = create_test_config();
    // Report the construction error itself; a bare `assert!(is_ok())` would
    // only say that the assertion failed.
    if let Err(err) = EnhancedOrchestrator::with_defaults(config) {
        panic!("orchestrator creation failed: {:?}", err);
    }
}
13731
/// Journal-entry-only generation (no master data, document flows or anomaly
/// injection) must succeed and yield at least one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    // `expect` prints the underlying error on failure, unlike a separate
    // `assert!(result.is_ok())` which hides it behind a bare assertion message.
    let result = orchestrator
        .generate()
        .expect("minimal generation should succeed");

    assert!(!result.journal_entries.is_empty());
}
13751
/// With only the master-data phase enabled, vendors, customers and materials
/// must all be produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let master = &output.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
13776
/// Master data plus document flows must yield P2P and O2C chains along with
/// purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let flows = &output.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // And so do the flattened documents.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
13810
/// Journal-entry generation with anomaly injection enabled must produce
/// entries and an anomaly-label summary.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    assert!(output.anomaly_labels.summary.is_some());
}
13833
/// Master data, document flows, journal entries and balance validation run
/// together: every stage must leave data behind and validation must have run.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    // Every enabled phase produced something.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices are populated.
    assert!(!output.subledger.ap_invoices.is_empty());
    assert!(!output.subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
13875
/// AP/AR subledger invoices must mirror the vendor/customer invoices of the
/// document flows one-to-one, and the statistics must report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
13931
/// Balance validation over generated journal entries must run, find no
/// unbalanced entries, and report equal total debits and credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation actually ran.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry is out of balance.
    assert!(!validation.has_unbalanced_entries);

    // Overall debits equal overall credits.
    assert_eq!(validation.total_debits, validation.total_credits);
}
13961
/// Reported vendor, customer, material and journal-entry counts must match
/// the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
14000
/// `PhaseConfig::default()` enables master data, document flows, journal
/// entries, balance validation and progress display, leaves anomaly injection
/// off, and uses positive vendor/customer counts.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
14013
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(orch.get_coa().is_none());
}
14022
/// After a generation run the orchestrator exposes a chart of accounts.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters; the result is discarded.
    let _ = orch.generate().unwrap();

    assert!(orch.get_coa().is_some());
}
14041
/// A master-data-only run must return a non-empty vendor list in its result.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    assert!(!output.master_data.vendors.is_empty());
}
14065
/// `with_progress(false)` must clear the `show_progress` flag on the
/// orchestrator's phase config.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
14076
/// With a second company added to the config, the run must report two
/// companies and at least ten vendors and ten customers overall
/// (five of each are requested per company).
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` prints the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
14113
/// Requesting document flows while master-data generation is disabled must
/// leave both the P2P and O2C chain lists empty.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();
    let flows = &output.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
14133
/// `statistics.total_line_items` must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    // Recount the lines independently of the statistics.
    let recounted: u64 = output
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum();

    assert_eq!(output.statistics.total_line_items, recounted);
}
14157
/// Audit generation with two engagements must populate the audit artifacts
/// checked below, and every audit counter in the statistics must match the
/// size of its collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Standard-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
14277
/// The baseline config leaves LLM, diffusion and causal phases disabled, so a
/// run must report zeroed statistics for them and no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();
    // All three optional phases start out disabled.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    // Nothing was recorded for the disabled phases.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
14309
/// With counterfactual generation on (and period close off), there must be
/// exactly one counterfactual pair per journal entry, each with a unique
/// `pair_id`. The checks are skipped when no journal entries were generated.
#[test]
fn test_counterfactual_generation_enabled() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = orch.generate().unwrap();

    // Nothing to compare against without journal entries.
    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    assert_eq!(output.counterfactual_pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Collecting the ids into a set must not drop any: all ids are distinct.
    let distinct_ids: std::collections::HashSet<_> = output
        .counterfactual_pairs
        .iter()
        .map(|pair| pair.pair_id.clone())
        .collect();
    assert_eq!(distinct_ids.len(), output.counterfactual_pairs.len());
}
14346
/// With LLM enrichment enabled and capped at three vendors, at least one and
/// at most three vendors must be reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
14374
/// With diffusion enabled and `sample_size` set to 20, exactly 20 diffusion
/// samples must be reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();

    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
14397
/// The causal overlay with the `fraud_detection` template, 100 samples and
/// validation on must report 100 samples and a validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
14423
/// The causal overlay with the `revenue_cycle` template, 50 samples and
/// validation off must report 50 samples and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome is recorded.
    assert!(stats.causal_validation_passed.is_none());
}
14449
/// LLM enrichment, diffusion and the causal overlay enabled in one run must
/// each report their expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14485
14486 #[test]
14487 fn test_statistics_serialization_with_new_fields() {
14488 let stats = EnhancedGenerationStatistics {
14489 total_entries: 100,
14490 total_line_items: 500,
14491 llm_enrichment_ms: 42,
14492 llm_vendors_enriched: 10,
14493 diffusion_enhancement_ms: 100,
14494 diffusion_samples_generated: 50,
14495 causal_generation_ms: 200,
14496 causal_samples_generated: 100,
14497 causal_validation_passed: Some(true),
14498 ..Default::default()
14499 };
14500
14501 let json = serde_json::to_string(&stats).unwrap();
14502 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14503
14504 assert_eq!(deserialized.llm_enrichment_ms, 42);
14505 assert_eq!(deserialized.llm_vendors_enriched, 10);
14506 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14507 assert_eq!(deserialized.diffusion_samples_generated, 50);
14508 assert_eq!(deserialized.causal_generation_ms, 200);
14509 assert_eq!(deserialized.causal_samples_generated, 100);
14510 assert_eq!(deserialized.causal_validation_passed, Some(true));
14511 }
14512
14513 #[test]
14514 fn test_statistics_backward_compat_deserialization() {
14515 let old_json = r#"{
14517 "total_entries": 100,
14518 "total_line_items": 500,
14519 "accounts_count": 50,
14520 "companies_count": 1,
14521 "period_months": 12,
14522 "vendor_count": 10,
14523 "customer_count": 20,
14524 "material_count": 15,
14525 "asset_count": 5,
14526 "employee_count": 8,
14527 "p2p_chain_count": 5,
14528 "o2c_chain_count": 5,
14529 "ap_invoice_count": 5,
14530 "ar_invoice_count": 5,
14531 "ocpm_event_count": 0,
14532 "ocpm_object_count": 0,
14533 "ocpm_case_count": 0,
14534 "audit_engagement_count": 0,
14535 "audit_workpaper_count": 0,
14536 "audit_evidence_count": 0,
14537 "audit_risk_count": 0,
14538 "audit_finding_count": 0,
14539 "audit_judgment_count": 0,
14540 "anomalies_injected": 0,
14541 "data_quality_issues": 0,
14542 "banking_customer_count": 0,
14543 "banking_account_count": 0,
14544 "banking_transaction_count": 0,
14545 "banking_suspicious_count": 0,
14546 "graph_export_count": 0,
14547 "graph_node_count": 0,
14548 "graph_edge_count": 0
14549 }"#;
14550
14551 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14552
14553 assert_eq!(stats.llm_enrichment_ms, 0);
14555 assert_eq!(stats.llm_vendors_enriched, 0);
14556 assert_eq!(stats.diffusion_enhancement_ms, 0);
14557 assert_eq!(stats.diffusion_samples_generated, 0);
14558 assert_eq!(stats.causal_generation_ms, 0);
14559 assert_eq!(stats.causal_samples_generated, 0);
14560 assert!(stats.causal_validation_passed.is_none());
14561 }
14562}